def min_value(dtype):
- """minimum value of dtype"""
+ """minimum value of dtype
+
+ Parameters
+ ----------
+ dtype : str
+ The data type.
+
+ Returns
+ -------
+ value : tvm.Expr
+ The minimum value of dtype.
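+
+    Example
+    -------
+    Query the smallest representable value of a dtype:
+
+    .. code-block:: python
+
+        lo = tvm.min_value("int32")  # smallest int32, as a TVM expression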
+ """
return _api_internal._min_value(dtype)
def max_value(dtype):
- """maximum value of dtype"""
+ """maximum value of dtype
+
+ Parameters
+ ----------
+ dtype : str
+ The data type.
+
+ Returns
+ -------
+ value : tvm.Expr
+ The maximum value of dtype.
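+
+    Example
+    -------
+    Query the largest representable value of a dtype:
+
+    .. code-block:: python
+
+        hi = tvm.max_value("float32")  # largest float32, as a TVM expression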
+ """
return _api_internal._max_value(dtype)
-def const(value, dtype=None):
- """construct a constant"""
- if dtype is None:
- if isinstance(value, _Integral):
- dtype = 'int32'
- else:
- dtype = 'float32'
+def const(value, dtype):
+ """construct a constant
+
+ Parameters
+ ----------
+ value : number
+        The value of the constant.
+
+ dtype : str
+ The data type.
+
+ Returns
+ -------
+    const_val : tvm.Expr
+ The result expression.
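+
+    Example
+    -------
+    Build typed immediates; the dtype now has to be passed explicitly:
+
+    .. code-block:: python
+
+        x = tvm.const(1, "int32")      # integer immediate (tvm.expr.IntImm)
+        y = tvm.const(1.0, "float32")  # floating-point immediate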
+ """
return _api_internal._const(value, dtype)
_internal_assert(isinstance(args[0], str), \
"A loop bind's first argument should be a string!")
iter_var = _api.thread_axis(args[0])
- low, ext = _api.const(0), args[1]
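+    # A bound loop always starts at 0; its extent is the user-supplied second argument.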
+ low, ext = _api.const(0, "int32"), args[1]
for_type = None
return iter_var, low, ext, for_type
import operator
import logging
import sys
+from numbers import Integral
+
from .util import _internal_assert
from . import calls
from . import util
return self._args[s]
return self.alloc_buffers[s][0]
+ def _const(self, value, dtype=None):
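+        # Pick a fallback dtype for untyped Python literals: bools map to "bool",
+        # other integral values to "int32", and everything else to "float32".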
+ if dtype is None:
+ if isinstance(value, bool):
+ dtype = "bool"
+ elif isinstance(value, Integral):
+ dtype = "int32"
+ else:
+ dtype = "float32"
+ return _api.const(value, dtype)
#pylint: disable=invalid-name, missing-docstring
def visit_Module(self, node):
if isinstance(res, tuple):
buf = res[0]
if isinstance(node.ctx, ast.Load):
- return _make.Call(buf.dtype, buf.name, [_api.const(0)], \
+ return _make.Call(buf.dtype, buf.name, [self._const(0)], \
_expr.Call.Halide, buf.op, buf.value_index)
- return buf, [_api.const(0)]
+ return buf, [self._const(0)]
if isinstance(node.ctx, ast.Load):
return res
return None
def visit_Num(self, node):
- return _api.const(node.n)
+ return self._const(node.n)
def visit_AugAssign(self, node):
_internal_assert(len(buf) == 2, "LHS is supposed to be (buf, args)!")
buf, args = buf
else:
- args = [_api.const(0)]
+ args = [self._const(0)]
_internal_assert(isinstance(buf, Tensor), "LHS is supposed to be Tensor!")
read = _make.Call(buf.dtype, buf.name, args, _expr.Call.Halide, buf.op, buf.value_index)
if iter_var is None:
_internal_assert(for_type is not None, "The loop bind function parse error!")
offset = iter_var = _api.var(_name)
- if not _ir_pass.Equal(low, _api.const(0)):
+ if not _ir_pass.Equal(low, self._const(0)):
offset = iter_var + low
self.loops_above[_name] = offset
else:
if for_type is None:
res = _make.AttrStmt(iter_var, 'thread_extent', ext, _body)
else:
- res = _make.For(iter_var, _api.const(0), ext, for_type, 0, _body)
+ res = _make.For(iter_var, self._const(0), ext, for_type, 0, _body)
self.loops_above.pop(_name)
return res
"""
if isinstance(value, (_base.numeric_types, (bool, list))):
value = _np.array(value, dtype=dtype)
- # convert default to int32 and float32
- if dtype is None:
- if value.dtype == "float64":
- value = value.astype("float32")
- elif value.dtype == "int64":
- value = value.astype("int32")
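+    # Note: the implicit float64 -> float32 / int64 -> int32 down-casting for an
+    # unspecified dtype is gone; values keep whatever dtype numpy infers.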
if isinstance(value, (_np.ndarray, _np.generic)):
value = _nd.array(value)
def test_id():
x = relay.var('x', 'float32')
ident = relay.Function([x], x)
- check_eval(ident, [1.0], 1.0)
+ one = np.array(1.0, 'float32')
+ check_eval(ident, [one], one)
def test_add_const():
j = relay.var('i', shape=[], dtype='int32')
z = relay.equal(i, j)
func = relay.Function([i, j], z, ret_type=relay.TensorType([], 'bool'))
- i_data = relay.const(0)
- j_data = relay.const(0)
+ i_data = relay.const(0, 'int32')
+ j_data = relay.const(0, 'int32')
check_eval(func, [i_data, j_data], True)
i = relay.var('i', shape=[], dtype='int32')
accum = relay.var('accum', shape=[], dtype='int32')
sb = ScopeBuilder()
- with sb.if_scope(relay.equal(i, relay.const(0))):
+ with sb.if_scope(relay.equal(i, relay.const(0, 'int32'))):
sb.ret(accum)
with sb.else_scope():
- one_less = relay.subtract(i, relay.const(1))
+ one_less = relay.subtract(i, relay.const(1, 'int32'))
new_accum = relay.add(accum, i)
sb.ret(relay.Call(sum_up, [one_less, new_accum]))
func = relay.Function([i, accum], sb.get())
global _test_debug_hit
_test_debug_hit = True
prog = debug(x, debug_func=did_exec)
- result = ex.evaluate(prog, { x: const(1) })
+ result = ex.evaluate(prog, { x: const(1, 'int32') })
assert _test_debug_hit
assert result.asnumpy() == 1
+
def test_debug_with_expr():
global _test_debug_hit
_test_debug_hit = False
global _test_debug_hit
_test_debug_hit = True
prog = debug(x + x * x, debug_func=did_exec)
- result = ex.evaluate(prog, { x: const(2) })
+ result = ex.evaluate(prog, { x: const(2, 'int32') })
assert _test_debug_hit
assert result.asnumpy() == 6
for target, ctx in ctx_list():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
- op_res = intrp.evaluate(func)(fill_value)
+ op_res = intrp.evaluate(func)(np.array(fill_value, dtype))
tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
verify_full(4, (1, 3, 4, 4), "int32")
verify_full(4.0, (1, 4), "float32")
for target, ctx in ctx_list():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
- op_res = intrp.evaluate(func)(x_data, fill_value)
+ op_res = intrp.evaluate(func)(x_data, np.array(fill_value, dtype))
tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
verify_full_like((1, 3, 4, 4), 4, "int32")
verify_full_like((1, 1), 44.0, "float32")
@register_alter_op_layout("nn.conv2d", level=100)
def alter_conv2d(attrs, inputs, tinfos):
data, weight = inputs
- weight = relay.multiply(weight, relay.const(2.0))
+ weight = relay.multiply(weight, relay.const(2.0, "float32"))
return relay.nn.conv2d(data, weight, **attrs)
def expected():
x = relay.var("x", shape=(1, 64, 56, 56))
weight = relay.var('weight', shape=(64, 64, 3, 3))
- y = relay.nn.conv2d(x, relay.multiply(weight, relay.const(2.0)),
+ y = relay.nn.conv2d(x, relay.multiply(weight, relay.const(2.0, "float32")),
channels=64,
kernel_size=(3, 3),
padding=(1, 1))
test_alter_layout_dual_path()
test_alter_layout_resnet()
test_alter_layout_broadcast_op()
-
assert zz.a == x and zz.b.value == 4
n = tvm.var('n')
- assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(n % (-1)), tvm.const(0))
- assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(n % 1), tvm.const(0))
+ assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(n % (-1)), tvm.const(0, "int32"))
+ assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(n % 1), tvm.const(0, "int32"))
assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(n / 1), n)
tvm.ir_pass.CanonicalSimplify(n / (-1))
# This is not true in the current implementation
ry = tvm.var("ry")
y = tvm.var("y")
x = tvm.var("x")
- vmap = {rx: tvm.Range(tvm.const(0), tvm.const(3)),
- ry: tvm.Range(tvm.const(0), tvm.const(3)),
- y: tvm.Range(tvm.const(0), tvm.const(2)),
- x: tvm.Range(tvm.const(0), tvm.const(14))}
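+    # tvm.const now requires an explicit dtype, so use a small int32 helper.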
+    i32_const = lambda v: tvm.const(v, "int32")
+ vmap = {rx: tvm.Range(i32_const(0), i32_const(3)),
+ ry: tvm.Range(i32_const(0), i32_const(3)),
+ y: tvm.Range(i32_const(0), i32_const(2)),
+ x: tvm.Range(i32_const(0), i32_const(14))}
idx = ry * 16 + rx + y * 16 + x
z1 = tvm.ir_pass.CanonicalSimplify(idx // 16, vmap)
z2 = tvm.ir_pass.CanonicalSimplify(idx % 16, vmap)
test_modular()
test_simplify()
test_mul()
- test_simplify_minmax()
\ No newline at end of file
+ test_simplify_minmax()
import tvm
def test_const():
- x = tvm.const(1)
+ x = tvm.const(1, "int32")
print(x.dtype)
assert x.dtype == tvm.int32
assert isinstance(x, tvm.expr.IntImm)
def test_make():
- x = tvm.const(1)
+ x = tvm.const(1, "int32")
y = tvm.var("x")
z = x + y
assert isinstance(tvm.max(x, y), tvm.expr.Max)
assert isinstance(tvm.min(x, y), tvm.expr.Min)
def test_ir():
- x = tvm.const(1)
+ x = tvm.const(1, "int32")
y = tvm.make.IntImm('int32', 1)
z = x + y
stmt = tvm.make.Evaluate(z)
def test_const_fold():
def check(f, *args):
- x = f(*[tvm.const(x) for x in args])
+ x = f(*[tvm.const(x, "int32") for x in args])
y = f(*args)
if not isinstance(x, (tvm.expr.IntImm, tvm.expr.UIntImm)) or x.value != int(y):
raise ValueError("check error: %s vs %s " % (x, y))
def test_const_saveload_json():
# save load json
- x = tvm.const(1)
- y = tvm.const(10)
+ x = tvm.const(1, "int32")
+ y = tvm.const(10, "int32")
z = x + y
z = z + z
json_str = tvm.save_json(z)
def test_make_smap():
# save load json
- x = tvm.const(1)
- y = tvm.const(10)
+ x = tvm.const(1, "int32")
+ y = tvm.const(10, "int32")
z = tvm.expr.Add(x, y)
smap = tvm.convert({"z": z, "x": x})
json_str = tvm.save_json(tvm.convert([smap]))
def test_bound():
m = tvm.var('m')
- vrange = tvm.convert({m: tvm.Range(tvm.const(0), tvm.const(10))})
+ vrange = tvm.convert({m: tvm.Range(tvm.const(0, "int32"), tvm.const(10, "int32"))})
ret = tvm.ir_pass.Simplify(m % 10, vrange)
assert ret == m
def test_canonical():
x = tvm.var("x")
- z = tvm.const(3)
+ z = tvm.const(3, "int32")
ret = tvm.ir_pass.CanonicalSimplify(x / (z*z) - x / (z*z))
assert(tvm.ir_pass.Equal(ret, 0))
n = tvm.var("n")
with ib.for_range(0, n, name="t") as i:
ib.scope_attr(
- tvm.const(1) , "pragma_scope", tvm.make.StringImm("parallel_launch_point"))
+            tvm.const(1, "int32"), "pragma_scope",
+ tvm.make.StringImm("parallel_launch_point"))
with ib.for_range(0, n, name="i", for_type="parallel") as i:
with ib.for_range(0, 10, name="j") as j:
A = ib.allocate("float32", n, name="A", scope="global")
assert ret.for_type == tvm.stmt.For.Unrolled
ib = tvm.ir_builder.create()
- ib.scope_attr(tvm.const(0), "pragma_auto_unroll_max_step", 16)
+ ib.scope_attr(tvm.const(0, "int32"), "pragma_auto_unroll_max_step", 16)
ib.emit(stmt)
wrapped = ib.get()
wrapped = tvm.make.Block(wrapped, stmt)
if __name__ == "__main__":
test_unroll_loop()
- test_unroll_fake_loop()
\ No newline at end of file
+ test_unroll_fake_loop()
def test_schedule_bound_condition():
A = tvm.placeholder((64,), name='A', dtype="float32")
- Apad = tvm.compute((66,), lambda i: tvm.select(tvm.all(i>0, i < 65), A[i-1], tvm.const(0.)), name='Apad')
+ Apad = tvm.compute((66,), lambda i: tvm.select(
+        tvm.all(i > 0, i < 65), A[i-1], tvm.const(0., "float32")), name='Apad')
Apad2 = tvm.compute((66,), lambda i: Apad[i]*2, name='Apad2')
s = tvm.create_schedule(Apad2.op)
AL1 = s.cache_read(A,"local",[Apad])
element = data(*idx)
for b in range(bits):
- extracted_bit = ((element & tvm.const(masks[b])) >> b).astype(pack_type)
+ extracted_bit = ((element & tvm.const(masks[b], "int32")) >> b).astype(pack_type)
packed_data[b] = (packed_data[b] | extracted_bit)
if k < data_width - 1:
packed_data[b] = packed_data[b] << 1
def test_util():
- x = tvm.const(100)
+ x = tvm.const(100, "int32")
assert util.get_const_int(x) == 100
assert util.get_const_tuple((x, x)) == (100, 100)
def test_util():
- x = tvm.const(100)
+ x = tvm.const(100, "int32")
assert util.get_const_int(x) == 100
assert util.get_const_tuple((x, x)) == (100, 100)
+++ /dev/null
-import tvm
-import topi
-from topi import util
-
-
-def test_util():
- x = tvm.const(100)
- assert util.get_const_int(x) == 100
- assert util.get_const_tuple((x, x)) == (100, 100)
-
-
-def test_ewise():
- m = tvm.var('m')
- l = tvm.var('l')
- A = tvm.placeholder((m, l), name='A')
-
- def test_apply(func, name):
- B = func(A)
- assert tuple(B.shape) == tuple(A.shape)
- assert B.op.body[0].name == name
-
- test_apply(topi.cpp.exp, "exp")
- test_apply(topi.cpp.tanh, "tanh")
- test_apply(topi.cpp.sigmoid, "sigmoid")
- test_apply(topi.cpp.log, "log")
- test_apply(topi.cpp.sqrt, "sqrt")
-
-def test_flatten_tag():
- A = tvm.placeholder((3, 4), name='A')
- B = topi.cpp.nn.flatten(A)
- assert B.op.tag == topi.tag.INJECTIVE
-
-if __name__ == "__main__":
- test_util()
- test_ewise()
- test_flatten_tag()
+++ /dev/null
-"""Test code for binary neural network operators."""
-import numpy as np
-import tvm
-import topi
-from topi.util import get_const_tuple
-from tvm.contrib.pickle_memoize import memoize
-
-
-def verify_binary_dense(batch, in_dim, out_dim):
- A = tvm.placeholder((batch, in_dim), name='A')
- B = tvm.placeholder((out_dim, in_dim), name='B')
- bnn_A = topi.cpp.nn.binarize_pack(A, 1)
- bnn_B = topi.cpp.nn.binarize_pack(B, 1)
- # binary dense
- bnn_A1 = tvm.placeholder(bnn_A.shape, dtype=bnn_A.dtype)
- bnn_B1 = tvm.placeholder(bnn_B.shape, dtype=bnn_B.dtype)
- bnn_C = topi.cpp.nn.binary_dense(bnn_A1, bnn_B1)
- # schedule
- target = topi.cpp.TEST_create_target("llvm")
- s1 = topi.cpp.x86.schedule_binarize_pack(target, [bnn_A])
- s2 = topi.cpp.x86.schedule_binarize_pack(target, [bnn_B])
- s3 = topi.cpp.x86.schedule_binary_dense(target, [bnn_C])
-
- dtype = A.dtype
- @memoize("topi.tests.test_topi_binary_dense")
- def get_ref_data():
- # generate random matrix of +1 or -1 value
- a_np = (np.random.randint(2, size=(batch, in_dim)) * 2 - 1).astype(dtype)
- b_np = (np.random.randint(2, size=(out_dim, in_dim)) * 2 - 1).astype(dtype)
- c_np = np.dot(a_np, b_np.T)
- return (a_np, b_np, c_np)
-
- a_np, b_np, c_np = get_ref_data()
-
- ctx = tvm.cpu(0)
- a = tvm.nd.array(a_np, ctx)
- b = tvm.nd.array(b_np, ctx)
- bnn_a = tvm.nd.array(np.zeros(get_const_tuple(bnn_A.shape), dtype=bnn_A.dtype), ctx)
- bnn_b = tvm.nd.array(np.zeros(get_const_tuple(bnn_B.shape), dtype=bnn_B.dtype), ctx)
- bnn_c = tvm.nd.array(np.zeros(get_const_tuple(bnn_C.shape), dtype=bnn_C.dtype), ctx)
- f1 = tvm.build(s1, [A, bnn_A], 'llvm')
- f2 = tvm.build(s2, [B, bnn_B], 'llvm')
- f3 = tvm.build(s3, [bnn_A1, bnn_B1, bnn_C], 'llvm')
- f1(a, bnn_a)
- f2(b, bnn_b)
- f3(bnn_a, bnn_b, bnn_c)
- tvm.testing.assert_allclose(bnn_c.asnumpy(), c_np, rtol=1e-5)
-
-def test_binary_dense():
- verify_binary_dense(1, 4096, 1024)
- verify_binary_dense(1, 1024, 1000)
-
-
-if __name__ == "__main__":
- test_binary_dense()
+++ /dev/null
-"""Test code for clip operator"""
-import numpy as np
-import tvm
-import topi
-from topi.util import get_const_tuple
-from tvm.contrib.pickle_memoize import memoize
-
-
-def verify_clip(N, a_min, a_max, dtype):
- A = tvm.placeholder((N, N), dtype=dtype, name='A')
- B = topi.cpp.clip(A, a_min, a_max)
-
- # use memoize to pickle the test data for next time use
- @memoize("topi.tests.test_topi_clip")
- def get_ref_data():
- a_np = np.random.uniform(a_min*2, a_max*2, size=(N, N)).astype(dtype)
- b_np = np.clip(a_np, a_min, a_max)
- return a_np, b_np
- a_np, b_np = get_ref_data()
-
- def check_device(device):
- if not tvm.module.enabled(device):
- print("Skip because %s is not enabled" % device)
- return
- target = topi.cpp.TEST_create_target(device)
- s = topi.cpp.generic.default_schedule(target, [B], False)
- ctx = tvm.cpu(0) if device == "llvm" else tvm.gpu(0)
- a = tvm.nd.array(a_np, ctx)
- b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx)
- f = tvm.build(s, [A, B], device, name="clip")
- f(a, b)
- tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
-
- for device in ['llvm']:
- check_device(device)
-
-def test_clip():
- verify_clip(1024, -127, 127, 'int8')
- verify_clip(1024, -127, 127, 'int16')
- verify_clip(1024, -127, 127, 'float32')
-
-
-if __name__ == "__main__":
- test_clip()
+++ /dev/null
-"""Test code for dense operator"""
-import numpy as np
-import tvm
-import topi
-from topi.util import get_const_tuple
-from tvm.contrib.pickle_memoize import memoize
-
-
-def verify_dense(batch, in_dim, out_dim, use_bias=True):
- A = tvm.placeholder((batch, in_dim), name='A')
- B = tvm.placeholder((out_dim, in_dim), name='B')
- C = tvm.placeholder((out_dim,), name='C')
- D = topi.cpp.nn.dense(A, B, C if use_bias else None)
- D = topi.cpp.nn.relu(D)
- dtype = A.dtype
-
- # use memoize to pickle the test data for next time use
- @memoize("topi.tests.test_topi_dense")
- def get_ref_data():
- a_np = np.random.uniform(size=(batch, in_dim)).astype(dtype)
- b_np = np.random.uniform(size=(out_dim, in_dim)).astype(dtype)
- c_np = np.random.uniform(size=(out_dim,)).astype(dtype)
- if use_bias:
- d_np = np.maximum(np.dot(a_np, b_np.T) + c_np, 0.0)
- else:
- d_np = np.maximum(np.dot(a_np, b_np.T), 0.0)
- return (a_np, b_np, c_np, d_np)
- # get the test data
- a_np, b_np, c_np, d_np = get_ref_data()
-
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
- print("Running on target: %s" % device)
- target = topi.cpp.TEST_create_target(device)
- if device == "llvm":
- s = topi.cpp.generic.schedule_dense(target, [D])
- elif device == "rocm":
- s = topi.cpp.rocm.schedule_dense(target, [D])
- else:
- s = topi.cpp.cuda.schedule_dense(target, [D])
- a = tvm.nd.array(a_np, ctx)
- b = tvm.nd.array(b_np, ctx)
- c = tvm.nd.array(c_np, ctx)
- d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=dtype), ctx)
- f = tvm.build(s, [A, B, C, D], device, name="dense")
- f(a, b, c, d)
- tvm.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5)
-
- for device in ['cuda', 'opencl', 'metal', 'rocm']:
- check_device(device)
-
-def test_dense():
- verify_dense(1, 1024, 1000, use_bias=True)
- verify_dense(1, 1024, 1000, use_bias=False)
-
-
-if __name__ == "__main__":
- test_dense()
+++ /dev/null
-import tvm
-import topi
-import topi.testing
-import numpy as np
-
-def test_dilate():
- target = 'llvm'
- ctx = tvm.cpu(0)
-
- def _test_dilate(input_size, strides):
- Input = tvm.placeholder((input_size))
- Output = topi.cpp.nn.dilate(Input, strides)
- tgt = topi.cpp.TEST_create_target(target)
- schedule = topi.cpp.generic.default_schedule(tgt, [Output], True)
- input_np = np.random.uniform(size=input_size).astype(Input.dtype)
- output_np = topi.testing.dilate_python(input_np, strides)
- input_tvm = tvm.nd.array(input_np, ctx=ctx)
- output_size = topi.util.get_const_tuple(Output.shape)
- output_tvm = tvm.nd.array(np.zeros(shape=output_size).astype(Output.dtype), ctx=ctx)
- f = tvm.build(schedule, [Input, Output], target)
- f(input_tvm, output_tvm)
- tvm.testing.assert_allclose(output_tvm.asnumpy(), output_np, rtol=1e-5)
-
- _test_dilate((32,), (2,))
- _test_dilate((32,32), (2,2))
- _test_dilate((1,3,32,32), (1,1,1,1))
- _test_dilate((1,3,32,32), (2,2,2,2))
- _test_dilate((1,32,32,3,3), (1,1,1,1,1))
- _test_dilate((1,32,32,3,3), (2,2,2,2,2))
- _test_dilate((1,32,32,32,3,3), (1,1,1,2,2,2))
- _test_dilate((1,32,32,32,3,3), (2,2,2,1,1,1))
-
-
-if __name__ == "__main__":
- test_dilate()
+++ /dev/null
-"""Test code for l2 normalization"""
-import numpy as np
-import tvm
-import topi
-import logging
-from topi.util import get_const_tuple
-import topi.testing
-
-def verify_l2_normalize(shape, eps, axis=None):
- '''Verify l2 normalization operator by comparing outputs from tvm and numpy implementation'''
- A = tvm.placeholder(shape, name='A')
- B = topi.cpp.nn.l2_normalize(A, eps, axis)
- dtype = A.dtype
-
- a_np = np.random.uniform(size=shape).astype(dtype)
- b_np = topi.testing.l2_normalize_python(a_np, eps, axis)
-
- def check_device(device):
- if not tvm.module.enabled(device):
- print("Skip because %s is not enabled" % device)
- return
- print("Running on target: %s" % device)
- target = topi.cpp.TEST_create_target(device)
- if device == "llvm":
- s = topi.cpp.generic.default_schedule(target, [B], False)
- else:
- s = topi.cpp.cuda.schedule_l2_normalize(target, [B])
- ctx = tvm.context(device, 0)
- a = tvm.nd.array(a_np, ctx)
- b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
- func = tvm.build(s, [A, B], device, name="l2_normalize")
- func(a, b)
- tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
-
- for device in ['cuda', 'opencl', 'metal', 'rocm', 'llvm']:
- check_device(device)
-
-def test_l2_normalize():
- verify_l2_normalize((1, 3, 20, 20), 0.001)
- verify_l2_normalize((1, 3, 20, 20), 0.001, (1,))
- verify_l2_normalize((1, 3, 20, 20), 0.001, (1, 2))
- verify_l2_normalize((1, 3, 20, 20), 0.001, (2, 3))
- verify_l2_normalize((1, 3, 20, 20), 0.001, (0, 3))
- verify_l2_normalize((1, 3, 20, 20), 0.001, (0, 2, 3))
-
-if __name__ == "__main__":
- logging.basicConfig(level=logging.DEBUG)
- test_l2_normalize()
+++ /dev/null
-"""Test code for LRN"""
-import numpy as np
-import tvm
-import topi
-import logging
-from topi.util import get_const_tuple
-import topi.testing
-
-def verify_lrn(shape, size, axis, bias, alpha, beta):
- '''Verify Local response normalization operator by comparing outputs from tvm and numpy implementation'''
- A = tvm.placeholder(shape, name='A')
- B = topi.cpp.nn.lrn(A, size, axis, alpha, beta, bias)
- dtype = A.dtype
-
- a_np = np.random.uniform(size=shape).astype(dtype)
- b_np = topi.testing.lrn_python(a_np, size, axis, bias, alpha, beta)
- def check_device(device):
- if not tvm.module.enabled(device):
- print("Skip because %s is not enabled" % device)
- return
- print("Running on target: %s" % device)
- target = topi.cpp.TEST_create_target(device)
- if device == "llvm":
- s = topi.cpp.generic.default_schedule(target, [B], False)
- else:
- s = topi.cpp.cuda.schedule_lrn(target, [B])
- ctx = tvm.context(device, 0)
- a = tvm.nd.array(a_np, ctx)
- b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx)
- f = tvm.build(s, [A, B], device)
- f(a, b)
- tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-1)
-
- for device in ['cuda', 'opencl', 'metal', 'rocm', 'llvm']:
- check_device(device)
-
-def test_lrn():
- verify_lrn((1, 3, 5, 5), 3, 3, 1.0, 1.0, 0.5)
- verify_lrn((1, 3, 5, 5), 3, 3, 1.0, 1.0, 0.5)
- verify_lrn((1, 3, 20, 20), 3, 1, 2.0, 1.0, 0.75)
-
-if __name__ == "__main__":
- logging.basicConfig(level=logging.DEBUG)
- test_lrn()
+++ /dev/null
-"""Test code for pooling"""
-import numpy as np
-import tvm
-import topi
-import math
-from topi.util import get_const_tuple
-
-pool_code = {
- "avg": 0,
- "max": 1
-}
-def verify_pool(n, ic, ih, kh, sh, padding, pool_type, ceil_mode, count_include_pad=True):
- iw = ih
- kw = kh
- sw = sh
- pt, pl, pb, pr = padding
- A = tvm.placeholder((n, ic, ih, iw), name='A')
- B = topi.cpp.nn.pool(A, [kh, kw], [sh, sw], padding,
- pool_code[pool_type], ceil_mode, "NCHW", count_include_pad)
- B = topi.cpp.nn.relu(B)
- dtype = A.dtype
-
- bshape = get_const_tuple(B.shape)
- ashape = get_const_tuple(A.shape)
- if ceil_mode:
- assert bshape[2] == int(math.ceil(float(ashape[2] - kh + pt + pb) / sh) + 1)
- assert bshape[3] == int(math.ceil(float(ashape[3] - kw + pl + pr) / sw) + 1)
- else:
- assert bshape[2] == int(math.floor(float(ashape[2] - kh + pt + pb) / sh) + 1)
- assert bshape[3] == int(math.floor(float(ashape[3] - kw + pl + pr) / sw) + 1)
-
-
- a_np = np.random.uniform(size=(n, ic, ih, iw)).astype(dtype)
- pad_np = np.zeros(shape=(n, ic, ih+pt+pb, iw+pl+pr)).astype(dtype)
- no_zero = (range(n), range(ic), (range(pt, ih+pt)), (range(pl, iw+pl)))
- pad_np[np.ix_(*no_zero)] = a_np
- _, oc, oh, ow = get_const_tuple(B.shape)
- b_np = np.zeros(shape=(n, oc, oh, ow)).astype(dtype)
-
- if pool_type == 'avg':
- for i in range(oh):
- for j in range(ow):
- if count_include_pad:
- b_np[:,:,i,j] = np.mean(pad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw], axis=(2,3))
- else:
- pad_count = np.sum(pad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw] > 0, axis=(2,3))
- b_np[:,:,i,j] = np.sum(pad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw], axis=(2,3)) / np.maximum(pad_count, 1)
-
- elif pool_type =='max':
- for i in range(oh):
- for j in range(ow):
- b_np[:,:,i,j] = np.max(pad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw], axis=(2,3))
- b_np = np.maximum(b_np, 0.0)
-
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
- print("Running on target: %s" % device)
- target = topi.cpp.TEST_create_target(device)
- if device == "llvm":
- s = topi.cpp.generic.default_schedule(target, [B], False)
- else:
- s = topi.cpp.cuda.schedule_pool(target, [B])
- a = tvm.nd.array(a_np, ctx)
- b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx)
- f = tvm.build(s, [A, B], device)
- f(a, b)
- tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
-
- for device in ['cuda', 'opencl', 'metal', 'rocm']:
- check_device(device)
-
-def test_pool():
- verify_pool(1, 256, 32, 2, 2, [0, 0, 0, 0], 'avg', False, True)
- verify_pool(1, 256, 31, 3, 3, [1, 2, 1, 2], 'avg', False, True)
- verify_pool(1, 256, 32, 2, 2, [1, 2, 1, 2], 'avg', False, False)
- verify_pool(1, 256, 31, 4, 4, [3, 3, 3, 3], 'avg', False, False)
- verify_pool(1, 256, 31, 4, 4, [0, 0, 0, 0], 'avg', False, False)
- verify_pool(1, 256, 32, 2, 2, [0, 0, 0, 0], 'max', False)
- verify_pool(1, 256, 31, 3, 3, [2, 1, 2, 1], 'max', False)
- verify_pool(1, 256, 31, 3, 3, [2, 1, 2, 1], 'max', True)
-
- verify_pool(1, 256, 31, 3, 3, [2, 1, 0, 3], 'avg', False, True)
- verify_pool(1, 256, 32, 2, 2, [0, 3, 2, 1], 'avg', False, False)
- verify_pool(1, 256, 31, 3, 3, [1, 0, 3, 2], 'max', False)
- verify_pool(1, 256, 31, 3, 3, [3, 2, 1, 0], 'max', True)
-
-
-def verify_global_pool(n, c, h, w, pool_type):
- A = tvm.placeholder((n, c, h, w), name='A')
- B = topi.cpp.nn.global_pool(A, pool_code[pool_type])
- B = topi.cpp.nn.relu(B)
-
- a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
- if pool_type == 'avg':
- b_np = np.mean(a_np, axis=(2,3), keepdims=True)
- elif pool_type =='max':
- b_np = np.max(a_np, axis=(2,3), keepdims=True)
- b_np = np.maximum(b_np, 0.0)
-
- def check_device(device):
- if not tvm.module.enabled(device):
- print("Skip because %s is not enabled" % device)
- return
- print("Running on target: %s" % device)
- target = topi.cpp.TEST_create_target(device)
- if device == "llvm":
- s = topi.cpp.generic.default_schedule(target, [B], False)
- else:
- s = topi.cpp.cuda.schedule_global_pool(target, [B])
- ctx = tvm.context(device, 0)
- a = tvm.nd.array(a_np, ctx)
- b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
- f = tvm.build(s, [A, B], device)
- f(a, b)
- tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
-
- for device in ['cuda', 'opencl', 'metal', 'rocm']:
- check_device(device)
-
-def test_global_pool():
- verify_global_pool(1, 1024, 7, 7, 'avg')
- verify_global_pool(4, 1024, 7, 7, 'avg')
- verify_global_pool(1, 1024, 7, 7, 'max')
- verify_global_pool(4, 1024, 7, 7, 'max')
-
-
-if __name__ == "__main__":
- test_pool()
- test_global_pool()
+++ /dev/null
-"""Test code for reduce."""
-import os
-import numpy as np
-import tvm
-import topi
-
-def _my_npy_argmax(arr, axis, keepdims):
- if not keepdims:
- return arr.argmax(axis=axis)
- else:
- if axis is not None:
- out_shape = list(arr.shape)
- out_shape[axis] = 1
- else:
- out_shape = [1 for _ in range(len(arr.shape))]
- return arr.argmax(axis=axis).reshape(out_shape)
-
-
-def _my_npy_argmin(arr, axis, keepdims):
- if not keepdims:
- return arr.argmin(axis=axis)
- else:
- out_shape = list(arr.shape)
- out_shape[axis] = 1
- return arr.argmin(axis=axis).reshape(out_shape)
-
-def verify_reduce_map_ele(in_shape, axis, keepdims, type="sum"):
- # Build the logic and compile the function
- dat_dtype = "float32"
- A = tvm.placeholder(shape=in_shape, name="A", dtype=dat_dtype)
- A1 = topi.cpp.sqrt(topi.cpp.exp(A))
- out_dtype = "float32"
- if type == "sum":
- B = topi.cpp.sum(A1, axis, keepdims)
- elif type == "max":
- B = topi.cpp.max(A1, axis, keepdims)
- elif type == "min":
- B = topi.cpp.min(A1, axis, keepdims)
- elif type == "argmax":
- B = topi.cpp.argmax(A1, axis, keepdims)
- out_dtype = "int32"
- elif type == "argmin":
- B = topi.cpp.argmin(A1, axis, keepdims)
- out_dtype = "int32"
- elif type == "prod":
- B = topi.cpp.prod(A1, axis, keepdims)
- else:
- raise NotImplementedError
-
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
- print("Running on target: %s" % device)
- target = topi.cpp.TEST_create_target(device)
- if device == "llvm":
- s = topi.cpp.generic.default_schedule(target, [B], True)
- else:
- s = topi.cpp.cuda.schedule_reduce(target, [B])
-
- foo = tvm.build(s, [A, B], device, name=type)
- # Test
- in_npy = np.random.uniform(size=in_shape).astype(np.float32)
- in_npy_map = np.sqrt(np.exp(in_npy)).astype(np.float32)
- if type == "sum":
- out_npy = in_npy_map.sum(axis=axis, keepdims=keepdims)
- elif type == "max":
- out_npy = in_npy_map.max(axis=axis, keepdims=keepdims)
- elif type == "min":
- out_npy = in_npy_map.min(axis=axis, keepdims=keepdims)
- elif type == "argmax":
- out_npy = _my_npy_argmax(in_npy_map, axis=axis, keepdims=keepdims)
- elif type == "argmin":
- out_npy = _my_npy_argmin(in_npy_map, axis=axis, keepdims=keepdims)
- elif type == "prod":
- out_npy = in_npy_map.prod(axis=axis, keepdims=keepdims)
- else:
- raise NotImplementedError
- data_tvm = tvm.nd.array(in_npy, ctx=ctx)
- out_tvm = tvm.nd.empty(shape=out_npy.shape, ctx=ctx, dtype=out_dtype)
- for _ in range(1):
- foo(data_tvm, out_tvm)
- if type == "argmax" or type == "argmin":
- out_tvm_indices = out_tvm.asnumpy()
- if keepdims:
- out_tvm_indices = np.take(out_tvm_indices, indices=0, axis=axis)
- if axis is None:
- out_tvm_val = in_npy_map.ravel()[out_tvm_indices]
- else:
- other_indices = tuple(np.indices(in_shape[0:axis] + in_shape[(axis+1):]))
- sel_indices = other_indices[0:axis] + (out_tvm_indices,) + other_indices[axis:]
- out_tvm_val = in_npy_map[sel_indices]
- if type == "argmax":
- tvm.testing.assert_allclose(out_tvm_val, in_npy_map.max(axis=axis), 1E-3, 1E-3)
- elif type == "argmin":
- tvm.testing.assert_allclose(out_tvm_val, in_npy_map.min(axis=axis), 1E-3, 1E-3)
- else:
- tvm.testing.assert_allclose(out_tvm.asnumpy(), out_npy, 1E-3, 1E-3)
- for device in ["cuda", "opencl", "metal", "llvm", "rocm"]:
- check_device(device)
-
-
-def test_reduce_map():
- verify_reduce_map_ele(in_shape=(128, 24, 128, 24),
- axis=(1, 2, 3),
- keepdims=True,
- type="sum")
- verify_reduce_map_ele(in_shape=(128, 24 * 128 * 24),
- axis=(1,),
- keepdims=False,
- type="max")
- verify_reduce_map_ele(in_shape=(32, 128, 24),
- axis=None,
- keepdims=True,
- type="sum")
- verify_reduce_map_ele(in_shape=(128, 24, 128, 24),
- axis=(0, 2),
- keepdims=False,
- type="min")
- verify_reduce_map_ele(in_shape=(128, 4, 4, 128),
- axis=(1, ),
- keepdims=True,
- type="prod")
- verify_reduce_map_ele(in_shape=(4, 4),
- axis=(0, 1),
- keepdims=False,
- type="prod")
- verify_reduce_map_ele(in_shape=(32, 128),
- axis=1,
- keepdims=True,
- type="argmax")
- verify_reduce_map_ele(in_shape=(32, 24, 32, 24),
- axis=2,
- keepdims=False,
- type="argmin")
- verify_reduce_map_ele(in_shape=(31, 21, 15),
- axis=None,
- keepdims=True,
- type="argmax")
- verify_reduce_map_ele(in_shape=(31, 21, 15),
- axis=None,
- keepdims=False,
- type="sum")
-
-if __name__ == "__main__":
- test_reduce_map()
+++ /dev/null
-"""Test code for region"""
-import logging
-import numpy as np
-import tvm
-import topi
-import topi.testing
-from topi.util import get_const_tuple
-
-def verify_region(batch, in_size, in_channel, n, classes, coords, background, l_softmax):
- '''Verify region operator by comparing outputs from tvm and numpy implementation'''
- in_height = in_width = in_size
-
- A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A')
- B = topi.cpp.yolo.region(A, n, classes, coords, background, l_softmax)
-
- a_shape = get_const_tuple(A.shape)
- dtype = A.dtype
-
- def get_ref_data_region():
- '''Randomly initialize the data variables and get refernce output for the region operation'''
- a_np = np.random.uniform(size=a_shape).astype(dtype)
- b_np = topi.testing.region_python(a_np, n, classes, coords, background, l_softmax)
- return a_np, b_np
-
- a_np, b_np = get_ref_data_region()
- def check_device(device):
- '''Check the device is available and if so, build and run the program'''
- if not tvm.module.enabled(device):
- print("Skip because %s is not enabled" % device)
- return
- print("Running on target: %s" % device)
- target = topi.cpp.TEST_create_target(device)
- if device == "llvm":
- s = topi.cpp.generic.default_schedule(target, [B], False)
- else:
- s = topi.cpp.rocm.schedule_region(target, [B])
- ctx = tvm.context(device, 0)
- a = tvm.nd.array(a_np, ctx)
- b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
- func = tvm.build(s, [A, B], device, name="region")
- func(a, b)
- tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
-
- for device in ['cuda', 'opencl', 'metal', 'rocm', 'llvm', 'vulkan']:
- check_device(device)
-
-def test_region():
- verify_region(1, 19, 425, 5, 80, 4, 0, 1)
-
-if __name__ == "__main__":
- logging.basicConfig(level=logging.DEBUG)
- test_region()
+++ /dev/null
-"""Test code for relu activation"""
-import os
-import numpy as np
-import tvm
-import topi
-from topi.util import get_const_tuple
-
-def verify_relu(m, n, dtype):
- A = tvm.placeholder((m, n), name='A', dtype=dtype)
- B = topi.cpp.nn.relu(A)
- assert B.dtype == dtype
-
- a_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(A.shape)).astype(A.dtype)
- b_np = a_np * (a_np > 0)
-
- def check_device(device):
- if not tvm.module.enabled(device):
- print("Skip because %s is not enabled" % device)
- return
- print("Running on target: %s" % device)
- target = topi.cpp.TEST_create_target(device)
- if device == "llvm":
- s = topi.cpp.generic.schedule_injective(target, [B])
- else:
- s = topi.cpp.cuda.schedule_injective(target, [B])
- ctx = tvm.context(device, 0)
- a = tvm.nd.array(a_np, ctx)
- b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
- foo = tvm.build(s, [A, B], device, name="relu")
- foo(a, b)
- tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
-
- for device in ['cuda', 'opencl', 'metal', 'rocm']:
- check_device(device)
-
-
-def verify_leaky_relu(m, alpha):
- A = tvm.placeholder((m,), name='A')
- B = topi.cpp.nn.leaky_relu(A, alpha)
- device = "llvm"
- target = topi.cpp.TEST_create_target(device)
- s = topi.cpp.generic.schedule_injective(target, [B])
-
- a_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(A.shape)).astype(A.dtype)
- b_np = a_np * (a_np > 0) + a_np * (a_np < 0) * alpha
- ctx = tvm.cpu(0)
- a = tvm.nd.array(a_np, ctx)
- b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
- foo = tvm.build(s, [A, B], device, name="leaky_relu")
- foo(a, b)
- tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
-
-def verify_prelu(x, w, axis, weight_reshape):
- X = tvm.placeholder((x), name='X')
- W = tvm.placeholder((w), name='W')
- x_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(X.shape)).astype(X.dtype)
- w_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(W.shape)).astype(W.dtype)
- def _prelu_numpy(x, W):
- return (x < 0) * (x *W.reshape(weight_reshape)) + (x>=0) * x
-
- out_np = _prelu_numpy(x_np, w_np)
- B = topi.cpp.nn.prelu(X, W, axis)
- device = "llvm"
- target = topi.cpp.TEST_create_target(device)
- s = topi.cpp.generic.schedule_injective(target, [B])
-
- ctx = tvm.cpu(0)
- x_tvm = tvm.nd.array(x_np, ctx)
- w_tvm = tvm.nd.array(w_np, ctx)
-
- b = tvm.nd.array(np.zeros(get_const_tuple(X.shape), dtype=B.dtype), ctx)
- foo = tvm.build(s, [X, W, B], "llvm", name="prelu")
- foo(x_tvm, w_tvm, b)
- tvm.testing.assert_allclose(b.asnumpy(), out_np, rtol=1e-5)
-
-def test_relu():
- for dtype in ['float32', 'float64', 'int32', 'int16', 'int8', 'int64']:
- verify_relu(10, 128, dtype)
-
-def test_leaky_relu():
- verify_leaky_relu(100, 0.5)
-
-def test_prelu():
- verify_prelu((1, 3, 2, 2), (3,), 1, (3, 1, 1))
- verify_prelu((1, 3, 2, 2), (2,), 2, (2, 1))
-
-if __name__ == "__main__":
- test_relu()
- test_leaky_relu()
- test_prelu()
+++ /dev/null
-"""Test code for reorg"""
-import logging
-import numpy as np
-import tvm
-import topi
-import topi.testing
-from topi.util import get_const_tuple
-
-def verify_reorg(batch, in_size, in_channel, stride):
- '''Verify reorg operator by comparing outputs from tvm and numpy implementation'''
- in_height = in_width = in_size
-
- A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A')
- B = topi.cpp.vision.reorg(A, stride)
-
- a_shape = get_const_tuple(A.shape)
- dtype = A.dtype
-
- def get_ref_data_reorg():
- '''Randomly initialize the data variables and get refernce output for the reorg operation'''
- a_np = np.random.uniform(size=a_shape).astype(dtype)
- b_np = topi.testing.reorg_python(a_np, stride)
- return a_np, b_np
-
- a_np, b_np = get_ref_data_reorg()
- def check_device(device):
- '''Check the device is available and if so, build and run the program'''
- if not tvm.module.enabled(device):
- print("Skip because %s is not enabled" % device)
- return
- print("Running on target: %s" % device)
- target = topi.cpp.TEST_create_target(device)
- if device == "llvm":
- s = topi.cpp.generic.default_schedule(target, [B], False)
- else:
- s = topi.cpp.cuda.schedule_injective(target, [B])
- ctx = tvm.context(device, 0)
- a = tvm.nd.array(a_np, ctx)
- b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
- func = tvm.build(s, [A, B], device, name="reorg")
- func(a, b)
- tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
-
- for device in ['cuda', 'opencl', 'metal', 'rocm', 'llvm', 'vulkan']:
- check_device(device)
-
-def test_reorg():
- verify_reorg(1, 38, 64, 2)
-
-if __name__ == "__main__":
- logging.basicConfig(level=logging.DEBUG)
- test_reorg()
+++ /dev/null
-"""Test code for softmax"""
-import os
-import numpy as np
-import tvm
-import topi
-import logging
-import topi.testing
-from topi.util import get_const_tuple
-
-def verify_softmax(m, n):
- A = tvm.placeholder((m, n), name='A')
- B = topi.cpp.nn.softmax(A, 1)
- # confirm lower works
- s = tvm.create_schedule([B.op])
- tvm.lower(s, [A, B], simple_mode=True)
-
- a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
- b_np = topi.testing.softmax_python(a_np)
-
- def check_device(device):
- if not tvm.module.enabled(device):
- print("Skip because %s is not enabled" % device)
- return
- print("Running on target: %s" % device)
- target = topi.cpp.TEST_create_target(device)
- if device == "llvm":
- s = topi.cpp.generic.default_schedule(target, [B], False)
- else:
- s = topi.cpp.cuda.schedule_softmax(target, [B])
- ctx = tvm.context(device, 0)
- a = tvm.nd.array(a_np, ctx)
- b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
- foo = tvm.build(s, [A, B], device, name="softmax")
- foo(a, b)
- tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
-
- for device in ['cuda', 'opencl', 'metal', 'rocm']:
- check_device(device)
-
-def test_softmax():
- verify_softmax(32, 10)
- verify_softmax(3, 4)
-
-
-def verify_log_softmax(m, n):
- A = tvm.placeholder((m, n), name='A')
- B = topi.cpp.nn.log_softmax(A)
- # confirm lower works
- s = tvm.create_schedule([B.op])
- tvm.lower(s, [A, B], simple_mode=True)
- a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
- b_np = topi.testing.log_softmax_python(a_np)
-
- def check_device(device):
- if not tvm.module.enabled(device):
- print("Skip because %s is not enabled" % device)
- return
- print("Running on target: %s" % device)
- target = topi.cpp.TEST_create_target(device)
- if device == "llvm":
- s = topi.cpp.generic.default_schedule(target, [B], False)
- else:
- s = topi.cpp.cuda.schedule_softmax(target, [B])
- ctx = tvm.context(device, 0)
- a = tvm.nd.array(a_np, ctx)
- b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
- foo = tvm.build(s, [A, B], device, name="log_softmax")
- foo(a, b)
- tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
-
- for device in ["cuda", "opencl", "metal", "rocm"]:
- check_device(device)
-
-
-def test_log_softmax():
- verify_log_softmax(32, 10)
- verify_log_softmax(3, 4)
-
-if __name__ == "__main__":
- logging.basicConfig(level=logging.DEBUG)
- test_softmax()
- test_log_softmax()
+++ /dev/null
-"""Test code for tensor operator"""
-import numpy as np
-import tvm
-import topi
-
-def verify_elemwise_sum(num_args, dtype):
- shape = (3,5,4)
-
- tvm_placeholders = []
- for i in range(num_args):
- tvm_placeholders.append(
- tvm.placeholder(shape, name="data"+str(i), dtype=dtype))
- esum = topi.cpp.elemwise_sum(tvm_placeholders)
- s = tvm.create_schedule([esum.op])
-
- def get_ref_data():
- np_nd = [np.random.uniform(0, 10, size=shape).astype(dtype)
- for i in range(num_args)]
- return np_nd
- np_nd = get_ref_data()
-
- def check_device(device):
- if not tvm.module.enabled(device):
- print("Skip because %s is not enabled" % device)
- return
-
- ctx = tvm.context(device, 0)
- out = tvm.nd.array(np.zeros(shape, dtype=dtype), ctx)
- f = tvm.build(s, tvm_placeholders + [esum], device, name="elemwise_sum")
- tvm_nd = [tvm.nd.array(nd, ctx) for nd in np_nd] + [out]
- f(*tvm_nd)
- np_out = np.sum(np.array(np_nd), axis=0)
- tvm.testing.assert_allclose(out.asnumpy(), np_out, rtol=1e-5)
-
- for device in ["llvm"]:
- check_device(device)
-
-
-def verify_full(shape, dtype, fill_value):
- A = tvm.placeholder(shape, dtype=dtype, name="A")
- B = topi.cpp.full_like(A, fill_value)
- C = topi.cpp.full(shape, dtype, fill_value)
- s1 = tvm.create_schedule([B.op])
- s2 = tvm.create_schedule([C.op])
-
- def get_ref_data():
- return np.full(shape, fill_value, dtype)
- np_nd = get_ref_data()
-
- def check_device(device):
- if not tvm.module.enabled(device):
- print("Skip because %s is not enabled" % device)
- return
- target = topi.cpp.TEST_create_target(device)
- ctx = tvm.context(device, 0)
- out = tvm.nd.array(np.zeros(shape, dtype=dtype), ctx)
- f = tvm.build(s1, [A, B], device, name="full_like")
- f(tvm.nd.array(np.zeros(shape, dtype), ctx), out)
- tvm.testing.assert_allclose(out.asnumpy(), np_nd, rtol=1e-5)
-
- f = tvm.build(s2, [C], device, name="full")
- f(out)
- tvm.testing.assert_allclose(out.asnumpy(), np_nd, rtol=1e-5)
-
- for device in ["llvm"]:
- check_device(device)
-
-
-def test_elemwise_sum():
- verify_elemwise_sum(1, "float32")
- verify_elemwise_sum(5, "float32")
- verify_elemwise_sum(4, "int32")
-
-
-def test_full():
- verify_full((3,4,5), "float32", 3.14)
- verify_full((10,), "int32", 7)
-
-if __name__ == "__main__":
- test_elemwise_sum()
- test_full()
+++ /dev/null
-"""Test code for broadcasting operators."""
-import numpy as np
-import tvm
-import topi
-
-def verify_expand_dims(in_shape, out_shape, axis, num_newaxis):
- A = tvm.placeholder(shape=in_shape, name="A")
- B = topi.cpp.expand_dims(A, axis, num_newaxis)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
- print("Running on target: %s" % device)
- target = topi.cpp.TEST_create_target(device)
- if device == "llvm":
- s = topi.cpp.generic.schedule_injective(target, [B])
- else:
- s = topi.cpp.cuda.schedule_injective(target, [B])
- foo = tvm.build(s, [A, B], device, name="expand_dims")
- data_npy = np.random.uniform(size=in_shape).astype(A.dtype)
- out_npy = data_npy.reshape(out_shape)
- data_nd = tvm.nd.array(data_npy, ctx)
- out_nd = tvm.nd.array(np.empty(out_shape).astype(B.dtype), ctx)
- foo(data_nd, out_nd)
- tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
-
- for device in ["llvm", "nvptx", "cuda", "opencl", "metal", "rocm"]:
- check_device(device)
-
-
-def verify_tranpose(in_shape, axes):
- A = tvm.placeholder(shape=in_shape, name="A")
- B = topi.cpp.transpose(A, axes)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
- print("Running on target: %s" % device)
- target = topi.cpp.TEST_create_target(device)
- if device == "llvm":
- s = topi.cpp.generic.schedule_injective(target, [B])
- else:
- s = topi.cpp.cuda.schedule_injective(target, [B])
- ctx = tvm.context(device, 0)
- foo = tvm.build(s, [A, B], device, name="tranpose")
- data_npy = np.arange(np.prod(in_shape)).reshape(in_shape).astype(A.dtype)
- out_npy = data_npy.transpose(axes)
- data_nd = tvm.nd.array(data_npy, ctx)
- out_nd = tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=B.dtype)
- foo(data_nd, out_nd)
- tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
-
- for device in ["llvm", "nvptx", "cuda", "opencl", "metal", "rocm"]:
- check_device(device)
-
-
-def verify_reshape(src_shape, dst_shape):
- A = tvm.placeholder(shape=src_shape, name="A")
- B = topi.cpp.reshape(A, dst_shape)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
- print("Running on target: %s" % device)
- target = topi.cpp.TEST_create_target(device)
- if device == "llvm":
- s = topi.cpp.generic.schedule_injective(target, [B])
- else:
- s = topi.cpp.cuda.schedule_injective(target, [B])
- foo = tvm.build(s, [A, B], device, name="reshape")
- data_npy = np.random.normal(size=src_shape).astype(A.dtype)
- out_npy = np.reshape(data_npy, newshape=dst_shape)
- data_nd = tvm.nd.array(data_npy, ctx)
- out_nd = tvm.nd.empty(dst_shape, ctx=ctx, dtype=B.dtype)
- foo(data_nd, out_nd)
- tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
-
- for device in ["llvm", "nvptx", "cuda", "opencl", "metal", "rocm"]:
- check_device(device)
-
-
-def verify_squeeze(src_shape, axis):
- A = tvm.placeholder(shape=src_shape, name="A")
- B = topi.cpp.squeeze(A, axis)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
- print("Running on target: %s" % device)
- target = topi.cpp.TEST_create_target(device)
- if device == "llvm":
- s = topi.cpp.generic.schedule_injective(target, [B])
- else:
- s = topi.cpp.cuda.schedule_injective(target, [B])
- foo = tvm.build(s, [A, B], device, name="squeeze")
- data_npy = np.random.normal(size=src_shape).astype(A.dtype)
- out_npy = np.squeeze(data_npy, axis=axis)
- data_nd = tvm.nd.array(data_npy, ctx)
- out_nd_shape = out_npy.shape
- out_nd = tvm.nd.empty(out_nd_shape, ctx=ctx, dtype=B.dtype)
- foo(data_nd, out_nd)
- tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
-
- for device in ["llvm", "nvptx", "cuda", "opencl", "metal", "rocm"]:
- check_device(device)
-
-def verify_concatenate(shapes, axis):
- tensor_l = []
- for i, shape in enumerate(shapes):
- tensor_l.append(tvm.placeholder(shape, name="A" + str(i)))
- out_tensor = topi.cpp.concatenate(tensor_l, axis)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
- print("Running on target: %s" % device)
- target = topi.cpp.TEST_create_target(device)
- if device == "llvm":
- s = topi.cpp.generic.schedule_injective(target, [out_tensor])
- else:
- s = topi.cpp.cuda.schedule_injective(target, [out_tensor])
- foo = tvm.build(s, tensor_l + [out_tensor], device, name="concatenate")
- data_npys = [np.random.normal(size=shape).astype(tensor_l[0].dtype) for shape in shapes]
- out_npy = np.concatenate(data_npys, axis=axis)
- data_nds = [tvm.nd.array(data_npy, ctx) for data_npy in data_npys]
- out_nd = tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=out_tensor.dtype)
- foo(*(data_nds + [out_nd]))
- tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
-
- for device in ["llvm", "nvptx", "cuda", "opencl", "metal", "rocm"]:
- check_device(device)
-
-
-def verify_split(src_shape, indices_or_sections, axis):
- A = tvm.placeholder(shape=src_shape, name="A")
- tensor_l = topi.cpp.split(A, indices_or_sections, axis)
- tensor_l = list(tensor_l)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
- print("Running on target: %s" % device)
- target = topi.cpp.TEST_create_target(device)
- if device == "llvm":
- s = topi.cpp.generic.schedule_injective(target, tensor_l)
- else:
- s = topi.cpp.cuda.schedule_injective(target, tensor_l)
- ctx = tvm.context(device, 0)
- foo = tvm.build(s, [A] + tensor_l, device, name="split")
- data_npy = np.random.normal(size=src_shape).astype(A.dtype)
- out_npys = np.split(data_npy, indices_or_sections, axis=axis)
- data_nd = tvm.nd.array(data_npy, ctx)
- out_nds = [tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=tensor_l[0].dtype) for out_npy in out_npys]
- foo(*([data_nd] + out_nds))
- for out_nd, out_npy in zip(out_nds, out_npys):
- tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
-
- for device in ["llvm", "nvptx", "cuda", "opencl", "metal", "rocm"]:
- check_device(device)
-
-def verify_take(src_shape, indices_src, axis=None):
- src_dtype = "float32"
- indices_dtype = "int32"
- indices_src = np.array(indices_src, dtype=indices_dtype)
- A = tvm.placeholder(shape=src_shape, dtype=src_dtype, name="A")
- indices = tvm.placeholder(shape=indices_src.shape, dtype=indices_dtype, name="indices")
- if axis is None:
- out_tensor = topi.cpp.take(A, indices)
- else:
- out_tensor = topi.cpp.take(A, indices, axis)
-
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
- print("Running on target: %s" % device)
- with tvm.target.create(device):
- s = topi.generic.schedule_injective(out_tensor)
-
- foo = tvm.build(s, [A] + [indices] + [out_tensor] , device, name="take")
- shape_size = 1
- for i in range(len(src_shape)):
- shape_size = shape_size * src_shape[i]
- data_npy = np.arange(shape_size, dtype=src_dtype).reshape((src_shape))
-
- if axis is None:
- out_npys = np.take(data_npy, indices_src)
- else:
- out_npys = np.take(data_npy, indices_src, axis=axis)
- data_nd = tvm.nd.array(data_npy, ctx)
- indices_nd = tvm.nd.array(indices_src, ctx)
- out_nd = tvm.nd.empty(out_npys.shape, ctx=ctx, dtype=src_dtype)
- foo(data_nd, indices_nd, out_nd)
- tvm.testing.assert_allclose(out_nd.asnumpy(), out_npys)
-
- for device in ["llvm", "opencl"]:
- check_device(device)
-
-def verify_where(condition, x, y):
- dtype = "float32"
- if len(condition.shape) == 1:
- np_out = np.array([xv if c else yv for (c,xv,yv) in zip(condition,x,y)])
- else:
- np_out = np.where(condition, x, y)
- A = tvm.placeholder(shape=condition.shape, dtype=dtype, name="condition")
- B = tvm.placeholder(shape=x.shape, dtype=dtype, name="x")
- C = tvm.placeholder(shape=y.shape, dtype=dtype, name="y")
- out_tensor = topi.cpp.where(A, B, C)
-
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
- print("Running on target: %s" % device)
- with tvm.target.create(device):
- s = topi.generic.schedule_injective(out_tensor)
-
- foo = tvm.build(s, [A, B, C, out_tensor], device, name="where")
- tvm_out = tvm.nd.empty(x.shape, ctx=ctx, dtype=dtype)
- foo(tvm.nd.array(condition, ctx), tvm.nd.array(x, ctx),
- tvm.nd.array(y, ctx), tvm_out)
- tvm.testing.assert_allclose(tvm_out.asnumpy(), np_out)
-
- for device in ["llvm", "nvptx", "cuda", "opencl", "metal", "rocm"]:
- check_device(device)
-
-def verify_concatenate_split(shapes, axis, indices_or_sections):
- tensor_l_concatenate = []
- for i, shape in enumerate(shapes):
- tensor_l_concatenate.append(tvm.placeholder(shape, name="A" + str(i)))
- out_tensor = topi.cpp.concatenate(tensor_l_concatenate, axis)
- tensor_l = topi.cpp.split(out_tensor, indices_or_sections, axis)
- tensor_l = list(tensor_l)
- def check_device(device):
- if not tvm.module.enabled(device):
- print("Skip because %s is not enabled" % device)
- return
- print("Running on target: %s" % device)
- target = topi.cpp.TEST_create_target(device)
- if device == "llvm":
- s = topi.cpp.generic.schedule_injective(target, tensor_l)
- else:
- s = topi.cpp.cuda.schedule_injective(target, tensor_l)
- ctx = tvm.context(device, 0)
- foo = tvm.build(s, tensor_l_concatenate + tensor_l, device, name="concatenate_split")
- data_npys = [np.random.normal(size=shape).astype(tensor_l_concatenate[0].dtype) for shape in shapes]
- out_npy_conc = np.concatenate(data_npys, axis=axis)
- out_npys_split = np.split(out_npy_conc, indices_or_sections, axis=axis)
- data_nds = [tvm.nd.array(data_npy, ctx) for data_npy in data_npys]
- out_nds = [tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=tensor_l[0].dtype) for out_npy in out_npys_split]
- foo(*(data_nds + out_nds))
- for out_nd, out_npy in zip(out_nds, out_npys_split):
- tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
-
- for device in ["llvm", "cuda", "opencl", "metal", "rocm"]:
- check_device(device)
-
-def verify_concatenate_broadcast(shapes, axis, rhs_shape):
- B = tvm.placeholder(shape=rhs_shape, name="B")
- tensor_l = []
- for i, shape in enumerate(shapes):
- tensor_l.append(tvm.placeholder(shape, name="A" + str(i)))
- out_tensor = topi.cpp.concatenate(tensor_l, axis)
- C = out_tensor + B
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
- print("Running on target: %s" % device)
- target = topi.cpp.TEST_create_target(device)
- if device == "llvm":
- s = topi.cpp.generic.schedule_injective(target, [C])
- else:
- s = topi.cpp.cuda.schedule_injective(target, [C])
- ctx = tvm.context(device, 0)
- foo = tvm.build(s, tensor_l + [B, C], device, name="broadcast_binary_add")
- data_npys = [np.random.normal(size=shape).astype(tensor_l[0].dtype) for shape in shapes]
- lhs_npy = np.concatenate(data_npys, axis=axis)
- rhs_npy = np.random.uniform(size=rhs_shape).astype(B.dtype)
- out_npy = lhs_npy + rhs_npy
- data_nds = [tvm.nd.array(data_npy, ctx) for data_npy in data_npys]
- rhs_nd = tvm.nd.array(rhs_npy, ctx)
- out_nd = tvm.nd.array(np.empty(out_npy.shape).astype(B.dtype), ctx)
- for _ in range(1):
- foo(*(data_nds + [rhs_nd] + [out_nd]))
- tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy, rtol=1E-4, atol=1E-4)
-
- for device in ["llvm", "cuda", "opencl", "metal", "rocm"]:
- check_device(device)
-
-
-def test_expand_dims():
- verify_expand_dims((3, 10), (3, 10, 1, 1), 2, 2)
- verify_expand_dims((3, 10), (1, 3, 10), -3, 1)
-
-
-def test_tranpose():
- verify_tranpose((3, 10, 2), (1, 0, 2))
- verify_tranpose((3, 10, 5), (2, 0, 1))
- verify_tranpose((3, 10), None)
- verify_tranpose((3, 10, 5), (2, -3, 1))
-
-
-def test_reshape():
- verify_reshape((1, 2, 3, 4), (2, 3, 4))
- verify_reshape((4, 2, 3, 4), (2, 4, 12))
- verify_reshape((4, 2, 3, 4), (2, 48))
- verify_reshape((16, ), (2, 2, 2, 2))
-
-
-def test_squeeze():
- verify_squeeze((1, 2, 3, 4), 0)
- verify_squeeze((1, 2, 1, 4), None)
- verify_squeeze((1, 1, 1, 4), (1, 2))
- verify_squeeze((1, 1, 1, 1), None)
-
-
-def test_concatenate():
- verify_concatenate([(2,), (2,), (2,)], 0)
- verify_concatenate([(2, 3, 4), (2, 2, 4), (2, 5, 4)], 1)
- verify_concatenate([(1, 2, 4), (1, 2, 3), (1, 2, 7), (1, 2, 8), (1, 2, 1)], -1)
- verify_concatenate([(5, 6, 7, 3),
- (16, 6, 7, 3),
- (12, 6, 7, 3),
- (8, 6, 7, 3),
- (2, 6, 7, 3)], 0)
-
-
-def test_split():
- verify_split((2, 12, 3), 3, 1)
- verify_split((2, 12, 3), 3, -1)
- verify_split((2, 12, 3), [2, 4], 1)
- verify_split((10, 12, 24), [5, 7, 9], -1)
-
-def test_take():
- verify_take((4,), [1])
- verify_take((4,), [[0,1,2,3]])
- verify_take((3,3,3), [[11,25]])
- verify_take((4,), [[0,1],[2,3]])
- verify_take((4,), [1], 0)
- verify_take((2,2), [[[1,0],[0,1]]], 0)
- verify_take((2,2), [[[1,0],[0,1]]], 1)
- verify_take((4,3,5,6), [[2,1,0,0]], -2)
-
-def test_where():
- shape = (10, 3, 7, 13)
- condition = np.random.uniform(low=-1, high=1, size=shape).astype("float32")
- x = np.random.uniform(size=shape).astype("float32")
- y = np.random.uniform(size=shape).astype("float32")
- verify_where(condition, x, y)
- condition = np.random.uniform(low=-1, high=1, size=(shape[0],)).astype("float32")
- x = np.random.uniform(size=shape).astype("float32")
- y = np.random.uniform(size=shape).astype("float32")
- verify_where(condition, x, y)
-
-
-def test_regression_1():
- verify_concatenate_split([(2, 3, 4), (2, 2, 4), (2, 5, 4)], 1, [3, 7])
- verify_concatenate_split([(3, 4), (2, 4), (3, 4)], 0, [1, 2, 3, 4])
-
-def test_regression_2():
- verify_concatenate_broadcast([(5, 1, 3), (5, 1, 3)], 1, [2, 1])
- verify_concatenate_broadcast([(5, 1, 2), (5, 1, 3)], 2, [1, 5])
-
-if __name__ == "__main__":
- test_concatenate()
- test_tranpose()
- test_expand_dims()
- test_reshape()
- test_squeeze()
- test_split()
- test_take()
- test_where()
- test_regression_1()
- test_regression_2()
# our customized lowering pass to manipulate the IR directly instead of using schedule primitives.
#
-n = tvm.const(128)
+n = tvm.const(128, "int32")
a = tvm.placeholder((n, ), name="a")
b = tvm.placeholder((n, ), name="b")
c = tvm.compute((n, ), lambda i: a[i] + b[i], name='c')
lambda yy, xx, cc, nn: tvm.select(
tvm.all(yy >= pad, yy - pad < in_size,
xx >= pad, xx - pad < in_size),
- A[yy - pad, xx - pad, cc, nn], tvm.const(0.)),
+ A[yy - pad, xx - pad, cc, nn], tvm.const(0., "float32")),
name='Apad')
# Create reduction variables
rc = tvm.reduce_axis((0, in_channel), name='rc')
###############################################################################
# Memory Hierarchy
# ----------------
-#
+#
# We first specify the memory hierarchy for buffers. The figure below shows the
# GPU memory hierarchy. One important difference from CPU memory hierarchy is
# that GPU provides a cache buffer called shared memory, which is managed by
elif isinstance(loop_body.value, tvm.expr.Load):
alu_opcode = env.dev.ALU_OPCODE_SHR
lhs = loop_body.value
- rhs = tvm.const(0)
+ rhs = tvm.const(0, "int32")
else:
raise RuntimeError(
"Expression not recognized %s, %s, %s" % (