* [REFACTOR][PY][API-CHANGE] Remove legacy python files.
Remove legacy python files.
Use the te namespace for most of the tensor expression primitives.
- tvm.create_schedule -> tvm.te.create_schedule
- tvm.placeholder -> tvm.te.placeholder
- tvm.compute -> tvm.te.compute
* Remove top-level exposures.
"""
import tvm
+from tvm import te
import os
from tvm import rpc
from tvm.contrib import util, ndk
def test_rpc_module():
# graph
- n = tvm.convert(1024)
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
+ n = tvm.runtime.convert(1024)
+ A = te.placeholder((n,), name='A')
+ B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
a_np = np.random.uniform(size=1024).astype(A.dtype)
temp = util.tempdir()
session_timeout=60)
# Compile the Graph for CPU target
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
xo, xi = s[B].split(B.op.axis[0], factor=64)
s[B].parallel(xi)
s[B].pragma(xo, "parallel_launch_point")
# Compile the Graph for OpenCL target
if test_opencl:
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
xo, xi = s[B].split(B.op.axis[0], factor=64)
- s[B].bind(xi, tvm.thread_axis("threadIdx.x"))
- s[B].bind(xo, tvm.thread_axis("blockIdx.x"))
+ s[B].bind(xi, te.thread_axis("threadIdx.x"))
+ s[B].bind(xo, te.thread_axis("blockIdx.x"))
# Build the dynamic lib.
# If we don't want to do metal and only use cpu, just set target to be target
f = tvm.build(s, [A, B], "opencl", target_host=target, name="myadd")
# Compile the Graph for Vulkan target
if test_vulkan:
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
xo, xi = s[B].split(B.op.axis[0], factor=64)
- s[B].bind(xi, tvm.thread_axis("threadIdx.x"))
- s[B].bind(xo, tvm.thread_axis("blockIdx.x"))
+ s[B].bind(xi, te.thread_axis("threadIdx.x"))
+ s[B].bind(xo, te.thread_axis("blockIdx.x"))
# Build the dynamic lib.
# If we don't want to do metal and only use cpu, just set target to be target
f = tvm.build(s, [A, B], "vulkan", target_host=target, name="myadd")
import numpy as np
import tvm
+from tvm import te
from tvm.contrib.util import tempdir
import tvm.contrib.graph_runtime as runtime
from tvm import relay
import numpy as np
import tvm
+from tvm import te
import tvm.contrib.graph_runtime as runtime
from tvm import relay
import numpy as np
import tvm
+from tvm import te
from tvm.contrib.util import tempdir
import tvm.contrib.graph_runtime as runtime
from tvm import relay
import os
from tvm import relay
import tvm
+from tvm import te
import logging
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import os
def test_plugin_module():
import ctypes
# Import TVM first to get library symbols
import tvm
+from tvm import te
def load_lib():
"""Load library, the functions will be registered into TVM"""
# under the License.
import tvm_ext
import tvm
+import tvm._ffi.registry
+from tvm import te
import numpy as np
def test_bind_add():
def test_ext_dev():
n = 10
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute((n,), lambda *i: A(*i) + 1.0, name='B')
- s = tvm.create_schedule(B.op)
+ A = te.placeholder((n,), name='A')
+ B = te.compute((n,), lambda *i: A(*i) + 1.0, name='B')
+ s = te.create_schedule(B.op)
def check_llvm():
if not tvm.runtime.enabled("llvm"):
return
def test_sym_add():
- a = tvm.var('a')
- b = tvm.var('b')
+ a = te.var('a')
+ b = te.var('b')
c = tvm_ext.sym_add(a, b)
assert c.a == a and c.b == b
assert(isinstance(v2, tvm_ext.IntVec))
assert v2[2] == 3
- tvm.convert(ivec_cb)(ivec)
+ tvm.runtime.convert(ivec_cb)(ivec)
def test_extract_ext():
- fdict = tvm.extract_ext_funcs(tvm_ext._LIB.TVMExtDeclare)
+ fdict = tvm._ffi.registry.extract_ext_funcs(
+ tvm_ext._LIB.TVMExtDeclare)
assert fdict["mul"](3, 4) == 12
def test_extern_call():
n = 10
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute((n,), lambda *i: tvm.call_extern("float32", "TVMTestAddOne", A(*i)), name='B')
- s = tvm.create_schedule(B.op)
+ A = te.placeholder((n,), name='A')
+ B = te.compute((n,), lambda *i: tvm.tir.call_extern("float32", "TVMTestAddOne", A(*i)), name='B')
+ s = te.create_schedule(B.op)
def check_llvm():
if not tvm.runtime.enabled("llvm"):
# under the License.
"""Script to prepare test_addone.so"""
import tvm
+from tvm import te
import os
def prepare_test_libs(base_path):
- n = tvm.var("n")
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
- s = tvm.create_schedule(B.op)
+ n = te.var("n")
+ A = te.placeholder((n,), name='A')
+ B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
+ s = te.create_schedule(B.op)
# Compile library as dynamic library
fadd_dylib = tvm.build(s, [A, B], "llvm", name="addone")
dylib_path = os.path.join(base_path, "test_addone_dll.so")
# file python_deploy.py
import tvm
+from tvm import te
import numpy as np
def verify(mod, fname):
"""
import tvm
+from tvm import te
import os
import re
import sys
def test_rpc_module():
# graph
- n = tvm.convert(1024)
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
+ n = tvm.runtime.convert(1024)
+ A = te.placeholder((n,), name='A')
+ B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
temp = util.tempdir()
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
xo, xi = s[B].split(B.op.axis[0], factor=64)
- s[B].bind(xi, tvm.thread_axis("threadIdx.x"))
- s[B].bind(xo, tvm.thread_axis("blockIdx.x"))
+ s[B].bind(xi, te.thread_axis("threadIdx.x"))
+ s[B].bind(xo, te.thread_axis("blockIdx.x"))
# Build the dynamic lib.
# If we don't want to do metal and only use cpu, just set target to be target
f = tvm.build(s, [A, B], "metal", target_host=target, name="myadd")
arch=arch, sdk=sdk)
xcode.codesign(path_dso1)
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
xo, xi = s[B].split(B.op.axis[0], factor=64)
s[B].parallel(xi)
s[B].pragma(xo, "parallel_launch_point")
from tvm import relay
from tvm.relay import testing
import tvm
+from tvm import te
def main():
import os.path as osp
import numpy as np
import tvm
+from tvm import te
CWD = osp.abspath(osp.dirname(__file__))
:members:
:imported-members:
:exclude-members:
+ any, all, min_value, max_value, trace,
exp, erf, tanh, sigmoid, log, cos, sin, atan, sqrt, rsqrt, floor, ceil,
trunc, abs, round, nearbyint, isnan, power, popcount, fmod, if_then_else,
div, indexdiv, indexmod, truncdiv, truncmod, floordiv, floormod,
.. automodule:: tvm.tir
:members:
:imported-members:
- :exclude-members: PrimExpr
+ :exclude-members: PrimExpr, const
:autosummary:
os.environ['TVM_BUILD_DOC'] = '1'
# Version information.
import tvm
+from tvm import te
version = tvm.__version__
release = tvm.__version__
from __future__ import absolute_import, print_function
import tvm
+from tvm import te
import numpy as np
# Global declarations of environment.
######################################################################
# Describe the Computation
# ------------------------
-n = tvm.var("n")
-A = tvm.placeholder((n,), name='A')
-B = tvm.placeholder((n,), name='B')
-C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C")
+n = te.var("n")
+A = te.placeholder((n,), name='A')
+B = te.placeholder((n,), name='B')
+C = te.compute(A.shape, lambda i: A[i] + B[i], name="C")
######################################################################
# Schedule the Computation
# ------------------------
-s = tvm.create_schedule(C.op)
+s = te.create_schedule(C.op)
######################################################################
# Compilation
import os
import tvm
+from tvm import te
from tvm.contrib import cc, util
def test_add(target_dir):
- n = tvm.var("n")
- A = tvm.placeholder((n,), name='A')
- B = tvm.placeholder((n,), name='B')
- C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C")
- s = tvm.create_schedule(C.op)
+ n = te.var("n")
+ A = te.placeholder((n,), name='A')
+ B = te.placeholder((n,), name='B')
+ C = te.compute(A.shape, lambda i: A[i] + B[i], name="C")
+ s = te.create_schedule(C.op)
fadd = tvm.build(s, [A, B, C], "llvm", target_host="llvm", name="myadd")
fadd.save(os.path.join(target_dir, "add_cpu.o"))
import os
import tvm
+from tvm import te
from tvm.contrib import cc, util
def test_add(target_dir):
if not tvm.runtime.enabled("cuda"):
print("skip %s because cuda is not enabled..." % __file__)
return
- n = tvm.var("n")
- A = tvm.placeholder((n,), name='A')
- B = tvm.placeholder((n,), name='B')
- C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C")
+ n = te.var("n")
+ A = te.placeholder((n,), name='A')
+ B = te.placeholder((n,), name='B')
+ C = te.compute(A.shape, lambda i: A[i] + B[i], name="C")
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
bx, tx = s[C].split(C.op.axis[0], factor=64)
- s[C].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[C].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[C].bind(bx, te.thread_axis("blockIdx.x"))
+ s[C].bind(tx, te.thread_axis("threadIdx.x"))
fadd_cuda = tvm.build(s, [A, B, C], "cuda", target_host="llvm", name="myadd")
fadd_cuda.save(os.path.join(target_dir, "add_gpu.o"))
import os
import tvm
+from tvm import te
import json
from tvm.contrib import graph_runtime
def dump_graph_lib(target_dir):
dim = 4
- A = tvm.placeholder((dim,), name='A')
- B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
- sched = tvm.create_schedule(B.op)
+ A = te.placeholder((dim,), name='A')
+ B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
+ sched = te.create_schedule(B.op)
node0 = {"op": "null", "name": "x", "inputs": []}
node1 = {"op": "tvm_op", "name": "add",
# tvm._ffi
from ._ffi.base import TVMError, __version__
from ._ffi.runtime_ctypes import TypeCode, DataType
-from ._ffi.registry import register_object, register_func, register_extension
+from ._ffi import register_object, register_func, register_extension, get_global_func
# top-level alias
# tvm.runtime
# tvm.target
from . import target
-from .target import build_config
# tvm.te
-from .te import decl_tensor_intrin, create_schedule, tag_scope
+from . import te
# tvm.testing
from . import testing
# others
from . import arith
-# backward compact for topi, to be removed later
-from .api import *
-from .tir import expr, stmt, ir_builder, ir_pass, generic
-from .te import tensor, schedule
-from .tir.op import *
-from . import intrin
-from . import make
-
# Contrib initializers
from .contrib import rocm as _rocm, nvcc as _nvcc, sdaccel as _sdaccel
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Functions defined in TVM."""
-# pylint: disable=invalid-name,unused-import,redefined-builtin
-import tvm._ffi
-import tvm.ir
-import tvm.tir
-
-from tvm.runtime import convert, const, DataType
-from tvm.ir import container as _container, Range
-from tvm.tir import decl_buffer, layout, bijective_layout
-from tvm.tir import min_value, max_value, indexdiv, indexmod, all, any
-from tvm.te import placeholder, compute, scan, extern, var, size_var, thread_axis, reduce_axis
-
-
-from ._ffi.base import string_types, TVMError
-from ._ffi.registry import register_func, get_global_func, extract_ext_funcs
-
-from . import make as _make
-
-int8 = "int8"
-int32 = "int32"
-float32 = "float32"
-handle = "handle"
--------
.. code-block:: python
- x = tvm.var("x")
+ x = te.var("x")
analyzer = tvm.arith.Analyzer()
with analyzer.constraint_scope(x % 3 == 0):
# constraint in effect
import struct
import numpy as np
+import tvm._ffi
-from tvm import schedule, ir_pass, get_global_func, target as _target
+from tvm import target as _target
+from tvm.tir import ir_pass
+from tvm.te import schedule
from tvm.driver import build_module
def ana_lower(sch, args,
return stmt
try:
- _get_buffer_curve_sample_flatten = get_global_func(
+ _get_buffer_curve_sample_flatten = tvm._ffi.get_global_func(
"autotvm.feature.GetCurveSampleFeatureFlatten")
- _get_itervar_feature = get_global_func("autotvm.feature.GetItervarFeature")
- _get_itervar_feature_flatten = get_global_func("autotvm.feature.GetItervarFeatureFlatten")
+ _get_itervar_feature = tvm._ffi.get_global_func(
+ "autotvm.feature.GetItervarFeature")
+ _get_itervar_feature_flatten = tvm._ffi.get_global_func(
+ "autotvm.feature.GetItervarFeatureFlatten")
except ValueError as e:
def raise_error(*args, **kwargs): # pylint: disable=unused-argument
raise RuntimeError("Cannot load autotvm c++ API")
Parameters
----------
- sch: tvm.schedule.Schedule
- args: Array of tvm.tensor.Tensor
+ sch: tvm.te.schedule.Schedule
+ args: Array of te.tensor.Tensor
the buffer args for lower
take_log: bool
whether to take log of numerical statistics
Parameters
----------
- sch: tvm.schedule.Schedule
- args: Array of tvm.tensor.Tensor
+ sch: tvm.te.schedule.Schedule
+ args: Array of te.tensor.Tensor
the buffer args for lower
take_log: bool
whether to take log of numerical statistics
Parameters
----------
- sch: tvm.schedule.Schedule
- args: Array of tvm.tensor.Tensor
+ sch: tvm.te.schedule.Schedule
+ args: Array of te.tensor.Tensor
the buffer args for lower
sample_n: int
number of sample points along one dimension
import topi
import tvm
+from tvm import te
from tvm import autotvm, relay
from tvm.autotvm.task import get_config
from tvm.autotvm.record import encode, load_from_file
_, out_layout = o_input_info[0]
else:
_, out_layout = o_output_info[0]
- data_placeholder = tvm.placeholder(in_shape, name="data",
- dtype=self._dtype)
+ data_placeholder = te.placeholder(in_shape, name="data",
+ dtype=self._dtype)
args = [data_placeholder, in_layout, out_layout]
callback(i_idx, o_idx, m, n, args)
import numpy as np
-from ... import ir_pass, build, build_config, nd, TVMError, register_func, \
- rpc as _rpc, target as _target
-from ...contrib import nvcc, ndk, tar
+import tvm._ffi
+from tvm import nd, rpc as _rpc, target as _target
+from tvm.tir import ir_pass
+from tvm.error import TVMError
+from tvm.target import build_config
+from tvm.driver import build
+from tvm.contrib import nvcc, ndk, tar
from ..util import get_const_tuple
from ..env import AutotvmGlobalScope
return not t.is_alive()
-@register_func
+@tvm._ffi.register_func
def tvm_callback_cuda_compile(code):
"""use nvcc to generate ptx code for better optimization"""
curr_cuda_target_arch = AutotvmGlobalScope.current.cuda_target_arch
import inspect
import zlib
-from tvm import schedule
+from tvm.te import schedule
def attach_code_hash(s):
"""Decorator for attaching a code hash to a schedule
Parameters
----------
s: Schedule
- tvm.schedule.Schedule to attach the hash to
+ tvm.te.schedule.Schedule to attach the hash to
"""
def decorator(func):
def wrapper(*args, **kwargs):
from collections import namedtuple, OrderedDict
import numpy as np
-from tvm import schedule, thread_axis
+from tvm.te import schedule, thread_axis
from tvm.autotvm.util import get_const_int
Axis = namedtuple('Axis', ['space', 'index'])
.. note::
We can regard our schedule code as a transformation graph of axes.
- Starting from raw axes in the definition of tvm.compute, we can transform these axes
+ Starting from raw axes in the definition of te.compute, we can transform these axes
by some operators. The operator includes 'split', 'reorder' and 'annotate'.
Each operator has some tunable parameters (e.g. the split factor).
Then the tuning process is just to find good parameters of these op.
Parameters
----------
- var: int or tvm.schedule.IterVar
+ var: int or tvm.te.schedule.IterVar
If is int, return a virtual axis whose length is the provided argument.
If is IterVar, return a virtual axis whose length is extracted from
the IterVar's extent domain.
Parameters
----------
- sch: tvm.schedule.Schedule
+ sch: tvm.te.schedule.Schedule
The tvm schedule
- op: tvm.tensor.Operation
+ op: tvm.te.Operation
The stage to be applied
- axis: tvm.schedule.IterVar
+ axis: tvm.te.schedule.IterVar
axis to split
Returns
Parameters
----------
- sch: tvm.schedule.Schedule
+ sch: tvm.te.schedule.Schedule
The tvm schedule
- op: tvm.tensor.Operation
+ op: tvm.te.Operation
The stage to be applied
- axis: tvm.schedule.IterVar
+ axis: tvm.te.schedule.IterVar
axis to split
Returns
Parameters
----------
- sch: tvm.schedule.Schedule
+ sch: tvm.te.schedule.Schedule
The tvm schedule
- op: tvm.tensor.Operation
+ op: tvm.te.Operation
The stage to be applied
- axes: Array of tvm.schedule.IterVar
+ axes: Array of tvm.te.schedule.IterVar
axis to split
axis_lens: Array of int, optional
the length of axes
Returns
-------
- axes : list of tvm.schedule.IterVar
+ axes : list of tvm.te.schedule.IterVar
The transformed axes
"""
if source is not None: # special case : attach cache_read/cache_write
Parameters
----------
- var: int or tvm.schedule.IterVar
+ var: int or tvm.te.schedule.IterVar
If is int, return an axis whose length is the provided argument.
If is IterVar, return an axis whose length is extracted from the
IterVar's extent domain.
----------
name: str
name to index the entity of this space
- axis: tvm.schedule.IterVar
+ axis: tvm.te.schedule.IterVar
axis to split
policy: str
name of policy.
----------
name: str
name to index the entity of this space
- axes: Array of tvm.schedule.IterVar
+ axes: Array of tvm.te.schedule.IterVar
axes to reorder
policy: str
name of policy
----------
name: str
name to index the entity of this space
- axes: Array of tvm.schedule.IterVar
+ axes: Array of tvm.te.schedule.IterVar
axes to annotate
policy: str
name of policy
func is a state-less function, or a string that
registers the standard task.
"""
-
import numpy as np
-from ... import tensor, expr, container, placeholder, target as _target
+from tvm import target as _target
+from tvm.ir import container
+from tvm.tir import expr
+from tvm.te import tensor, placeholder
+
from ..util import get_const_int, get_const_tuple
from .dispatcher import DispatchContext, ApplyConfig
def args_to_workload(args, task_name=None):
"""Convert argument list to hashable workload tuple.
This function will convert list to tuple, tvm node to python value and
- flatten tvm.tensor.Tensor to a tuple
+ flatten te.tensor.Tensor to a tuple
Parameters
----------
Returns
-------
- sch: tvm.schedule.Schedule
+ sch: tvm.te.schedule.Schedule
The tvm schedule
- arg_bufs: Array of tvm.tensor.Tensor
+ arg_bufs: Array of te.tensor.Tensor
The input/output buffers
"""
config.flop = 0
@autotvm.register_customized_task("matmul")
def matmul(N, L, M, dtype):
- A = tvm.placeholder((N, L), name='A', dtype=dtype)
- B = tvm.placeholder((L, M), name='B', dtype=dtype)
+ A = te.placeholder((N, L), name='A', dtype=dtype)
+ B = te.placeholder((L, M), name='B', dtype=dtype)
- k = tvm.reduce_axis((0, L), name='k')
- C = tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k), name='C')
- s = tvm.create_schedule(C.op)
+ k = te.reduce_axis((0, L), name='k')
+ C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name='C')
+ s = te.create_schedule(C.op)
# schedule
y, x = s[C].op.axis
Parameters
----------
- sch: tvm.schedule.Schedule
+ sch: tvm.te.schedule.Schedule
schedule
Returns
elif isinstance(op, tensor.PlaceholderOp):
pass
else:
- raise FlopCalculationError("Only support tvm.compute currently. "
- "Other ops like tvm.scan/tvm.extern is not supported")
+ raise FlopCalculationError("Only support te.compute currently. "
+ "Other ops like tvm.te.scan/te.extern is not supported")
return ret
try:
These decorators can make your simple implementation be able to use different configurations
for different workloads.
Here we directly use all arguments to the TOPI call as "workload", so make sure all the arguments
-(except tvm.Tensor) in you calls are hashable. For tvm.Tensor, we will serialize it to a hashable
-tuple.
+(except tvm.te.Tensor) in your calls are hashable. For tvm.te.Tensor,
+we will serialize it to a hashable tuple.
See tvm/topi/python/topi/arm_cpu/depthwise_conv2d.py for example usage.
"""
import tvm.te._ffi_api
from tvm import target as _target
+from tvm.te import tensor
-from ... import tensor
from .task import args_to_workload, DispatchContext, \
register_task_compute, register_task_schedule, serialize_args
import numpy as np
-from .. import expr, ir_pass
+from tvm.tir import expr, ir_pass
logger = logging.getLogger('autotvm')
"""Utilities for binary file manipulation"""
import os
import subprocess
+import tvm._ffi
from . import util
-from ..api import register_func
+
RELOCATION_LD_SCRIPT_TEMPLATE = """
/* linker symbol for use in UTVMInit */
return output
-@register_func("tvm_callback_get_section_size")
+@tvm._ffi.register_func("tvm_callback_get_section_size")
def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix):
"""Finds size of the section in the binary.
Assumes `size` shell command exists (typically works only on Linux machines)
return section_size
-@register_func("tvm_callback_relocate_binary")
+@tvm._ffi.register_func("tvm_callback_relocate_binary")
def tvm_callback_relocate_binary(
binary_path,
word_size,
return rel_bin
-@register_func("tvm_callback_read_binary_section")
+@tvm._ffi.register_func("tvm_callback_read_binary_section")
def tvm_callback_read_binary_section(binary, section, toolchain_prefix):
"""Returns the contents of the specified section in the binary byte array
return section_bin
-@register_func("tvm_callback_get_symbol_map")
+@tvm._ffi.register_func("tvm_callback_get_symbol_map")
def tvm_callback_get_symbol_map(binary, toolchain_prefix):
"""Obtains a map of symbols to addresses in the passed binary
# under the License.
"""External function interface to BLAS libraries."""
import tvm
-from .. import api as _api
+from tvm import te
def matmul(lhs, rhs, transa=False, transb=False, **kwargs):
"""
n = lhs.shape[1] if transa else lhs.shape[0]
m = rhs.shape[0] if transb else rhs.shape[1]
- return _api.extern(
+ return te.extern(
(n, m),
[lhs, rhs],
lambda ins, outs: tvm.tir.call_packed(
b = lhs.shape[0]
n = lhs.shape[2] if transa else lhs.shape[1]
m = rhs.shape[1] if transb else rhs.shape[2]
- return _api.extern(
+ return te.extern(
(b, n, m),
[lhs, rhs],
lambda ins, outs: tvm.tir.call_packed(
# under the License.
"""External function interface to cuBLAS libraries."""
import tvm
-from .. import api as _api
+from tvm import te
+
def matmul(lhs, rhs, transa=False, transb=False, dtype=None):
"""Create an extern op that compute matrix mult of A and rhs with cuBLAS
n = lhs.shape[1] if transa else lhs.shape[0]
m = rhs.shape[0] if transb else rhs.shape[1]
dtype = dtype if dtype is not None else lhs.dtype
- return _api.extern(
+ return te.extern(
(n, m), [lhs, rhs],
lambda ins, outs: tvm.tir.call_packed(
"tvm.contrib.cublas.matmul",
n = lhs.shape[2] if transa else lhs.shape[1]
m = rhs.shape[1] if transb else rhs.shape[2]
dtype = dtype if dtype is not None else lhs.dtype
- return _api.extern(
+ return te.extern(
(b, n, m), [lhs, rhs],
lambda ins, outs: tvm.tir.call_packed(
"tvm.contrib.cublas.batch_matmul",
# under the License.
"""External function interface to cuBLASlt libraries."""
import tvm
-from .. import api as _api
+from tvm import te
def matmul(lhs, rhs, transa=False, transb=False, n=0, m=0, dtype=None):
if m == 0:
m = rhs.shape[0] if transb else rhs.shape[1]
dtype = dtype if dtype is not None else lhs.dtype
- return _api.extern(
+ return te.extern(
(n, m), [lhs, rhs],
lambda ins, outs: tvm.tir.call_packed(
"tvm.contrib.cublaslt.matmul",
import ctypes
import numpy as np
import tvm
-from .. import api as _api
-from .. import get_global_func as _get_global_func
+
+import tvm._ffi
+from tvm import te
# algos can be read from cudnn.h
_FWD_ALGOS = [
_prepare_global_func_params(dims - 2, pad, stride, dilation, x_shape, w_shape)
oshape = np.zeros((dims), dtype=np.int32)
- func = _get_global_func("tvm.contrib.cudnn.conv.output_shape")
+ func = tvm._ffi.get_global_func("tvm.contrib.cudnn.conv.output_shape")
func(tensor_format,
dims - 2,
_get_np_int32_array_handle(pad),
pad, stride, dilation, xshape, wshape = \
_prepare_global_func_params(dims - 2, pad, stride, dilation, x_shape, w_shape)
yshape = np.array(y_shape, dtype=np.int32)
- func = _get_global_func("tvm.contrib.cudnn.conv.find_algo")
+ func = tvm._ffi.get_global_func("tvm.contrib.cudnn.conv.find_algo")
return func(tensor_format,
dims - 2,
_get_np_int32_array_handle(pad),
conv_dtype)
if dims == 4:
- return _api.extern(
+ return te.extern(
oshape, [x, w],
lambda ins, outs: tvm.tir.call_packed(
"tvm.contrib.cudnn.conv2d.forward",
outs[0],
conv_dtype), name="y")
- return _api.extern(
+ return te.extern(
oshape, [x, w],
lambda ins, outs: tvm.tir.call_packed(
"tvm.contrib.cudnn.conv3d.forward",
import numpy as np
import tvm
+
GRAPH_DUMP_FILE_NAME = '_tvmdbg_graph_dump.json'
CHROME_TRACE_FILE_NAME = "_tvmdbg_execution_trace.json"
import ctypes
import numpy as np
import tvm
-from .. import api as _api
-from .. import get_global_func as _get_global_func
+import tvm._ffi
+
+from tvm import te
def _get_np_int32_array_handle(arr):
oshape = np.zeros((len(x.shape)), dtype=np.int32)
xshape = x.shape
wshape = w.shape
- setup_func = _get_global_func("tvm.contrib.miopen.conv2d.setup")
+ setup_func = tvm._ffi.get_global_func("tvm.contrib.miopen.conv2d.setup")
algo = setup_func(conv_mode,
data_type,
pad_h,
group_count,
_get_np_int32_array_handle(oshape))
- return _api.extern(
+ return te.extern(
list(oshape), [x, w],
lambda ins, outs: tvm.tir.call_packed(
"tvm.contrib.miopen.conv2d.forward",
# under the License.
"""External function interface to MPS libraries."""
import tvm
-from .. import api as _api
+from tvm import te
+
# pylint: disable=C0103,W0612
m = b
if transb:
n = c
- return _api.extern(
+ return te.extern(
(m, n), [lhs, rhs],
lambda ins, outs: tvm.tir.call_packed(
"tvm.contrib.mps.matmul", ins[0], ins[1], outs[0], transa, transb),
ho = hi // stride
wo = wi // stride
- return _api.extern(
+ return te.extern(
(n, ho, wo, co), [data, weight],
lambda ins, outs: tvm.tir.call_packed(
"tvm.contrib.mps.conv2d", ins[0], ins[1], outs[0], padding, stride),
# under the License.
"""External function interface to NNPACK libraries."""
import tvm
+from tvm import te
import tvm._ffi
-from .. import api as _api
def is_available():
lhs 1D array out[output_channels] of FP32 elements.
"""
m = rhs.shape[0]
- return _api.extern(
+ return te.extern(
(m, ), [lhs, rhs],
lambda ins, outs: tvm.tir.call_packed(
"tvm.contrib.nnpack.fully_connected_inference",
assert isinstance(stride, list) and len(stride) == 2
batch, _, input_height, input_width = data.shape
output_channels, _, kernel_height, kernel_width = kernel.shape
- idxdiv = _api.indexdiv
+ idxdiv = te.indexdiv
output_height = idxdiv(
input_height + padding[0] + padding[1] - kernel_height, stride[0]) + 1
output_width = idxdiv(
input_width + padding[0] + padding[1] - kernel_width, stride[1]) + 1
- return _api.extern(
+ return te.extern(
(batch, output_channels, output_height, output_width),
[data, kernel, bias] if bias is not None else [data, kernel],
lambda ins, outs: tvm.tir.call_packed(
batch, _, input_height, input_width = data.shape
output_channels, _, _, _ = transformed_kernel.shape
kernel_height, kernel_width = (3, 3)
- idxdiv = _api.indexdiv
+ idxdiv = te.indexdiv
output_height = idxdiv(input_height + padding[0] + padding[1] - kernel_height, stride[0]) + 1
output_width = idxdiv(input_width + padding[0] + padding[1] - kernel_width, stride[1]) + 1
- return _api.extern(
+ return te.extern(
(batch, output_channels, output_height, output_width),
[data, transformed_kernel, bias] if bias is not None else [data, transformed_kernel],
lambda ins, outs: tvm.tir.call_packed(
transform_tile_size = 8
if not isinstance(dtype, str):
dtype = dtype.dtype
- return _api.extern(
+ return te.extern(
(output_channels, input_channels, transform_tile_size, transform_tile_size),
[kernel],
lambda ins, outs: tvm.tir.call_packed(
import subprocess
import os
import warnings
+
+import tvm._ffi
from tvm.runtime import ndarray as nd
from . import util
-from ..api import register_func
from .._ffi.base import py_str
def compile_cuda(code,
raise RuntimeError("Cannot read cuda version file")
-@register_func("tvm_callback_libdevice_path")
+@tvm._ffi.register_func("tvm_callback_libdevice_path")
def find_libdevice_path(arch):
"""Utility function to find libdevice
import logging
import tvm
+from tvm import te
from . import util
from .. import rpc
base_type = str(base_type) + str(bits)
dtype = base_type if lanes == 1 else base_type + "x" + str(lanes)
- k = tvm.reduce_axis((0, m), name="k")
+ k = te.reduce_axis((0, m), name="k")
- x = tvm.placeholder((n,), dtype=dtype, name="x")
- op = tvm.comm_reducer(lambda x, y: x*y, lambda t: tvm.const(1, dtype=t), name="sum")
- y = tvm.compute((n // m,),
- lambda i: op(x[i // stride * stride * m + i % stride + k * stride], axis=k))
- s = tvm.create_schedule(y.op)
+ x = te.placeholder((n,), dtype=dtype, name="x")
+ op = te.comm_reducer(lambda x, y: x*y, lambda t: tvm.tir.const(1, dtype=t), name="sum")
+ y = te.compute((n // m,),
+ lambda i: op(x[i // stride * stride * m + i % stride + k * stride], axis=k))
+ s = te.create_schedule(y.op)
yo, yi = s[y].split(y.op.axis[0], target.max_num_threads)
- s[y].bind(yo, tvm.thread_axis("blockIdx.x"))
- s[y].bind(yi, tvm.thread_axis("threadIdx.x"))
+ s[y].bind(yo, te.thread_axis("blockIdx.x"))
+ s[y].bind(yi, te.thread_axis("threadIdx.x"))
s[y].unroll(k)
try:
def extern(ins, outs):
# pylint: disable=unused-argument
"""construct measurement function by building IR directly"""
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
- bx = tvm.thread_axis("blockIdx.x")
- tx = tvm.thread_axis("threadIdx.x")
+ bx = te.thread_axis("blockIdx.x")
+ tx = te.thread_axis("threadIdx.x")
ib.scope_attr(bx, "thread_extent", n // max_threads)
ib.scope_attr(tx, "thread_extent", max_threads)
ib.emit(outs[0].vstore(idx, b[0]))
return ib.get()
- y = tvm.extern((n,), [], extern, name="y", dtype=dtype)
- s = tvm.create_schedule(y.op)
+ y = te.extern((n,), [], extern, name="y", dtype=dtype)
+ s = te.create_schedule(y.op)
try:
func = tvm.build(s, [y], target, target_host=target_host)
# under the License.
"""External function interface to random library."""
import tvm
+from tvm import te
import tvm._ffi
-from .. import api as _api
def randint(low, high, size, dtype='int32'):
A tensor with specified size and dtype
"""
assert 'int' in dtype, "the type of randint output must be int or uint"
- return _api.extern(size, [], lambda ins, outs: tvm.tir.call_packed(
+ return te.extern(size, [], lambda ins, outs: tvm.tir.call_packed(
"tvm.contrib.random.randint", int(low), int(high), outs[0]), dtype=dtype)
out : Tensor
A tensor with specified size and dtype.
"""
- return _api.extern(size, [], lambda ins, outs: tvm.tir.call_packed(
+ return te.extern(size, [], lambda ins, outs: tvm.tir.call_packed(
"tvm.contrib.random.uniform", float(low), float(high), outs[0]), dtype='float32')
out : Tensor
A tensor with specified size and dtype
"""
- return _api.extern(size, [], lambda ins, outs: tvm.tir.call_packed(
+ return te.extern(size, [], lambda ins, outs: tvm.tir.call_packed(
"tvm.contrib.random.normal", float(loc), float(scale), outs[0]), dtype='float32')
# under the License.
"""External function interface to rocBLAS libraries."""
import tvm
-from .. import api as _api
+from tvm import te
+
def matmul(lhs, rhs, transa=False, transb=False):
"""Create an extern op that compute matrix mult of A and rhs with rocBLAS
"""
n = lhs.shape[1] if transa else lhs.shape[0]
m = rhs.shape[0] if transb else rhs.shape[1]
- return _api.extern(
+ return te.extern(
(n, m), [lhs, rhs],
lambda ins, outs: tvm.tir.call_packed(
"tvm.contrib.rocblas.matmul",
import subprocess
from os.path import join, exists
+import tvm._ffi
from tvm._ffi.base import py_str
+import tvm.runtime
import tvm.target
from . import util
-from ..api import register_func, convert
+
def find_lld(required=True):
"""Find ld.lld in system.
raise RuntimeError(msg)
-@register_func("tvm_callback_rocm_link")
+@tvm._ffi.register_func("tvm_callback_rocm_link")
def callback_rocm_link(obj_bin):
"""Links object file generated from LLVM to HSA Code Object
cobj_bin = bytearray(open(tmp_cobj, "rb").read())
return cobj_bin
-@register_func("tvm_callback_rocm_bitcode_path")
+@tvm._ffi.register_func("tvm_callback_rocm_bitcode_path")
def callback_rocm_bitcode_path(rocdl_dir="/opt/rocm/lib/"):
"""Utility function to find ROCm device library bitcodes
"oclc_wavefrontsize64_on.amdgcn.bc"
]
paths = [join(rocdl_dir, bitcode) for bitcode in bitcode_files]
- return convert([path for path in paths if exists(path)])
+ return tvm.runtime.convert([path for path in paths if exists(path)])
"""Utility for Interacting with SDAccel Tools"""
import subprocess
import os
+
+import tvm._ffi
from . import util
-from ..api import register_func
-@register_func("tvm_callback_sdaccel_compile")
+@tvm._ffi.register_func("tvm_callback_sdaccel_compile")
def compile_vhls(kernel_info, device_name):
"""Compile Vivado HLS code for SDAccel.
# pylint: disable=invalid-name
import numpy as _np
from tvm.runtime import ndarray as _nd
-
-from .. import expr as _expr
-from .. import api as _api
-from .. import tensor as _tensor
+from tvm import te
+from tvm.tir import expr as _expr
+from tvm.te import tensor as _tensor
float32 = "float32"
"""
SparsePlaceholderOp.__init__(self, shape, nonzeros, dtype, name)
self.stype = 'csr'
- self.data = _api.placeholder((nonzeros,), dtype=dtype, name=self.name+'_data')
- self.indices = _api.placeholder((nonzeros,), dtype=itype, name=self.name+'_indices')
- self.indptr = _api.placeholder((self.shape[0]+1,), dtype=itype, name=self.name+'_indptr')
+ self.data = te.placeholder((nonzeros,), dtype=dtype, name=self.name+'_data')
+ self.indices = te.placeholder((nonzeros,), dtype=itype, name=self.name+'_indices')
+ self.indptr = te.placeholder((self.shape[0]+1,), dtype=itype, name=self.name+'_indptr')
assert isinstance(self.data, _tensor.Tensor)
assert isinstance(self.indices, _tensor.Tensor)
assert isinstance(self.indptr, _tensor.Tensor)
def encode_itervar_relation(obj_manager, rel):
"""Extract and encode IterVar Relationship visualization data to a dictionary"""
rel_type = type(rel)
- if rel_type is tvm.schedule.Split:
+ if rel_type is tvm.te.schedule.Split:
node_type = 'Split_Relation'
rel_dict = {
"type": node_type,
"outer": obj_manager.get_dom_path(rel.outer),
"inner": obj_manager.get_dom_path(rel.inner),
}
- elif rel_type is tvm.schedule.Fuse:
+ elif rel_type is tvm.te.schedule.Fuse:
node_type = 'Fuse_Relation'
rel_dict = {
"type": node_type,
"outer": obj_manager.get_dom_path(rel.outer),
"inner": obj_manager.get_dom_path(rel.inner),
}
- elif rel_type is tvm.schedule.Singleton:
+ elif rel_type is tvm.te.schedule.Singleton:
node_type = 'Singleton_Relation'
rel_dict = {
"type": node_type,
dict : dictionary
A nested dictionary
"""
- assert isinstance(sch, tvm.schedule.Schedule
- ), 'Input is not a tvm.schedule.Schedule object.'
+ assert isinstance(sch, tvm.te.schedule.Schedule
+ ), 'Input is not a tvm.te.schedule.Schedule object.'
range_map = None
if need_range:
try:
- range_map = tvm.schedule.InferBound(sch)
+ range_map = tvm.te.schedule.InferBound(sch)
except tvm._ffi.base.TVMError as expt:
warnings.warn(
'Ranges are not available, because InferBound fails with the following error:\n'
"""According to the given schedule, form the raw body
Parameters
----------
- sch : tvm.schedule.Schedule
+ sch : tvm.te.schedule.Schedule
The given scheduler to form the raw body
Returns
Parameters
----------
- sch : tvm.schedule.Schedule
+ sch : tvm.te.schedule.Schedule
The schedule to be built
args : list of Buffer or Tensor or Var
Parameters
----------
- inputs : tvm.Schedule, LoweredFunc, or dict of target to LoweredFunc list
+ inputs : tvm.te.Schedule, LoweredFunc, or dict of target to LoweredFunc list
The schedule to be built
args : list of Buffer or Tensor or Var, optional
.. code-block:: python
n = 2
- A = tvm.placeholder((n,), name='A')
- B = tvm.placeholder((n,), name='B')
- C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
- s = tvm.create_schedule(C.op)
+ A = te.placeholder((n,), name='A')
+ B = te.placeholder((n,), name='B')
+ C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
+ s = tvm.te.create_schedule(C.op)
f = tvm.lower(s, [A, B, C], name="test_add")
m = tvm.build(f, target="llvm")
.. code-block:: python
n = 2
- A = tvm.placeholder((n,), name='A')
- B = tvm.placeholder((n,), name='B')
- C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
- s1 = tvm.create_schedule(C.op)
+ A = te.placeholder((n,), name='A')
+ B = te.placeholder((n,), name='B')
+ C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
+ s1 = tvm.te.create_schedule(C.op)
with tvm.target.cuda() as cuda_tgt:
s2 = topi.cuda.schedule_injective(cuda_tgt, [C])
f1 = tvm.lower(s1, [A, B, C], name="test_add1")
# under the License.
"""Intrinsics of TVM-Python Hybrid Script for Python compilation time
semantic support."""
+
+from tvm.runtime import const, convert
+import tvm.te
from tvm.ir.container import Array
from tvm import target as _tgt
from tvm.tir import expr as _expr
from tvm.tir import call_pure_intrin
from tvm.tir.stmt import For
-from .. import api as _api
-
from .util import _internal_assert
# pylint: disable=redefined-builtin
"""Handling TVM loop types"""
n = args.__len__()
if n == 1:
- low, ext = _api.const(0, dtype='int32'), args[0]
+ low, ext = const(0, dtype='int32'), args[0]
else:
_internal_assert(n == 2, "A loop intrinsic should only have 1 or 2 arguments!")
low, ext = args[0], args[1]
- if not ir_pass.Equal(low, _api.const(0, dtype='int32')):
+ if not ir_pass.Equal(low, const(0, dtype='int32')):
ext = ext - low
for_type = LOOP_INTRIN[annotation]
iter_var = None
_internal_assert(args.__len__() == 2, "A loop bind should only have 2 arguments!")
_internal_assert(isinstance(args[0], str), \
"A loop bind's first argument should be a string!")
- low, ext = _api.const(0, "int32"), args[1]
- iter_var = _api.thread_axis((low, ext), args[0])
+ low, ext = const(0, "int32"), args[1]
+ iter_var = tvm.te.thread_axis((low, ext), args[0])
for_type = None
return iter_var, low, ext, for_type
def _math_intrin(func_id, args):
# pylint: disable=import-outside-toplevel
- import tvm.tir.op
- return getattr(tvm.tir.op, func_id)(*args)
+ from tvm.tir import op
+ return getattr(op, func_id)(*args)
sqrt = log = exp = tanh = sigmoid = power = popcount = _math_intrin #pylint: disable=invalid-name
"""Handling TVM tensor allocation.
You may refer hybrid.intrin.allocate for more details."""
n = args.__len__()
- _internal_assert(isinstance(_api.convert(args[0]), Array), \
+ _internal_assert(isinstance(convert(args[0]), Array), \
"allocate's first argument should be a tuple of shape!")
shape = args[0]
for i in shape:
_internal_assert(args.__len__() == 1, "Only 1 argument is expected!")
_internal_assert(func_id == "len", "This function cannot be directly invoked!")
try:
- return _api.convert(args[0].__len__())
+ return convert(args[0].__len__())
except: #pylint: disable=bare-except
_internal_assert(args[0].shape.__len__() == 1, "Only one-dimension array can get len")
- return _api.convert(args[0].shape[0])
+ return convert(args[0].shape[0])
def _cast(func_id, args):
else:
_internal_assert(isinstance(args[0], _expr.IntImm), "In tvm bool should be uint")
res = _tgt.Target.current(args[0].value).max_num_threads
- return _api.convert(res)
+ return convert(res)
from enum import Enum
from tvm.ir import Array, Range
+import tvm.runtime
import tvm.tir
+import tvm.te
import tvm.te._ffi_api
from tvm.tir import expr as _expr
from . import util
from .preprocessor import determine_variable_usage
-from .. import api as _api
-
def concat_list_to_block(lst):
"""Concatenate a list of Python IR nodes to HalideIR Block"""
"""
Parameters
----------
- args: A list of tvm.placeholder or tvm.var
+ args: A list of tvm.te.placeholder or tvm.te.var
Provided by the user, the argument list of the function to be lowered.
usage: A dict of variables used in last in this function
_domain = [Range.make_by_min_extent(0, i) for i in _buf.shape]
_dtype = _buf.dtype
- _true = _api.convert(True)
+ _true = tvm.runtime.convert(True)
body = tvm.tir.Realize(_buf.op, 0, _dtype, _domain, _true, body)
- body = tvm.tir.AttrStmt(_buf.op, 'realize_scope', _api.convert(_scope), body)
+ body = tvm.tir.AttrStmt(_buf.op, 'realize_scope', tvm.runtime.convert(_scope), body)
for elem in to_pop:
self.symbols.pop(elem)
def visit_Name(self, node):
name = node.id
if sys.version_info[0] == 2 and name in ['True', 'False']:
- return _api.convert(ast.literal_eval(name))
+ return tvm.runtime.convert(ast.literal_eval(name))
if name in self.closure_vars:
- return _api.convert(self.closure_vars[name])
+ return tvm.runtime.convert(self.closure_vars[name])
ty, entry = self.symbols[name]
_internal_assert(name in self.symbols, "Unknown symbol %s!" % name)
return entry if isinstance(node.ctx, ast.Load) else None
if ty is Symbol.BufferVar:
if isinstance(node.ctx, ast.Load):
- return tvm.tir.Call(entry.dtype, entry.name, [_api.const(0, 'int32')], \
+ return tvm.tir.Call(entry.dtype, entry.name, [tvm.runtime.const(0, 'int32')], \
_expr.Call.Halide, entry.op, entry.value_index)
- return entry, [_api.const(0, 'int32')]
+ return entry, [tvm.runtime.const(0, 'int32')]
# Do I need any assertion here?
return entry
_internal_assert(isinstance(node.n, bool),
"The data type should be one of (int, float, bool)")
dtype = "bool"
- return _api.const(node.n, dtype)
+ return tvm.runtime.const(node.n, dtype)
def visit_NameConstant(self, node):
- return _api.convert(node.value)
+ return tvm.runtime.convert(node.value)
def visit_AugAssign(self, node):
_internal_assert(len(buf) == 2, "LHS is supposed to be (buf, args)!")
buf, args = buf
else:
- args = [_api.const(0, 'int32')]
+ args = [tvm.runtime.const(0, 'int32')]
_internal_assert(isinstance(buf, Tensor), "LHS is supposed to be Tensor!")
read = tvm.tir.Call(buf.dtype, buf.name, args, _expr.Call.Halide, buf.op, buf.value_index)
"This value should not be defined before this point!")
if isinstance(rhs, tuple):
shape, dtype, scope = rhs
- ph = _api.placeholder(shape, dtype=dtype, name=lhs)
+ ph = tvm.te.placeholder(shape, dtype=dtype, name=lhs)
self.add_symbol(lhs, getattr(Symbol, scope.title() + "Buffer"), ph)
if scope == 'output':
self.outputs.append(lhs)
"Single variable not supported in devices' side!\n" + \
"If you are using GPU, please allocate a 'local' spad " + \
"outside the bind body")
- ph = _api.placeholder((1, ), dtype=rhs.dtype, name=lhs)
+ ph = tvm.te.placeholder((1, ), dtype=rhs.dtype, name=lhs)
self.add_symbol(lhs, Symbol.BufferVar, ph)
lhs = self.visit(lhs_)
if lhs is not None:
if iter_var is None:
_internal_assert(for_type is not None, "The loop iterating function parse error!")
- offset = iter_var = _api.var(_name)
- if not _ir_pass.Equal(low, _api.const(0, 'int32')):
+ offset = iter_var = tvm.te.var(_name)
+ if not _ir_pass.Equal(low, tvm.runtime.const(0, 'int32')):
offset = iter_var + low
self.add_symbol(_name, Symbol.LoopVar, offset)
_body = visit_list_to_block(self.visit, node.body)
else:
_internal_assert(not isinstance(for_type, tuple), \
"Micro expansion should be handled before!")
- res = tvm.tir.For(iter_var, _api.const(0, 'int32'), ext, for_type, 0, _body)
+ res = tvm.tir.For(iter_var, tvm.runtime.const(0, 'int32'), ext, for_type, 0, _body)
self.symbols.pop(_name)
return res
def visit_Assert(self, node):
test = self.visit(node.test)
- mesg = _api.convert(self.visit(node.msg))
+ mesg = tvm.runtime.convert(self.visit(node.msg))
return tvm.tir.AssertStmt(test, mesg, util.make_nop())
import sys
import numpy
+import tvm.runtime
from tvm._ffi.base import numeric_types
from tvm.ir.container import Array
from tvm.tir import stmt as _stmt
from tvm.te.tensor import Tensor
-from .. import api as _api
-
#pylint: disable=invalid-name
np_arg_types = tuple(list(numeric_types) + [numpy.ndarray])
# Useful constants. In avoid of runtime dependences, we use function calls to return them.
def make_nop():
"""Returns a 'no operation' node in HalideIR."""
- return _stmt.Evaluate(_api.const(0, dtype='int32'))
+ return _stmt.Evaluate(tvm.runtime.const(0, dtype='int32'))
def is_docstring(node):
def replace_io(body, rmap):
"""Replacing tensors usage according to the dict given"""
# pylint: disable=import-outside-toplevel
- from .. import ir_pass
+ from tvm.tir import ir_pass
def replace(op):
if isinstance(op, _stmt.Provide) and op.func in rmap.keys():
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint:disable=unused-wildcard-import, wildcard-import, redefined-builtin
-"""Backwared compatible layer for intrin."""
-from .tir.op import *
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=unused-import
-"""namespace of IR node builder make function
-
-This namespace is used for developers. While you do not see any declarations.
-The functions are automatically exported from C++ side via PackedFunc.
-
-Each api is a PackedFunc that can be called in a positional argument manner.
-You can use make function to build the IR node.
-"""
-import tvm._ffi
-import tvm.ir
-from tvm.ir import make_node as node
-from tvm.tir import Call
-
-
-def make_by_min_extent(min_value, extent):
- """Construct a Range by min and extent.
-
- This constructs a range in [min_value, min_value + extent)
-
- Parameters
- ----------
- min_value : PrimExpr
- The minimum value of the range.
-
- extent : PrimExpr
- The extent of the range.
-
- Returns
- -------
- rng : Range
- The constructed range.
- """
- return tvm.ir.Range.make_by_min_extent(min_value, extent)
-
-tvm._ffi._init_api("tvm.make")
"""The Relay IR namespace containing the IR definition and compiler."""
import os
from sys import setrecursionlimit
-from ..api import register_func
+
from . import call_graph
from . import base
from . import ty
Parameters
----------
- sch : tvm.Schedule
+ sch : tvm.te.Schedule
The schedule.
- inputs : List[tvm.Tensor]
+ inputs : List[tvm.te.Tensor]
The inputs to the function.
func_name : str
import logging
import numpy as np
import tvm
+from tvm import te
from ..base import register_relay_node, Object
from ... import target as _target
from ... import autotvm
"""Convert the shape to correct dtype and vars."""
ret = []
for dim in shape:
- if isinstance(dim, tvm.expr.IntImm):
+ if isinstance(dim, tvm.tir.IntImm):
val = int(dim)
assert val <= np.iinfo(np.int32).max
- ret.append(tvm.expr.IntImm("int32", val))
- elif isinstance(dim, tvm.expr.Any):
- ret.append(tvm.var("any_dim", "int32"))
+ ret.append(tvm.tir.IntImm("int32", val))
+ elif isinstance(dim, tvm.tir.Any):
+ ret.append(te.var("any_dim", "int32"))
else:
ret.append(dim)
return ret
attrs : object
The op attribute.
- inputs : List[tvm.Tensor]
+ inputs : List[tvm.te.Tensor]
Input tensors to the op.
out_type : relay.Type
flag = True
for clause in spec.condition.clauses:
clause = analyzer.canonical_simplify(clause)
- if isinstance(clause, tvm.expr.IntImm) and clause.value:
+ if isinstance(clause, tvm.tir.IntImm) and clause.value:
continue
flag = False
break
attrs : object
The op attribute.
- inputs : List[tvm.Tensor]
+ inputs : List[tvm.te.Tensor]
Input tensors to the op.
out_type : relay.Type
Returns
-------
- ret : tuple(relay.op.OpImplementation, List[tvm.Tensor])
+ ret : tuple(relay.op.OpImplementation, List[tvm.te.Tensor])
The best op implementation and the corresponding output tensors.
"""
all_impls = get_valid_implementations(op, attrs, inputs, out_type, target)
from tvm.runtime.ndarray import empty
from tvm.relay import _build_module
from tvm import target as _target
-from tvm import expr as _expr
+from tvm.tir import expr as _expr
class GraphRuntimeCodegen(object):
"""The compiler from Relay to the TVM runtime system."""
from tvm.ir import IRModule
-from tvm import expr as tvm_expr
+from tvm.tir import expr as tvm_expr
from .. import nd as _nd, target as _target, autotvm
from ..contrib import graph_runtime as _graph_rt
from . import _build_module
# under the License.
# pylint: disable=wildcard-import, redefined-builtin, invalid-name
"""The Relay IR namespace containing the IR definition and compiler."""
-from __future__ import absolute_import
-from ..api import register_func
-
+import tvm._ffi
# pylint: disable=unused-argument, import-outside-toplevel
def _debugger_init(expr, stack):
import pdb
pdb.set_trace()
-@register_func("relay.debug")
+@tvm._ffi.register_func("relay.debug")
def _debug(*args):
import pdb
pdb.set_trace()
# pylint: disable=unused-argument
-@register_func("relay.debug_interp")
+@tvm._ffi.register_func("relay.debug_interp")
def _debug_interp(*args):
_, _, _, ist = args
print("Relay Debugger")
# pylint: disable=invalid-name, import-self, unused-argument, unused-variable
# pylint: disable=inconsistent-return-statements, import-outside-toplevel
"""CoreML frontend."""
-from __future__ import absolute_import as _abs
import math
import numpy as np
import tvm
DarkNet symbol frontend for Relay.
"""
-from __future__ import absolute_import as _abs
from enum import Enum
import numpy as np
import tvm
# under the License.
# pylint: disable=invalid-name, import-self, len-as-condition, no-else-return, too-many-lines
"""MXNet symbol frontend."""
-from __future__ import absolute_import as _abs
-
import json
import numpy as np
import tvm
val = inputs[0]
dtype = type(val)
- if isinstance(val, tvm.expr.IntImm):
+ if isinstance(val, tvm.tir.IntImm):
val = val.__int__()
dtype = int
# pylint: disable=import-self, invalid-name, unused-argument, too-many-lines, len-as-condition, broad-except
# pylint: disable=import-outside-toplevel
"""TF: Tensorflow frontend."""
-from __future__ import absolute_import as _abs
-from __future__ import print_function
-
import warnings
from collections import defaultdict
'Attribute batch_dims is not supported')
new_input = inputs[0:2]
return AttrCvt(op_name="take",
- extras={'axis': tvm.const(axis, 'int32')},
+ extras={'axis': tvm.tir.const(axis, 'int32')},
ignores=['Tindices', 'Tparams', 'validate_indices',
'Taxis', '_class', 'batch_dims'])(new_input, attr)
return _impl
# specific language governing permissions and limitations
# under the License.
# pylint: disable=invalid-name, unused-argument, too-many-lines, import-outside-toplevel
-
"""Tensorflow lite frontend."""
import math
import numpy as np
"""Backend compiler related feature registration"""
from __future__ import absolute_import
+from tvm.runtime import convert
from topi.util import get_const_int, get_const_tuple
from . import op as _reg
-from ...api import convert
from ...hybrid import script
_reg.register_reduce_schedule("argmax")
# under the License.
#pylint: disable=invalid-name, unused-argument, len-as-condition
"""Backend compiler related feature registration"""
-from __future__ import absolute_import
import topi
+
+from tvm.runtime import convert
from topi.util import get_const_tuple
from .op import register_compute, register_shape_func
from .op import register_broadcast_schedule, register_injective_schedule
from .op import register_pattern, OpPattern
from ...hybrid import script
-from ...api import convert
register_broadcast_schedule("log")
# pylint: disable=invalid-name,unused-argument, len-as-condition, too-many-nested-blocks, too-many-local-variables, too-many-arguments
from __future__ import absolute_import
import tvm
+from tvm import te
+from tvm.runtime import convert
import topi
from topi.util import get_const_int, get_const_tuple
from . import op as _reg
from . import strategy
from .op import OpPattern
from ...hybrid import script
-from ...api import convert
_reg.register_broadcast_schedule("broadcast_to")
_reg.register_broadcast_schedule("broadcast_to_like")
output_shape.append(s)
else:
# see Any, replace it with a var
- output_shape.append(tvm.var("any_dim", "int32"))
+ output_shape.append(te.var("any_dim", "int32"))
new_output_type = tvm.relay.ty.TensorType(output_shape, "int32")
return [topi.argwhere(new_output_type, inputs[0])]
if keep_axes:
out = _squeeze_shape_func(inputs[0], convert(keep_axes))
else:
- out = tvm.compute((), lambda *indices: 0)
+ out = te.compute((), lambda *indices: 0)
return [out]
@script
data : relay.Expr
The input data tensor.
- valid_count : tvm.Tensor
+ valid_count : tvm.te.Tensor
The number of valid elements to be sorted.
axis : int, optional
import topi
from topi.util import get_const_tuple
+
+from tvm.runtime import convert
from .. import op as reg
from .. import strategy
from ..op import OpPattern
from .._tensor import elemwise_shape_func
-from ....api import convert
from ....hybrid import script
# relu
from ..base import register_relay_node
from ..expr import RelayExpr
-from ...api import register_func
from ...target import get_native_generic_func, GenericFunc
from ...runtime import Object
from . import _make
attrs : Attrs
Op attributes.
- inputs : list[tvm.tensor.Tensor]
+ inputs : list[te.tensor.Tensor]
The input tensors.
out_type : relay.Type
Returns
-------
- outs : list[tvm.tensor.Tensor]
+ outs : list[te.tensor.Tensor]
The output tensors.
"""
return _OpImplementationCompute(self, attrs, inputs, out_type)
attrs : Attrs
Op attributes.
- outs : list[tvm.tensor.Tensor]
+ outs : list[te.tensor.Tensor]
The output tensors.
target : tvm.target.Target
Returns
-------
- schedule : tvm.Schedule
+ schedule : tvm.te.Schedule
The schedule.
"""
return _OpImplementationSchedule(self, attrs, outs, target)
get(op_name).set_attr("TShapeDataDependant", data_dependant, level)
return register(op_name, "FShapeFunc", shape_func, level)
-@register_func("relay.op.compiler._lower")
+@tvm._ffi.register_func("relay.op.compiler._lower")
def _lower(name, schedule, inputs, outputs):
return lower(schedule, list(inputs) + list(outputs), name=name)
-@register_func("relay.op.compiler._build")
+@tvm._ffi.register_func("relay.op.compiler._build")
def _build(lowered_funcs):
return build(lowered_funcs, target="llvm")
if debug_func:
name = "debugger_func{}".format(__DEBUG_COUNTER__)
- register_func(name, debug_func)
+ tvm._ffi.register_func(name, debug_func)
__DEBUG_COUNTER__ += 1
else:
name = ''
# pylint: disable=invalid-name
"""Helper utility to save parameter dicts."""
import tvm
+import tvm._ffi
-_save_param_dict = tvm.get_global_func("tvm.relay._save_param_dict")
-_load_param_dict = tvm.get_global_func("tvm.relay._load_param_dict")
+
+_save_param_dict = tvm._ffi.get_global_func("tvm.relay._save_param_dict")
+_load_param_dict = tvm._ffi.get_global_func("tvm.relay._load_param_dict")
def save_param_dict(params):
"""Save parameter dictionary to binary bytes.
# under the License.
#pylint: disable=unused-argument, not-context-manager
"""Automatic quantization toolkit."""
-from __future__ import absolute_import
+import tvm.ir
+
from . import _quantize
from ._calibrate import calibrate
from .. import expr as _expr
from .. import transform as _transform
-from ... import make as _make
from ..base import Object, register_relay_node
"""
node_args = {k: v if k not in kwargs else kwargs[k]
for k, v in QConfig._node_defaults.items()}
- return _make.node("relay.quantize.QConfig", **node_args)
+ return tvm.ir.make_node("relay.quantize.QConfig", **node_args)
class QuantizeContext(object):
import numpy as np
import tvm
+from tvm import te
import tvm.relay as relay
import tvm.relay.op as op
from tvm.relay import transform
import os
import tvm
+
def ctx_list():
"""Get context list for testcases"""
device_list = os.environ.get("RELAY_TEST_TARGETS", "")
import functools
import tvm
+from tvm import te
from tvm.runtime import ndarray as _nd
from tvm.ir.transform import PassInfo, PassContext, Pass, ModulePass, Sequential, module_pass
import numpy as np
import tvm
+from tvm import te
from tvm import relay
# define a simple network.
x = relay.var('x', shape=(10, 10))
-----------
.. code-block:: python
- with tvm.build_config(dump_pass_ir=True)
+ with tvm.target.build_config(dump_pass_ir=True):
run()
"""
scope_level = 0
.. code-block:: python
import tvm
+from tvm import te
# wrap function as target generic
@tvm.target.override_native_generic_func("my_func")
def my_func(a):
.. code-block:: python
import tvm
+from tvm import te
# wrap function as target generic
@tvm.target.generic_func
def my_func(a):
"""Namespace for Tensor Expression Language
"""
# expose all operators in tvm tir.op
+from tvm.tir import any, all, min_value, max_value, trace
from tvm.tir import exp, erf, tanh, sigmoid, log, cos, sin, atan, sqrt, rsqrt, floor, ceil
from tvm.tir import trunc, abs, round, nearbyint, isnan, power, popcount, fmod, if_then_else
from tvm.tir import div, indexdiv, indexmod, truncdiv, truncmod, floordiv, floormod
from .tag import tag_scope
from .operation import placeholder, compute, scan, extern, var, size_var
from .operation import thread_axis, reduce_axis
+
+from .tensor import PlaceholderOp, ComputeOp, TensorComputeOp, ScanOp, ExternOp, HybridOp
.. code-block:: python
# The following code is equivalent to numpy.cumsum
- m = tvm.var("m")
- n = tvm.var("n")
- X = tvm.placeholder((m, n), name="X")
- s_state = tvm.placeholder((m, n))
- s_init = tvm.compute((1, n), lambda _, i: X[0, i])
- s_update = tvm.compute((m, n), lambda t, i: s_state[t-1, i] + X[t, i])
- res = tvm.scan(s_init, s_update, s_state, X)
+ m = te.var("m")
+ n = te.var("n")
+ X = te.placeholder((m, n), name="X")
+ s_state = te.placeholder((m, n))
+ s_init = te.compute((1, n), lambda _, i: X[0, i])
+ s_update = te.compute((m, n), lambda t, i: s_state[t-1, i] + X[t, i])
+ res = tvm.te.scan(s_init, s_update, s_state, X)
"""
if _tag.TagScope.get_current() is not None:
if tag != "":
.. code-block:: python
- A = tvm.placeholder((n, l), name="A")
- B = tvm.placeholder((l, m), name="B")
- C = tvm.extern((n, m), [A, B],
- lambda ins, outs: tvm.call_packed(
+ A = te.placeholder((n, l), name="A")
+ B = te.placeholder((l, m), name="B")
+ C = te.extern((n, m), [A, B],
+ lambda ins, outs: tvm.tir.call_packed(
"tvm.contrib.cblas.matmul",
ins[0], ins[1], outs[0], 0, 0), name="C")
"""
-------
.. code-block:: python
- n = tvm.var('n')
- m = tvm.var('m')
- l = tvm.var('l')
- A = tvm.placeholder((n, l), name='A')
- B = tvm.placeholder((m, l), name='B')
- k = tvm.reduce_axis((0, l), name='k')
+ n = te.var('n')
+ m = te.var('m')
+ l = te.var('l')
+ A = te.placeholder((n, l), name='A')
+ B = te.placeholder((m, l), name='B')
+ k = te.reduce_axis((0, l), name='k')
- with tvm.tag_scope(tag='matmul'):
- C = tvm.compute((n, m), lambda i, j: tvm.sum(A[i, k] * B[j, k], axis=k))
+ with tvm.te.tag_scope(tag='matmul'):
+ C = te.compute((n, m), lambda i, j: te.sum(A[i, k] * B[j, k], axis=k))
# or use tag_scope as decorator
- @tvm.tag_scope(tag="conv")
+ @tvm.te.tag_scope(tag="conv")
def compute_relu(data):
- return tvm.compute(data.shape, lambda *i: tvm.select(data(*i) < 0, 0.0, data(*i)))
+ return te.compute(data.shape, lambda *i: tvm.tir.Select(data(*i) < 0, 0.0, data(*i)))
"""
return TagScope(tag)
# pylint: disable=unused-import, redefined-builtin
"""Namespace for Tensor-level IR"""
from tvm.ir import PrimExpr
+from tvm.runtime import const
+
from .buffer import Buffer, decl_buffer
from .data_layout import Layout, BijectiveLayout, bijective_layout, layout
from .expr import Var, SizeVar, Reduce, FloatImm, IntImm, StringImm, Cast
from .expr import Add, Sub, Mul, Div, Mod, FloorDiv, FloorMod
from .expr import Min, Max, EQ, NE, LT, LE, GT, GE, And, Or, Not
from .expr import Select, Load, Ramp, Broadcast, Shuffle, Call, Let
-from .expr import IterVar
+from .expr import IterVar, Any
from .stmt import Stmt, LetStmt, AssertStmt, ProducerConsumer, For
from .stmt import Store, Provide, Allocate, AttrStmt, Free, Realize, SeqStmt
from .stmt import IfThenElse, Evaluate, Prefetch, LoweredFunc, stmt_seq, stmt_list
from .op import call_packed, call_pure_intrin, call_intrin, call_pure_extern, call_extern
-from .op import call_llvm_intrin, all, any, min_value, max_value
+from .op import call_llvm_intrin, all, any, min_value, max_value, trace
from .op import exp, erf, tanh, sigmoid, log, cos, sin, atan, sqrt, rsqrt, floor, ceil
from .op import trunc, abs, round, nearbyint, isnan, power, popcount, fmod, if_then_else
from .op import div, indexdiv, indexmod, truncdiv, truncmod, floordiv, floormod
.. code-block:: python
- m0, m1, m2 = tvm.var("m0"), tvm.var("m1"), tvm.var("m2")
- n0, n1, n2 = tvm.var("n0"), tvm.var("n1"), tvm.var("n2")
- o0, o1, o2 = tvm.var("o0"), tvm.var("o1"), tvm.var("o2")
- A = tvm.placeholder((m0, m1, m2), name='A')
- B = tvm.placeholder((n0, n1, n2), name='B')
- C = tvm.compute((o0, o1, o2), lambda i, j, k: A[i, j, k] + B[i, j, k], name='C')
+ m0, m1, m2 = te.var("m0"), te.var("m1"), te.var("m2")
+ n0, n1, n2 = te.var("n0"), te.var("n1"), te.var("n2")
+ o0, o1, o2 = te.var("o0"), te.var("o1"), te.var("o2")
+ A = te.placeholder((m0, m1, m2), name='A')
+ B = te.placeholder((n0, n1, n2), name='B')
+ C = te.compute((o0, o1, o2), lambda i, j, k: A[i, j, k] + B[i, j, k], name='C')
Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name="Ab", buffer_type="auto_broadcast")
Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name="Bb", buffer_type="auto_broadcast")
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
fadd = tvm.build(s, [A, B, C], target='llvm', name='bcast_add', binds={A:Ab, B:Bb})
ctx = tvm.cpu(0)
a = tvm.nd.array(np.random.uniform(size=(2, 4, 3)).astype(A.dtype), ctx)
.. code-block:: python
- x = tvm.var("n")
+ x = te.var("n")
y = x + 2
assert(isinstance(y, tvm.tir.Add))
assert(y.a == x)
def __nonzero__(self):
raise ValueError("Cannot use and / or / not operator to Expr, hint: " +
- "use tvm.all / tvm.any instead")
+ "use tvm.tir.all / tvm.tir.any instead")
def __bool__(self):
return self.__nonzero__()
See Also
--------
- tvm.thread_axis: Create thread axis IterVar.
- tvm.reduce_axis: Create reduce axis IterVar.
+ te.thread_axis: Create thread axis IterVar.
+ te.reduce_axis: Create reduce axis IterVar.
"""
DataPar = 0
ThreadIndex = 1
Note
----
Select may compute both true_value and false_value.
- Use :py:class:`tvm.if_then_else` instead if you want to
+ Use :py:class:`tvm.tir.if_then_else` instead if you want to
get a conditional expression that only evaluates
the correct branch.
# under the License.
"""Generic operators in TVM.
We follow the numpy naming convention for this interface
-(e.g., tvm.generic.multitply ~ numpy.multiply).
+(e.g., tvm.tir.generic.multiply ~ numpy.multiply).
The default implementation is used by tvm.ExprOp.
"""
# pylint: disable=unused-argument
--------
.. code-block:: python
- ib = tvm.ir_builder.create()
- n = tvm.var("n")
+ ib = tvm.tir.ir_builder.create()
+ n = te.var("n")
A = ib.allocate("float32", n, name="A")
with ib.for_range(0, n, name="i") as i:
with ib.if_scope((i % 2) == 0):
--------
.. code-block:: python
- ib = tvm.ir_builder.create()
- i = tvm.var("i")
+ ib = tvm.tir.ir_builder.create()
+ i = te.var("i")
x = ib.pointer("float32")
ib.scope_attr(x, "storage_scope", "global")
x[i] = x[i - 1] + 1
--------
.. code-block:: python
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
x = ib.pointer("float32")
with ib.for_range(1, 10, name="i") as i:
x[i] = x[i - 1] + 1
--------
.. code-block:: python
- ib = tvm.ir_builder.create()
- i = tvm.var("i")
+ ib = tvm.tir.ir_builder.create()
+ i = te.var("i")
x = ib.pointer("float32")
with ib.if_scope((i % 2) == 0):
x[i] = x[i - 1] + 1
--------
.. code-block:: python
- ib = tvm.ir_builder.create()
- i = tvm.var("i")
+ ib = tvm.tir.ir_builder.create()
+ i = te.var("i")
x = ib.pointer("float32")
with ib.if_scope((i % 2) == 0):
x[i] = x[i - 1] + 1
See Also
--------
- tvm.extern : Create tensor with extern function call.
+ te.extern : Create tensor with extern function call.
"""
call_args = [_pack_buffer(x) if isinstance(x, Buffer) else x for x in args]
return Call(
from tvm.target import codegen
llvm_id = codegen.llvm_lookup_intrinsic_id(name)
assert llvm_id != 0, "%s is not an LLVM intrinsic" % name
- return call_pure_intrin(dtype, 'llvm_intrin', tvm.const(llvm_id, 'uint32'), *args)
+ return call_pure_intrin(dtype, 'llvm_intrin', tvm.tir.const(llvm_id, 'uint32'), *args)
def any(*args):
tvm.tir.call_packed : Creates packed function.
"""
if not isinstance(args, list):
- raise Exception("tvm.trace consumes the args as list type")
+ raise Exception("tvm.tir.trace consumes the args as list type")
call_args = [_pack_buffer(x) if isinstance(x, Buffer) else x for x in args]
call_args.insert(0, trace_action)
return tvm.tir.Call(
def nearbyint(x):
"""Round elements of the array to the nearest integer.
This intrinsic uses llvm.nearbyint instead of llvm.round
- which is faster but will results different from tvm.round.
+ which is faster but will give different results from te.round.
Notably nearbyint rounds according to the rounding mode,
- whereas tvm.round (llvm.round) ignores that.
+ whereas te.round (llvm.round) ignores that.
For differences between the two see:
https://en.cppreference.com/w/cpp/numeric/math/round
https://en.cppreference.com/w/cpp/numeric/math/nearbyint
-------
.. code-block:: python
- n = tvm.var("n")
- m = tvm.var("m")
- mysum = tvm.comm_reducer(lambda x, y: x+y,
- lambda t: tvm.const(0, dtype=t), name="mysum")
- A = tvm.placeholder((n, m), name="A")
- k = tvm.reduce_axis((0, m), name="k")
- B = tvm.compute((n,), lambda i: mysum(A[i, k], axis=k), name="B")
+ n = te.var("n")
+ m = te.var("m")
+ mysum = te.comm_reducer(lambda x, y: x+y,
+ lambda t: tvm.tir.const(0, dtype=t), name="mysum")
+ A = te.placeholder((n, m), name="A")
+ k = te.reduce_axis((0, m), name="k")
+ B = te.compute((n,), lambda i: mysum(A[i, k], axis=k), name="B")
"""
def _reduce_directly(*args):
num = len(args)
-------
.. code-block:: python
- m = tvm.var("m")
- n = tvm.var("n")
- A = tvm.placeholder((m, n), name="A")
- k = tvm.reduce_axis((0, n), name="k")
+ m = te.var("m")
+ n = te.var("n")
+ A = te.placeholder((m, n), name="A")
+ k = te.reduce_axis((0, n), name="k")
# there are two ways to use this {0} reducer:
# mode 1, accept (expr, axis, where) to produce a Reduce Expr
- B = tvm.compute((m,), lambda i: tvm.{0}(A[i, k], axis=k), name="B")
+ B = te.compute((m,), lambda i: te.{0}(A[i, k], axis=k), name="B")
# mode 2, simply use it with multiple Exprs:
{0}_res = te.{0}(m, n)
.. code-block:: python
- x = tvm.var("n")
- a = tvm.var("array", tvm.handle)
+ x = te.var("n")
+ a = te.var("array", "handle")
st = tvm.tir.stmt.Store(a, x + 1, 1)
assert isinstance(st, tvm.tir.stmt.Store)
assert(st.buffer_var == a)
import numpy as np
import tvm
+from tvm import te
from tvm import relay
from tvm.relay import testing
from tvm.contrib import graph_runtime, cc
import sys
import tvm
+from tvm import te
from tvm.contrib import cc
def main(target, out_dir):
- n = tvm.var('n')
- A = tvm.placeholder((n,), name='A')
- B = tvm.placeholder((n,), name='B')
- C = tvm.compute(A.shape, lambda i: A[i] + B[i], name='C')
- s = tvm.create_schedule(C.op)
+ n = te.var('n')
+ A = te.placeholder((n,), name='A')
+ B = te.placeholder((n,), name='B')
+ C = te.compute(A.shape, lambda i: A[i] + B[i], name='C')
+ s = te.create_schedule(C.op)
if target == 'cuda':
bx, tx = s[C].split(C.op.axis[0], factor=64)
- s[C].bind(bx, tvm.thread_axis('blockIdx.x'))
- s[C].bind(tx, tvm.thread_axis('threadIdx.x'))
+ s[C].bind(bx, te.thread_axis('blockIdx.x'))
+ s[C].bind(tx, te.thread_axis('threadIdx.x'))
fadd = tvm.build(s, [A, B, C], target, target_host='llvm', name='myadd')
import numpy as np
import tvm
+from tvm import te
from tvm import relay
from tvm.relay import testing
import numpy as np
import tvm
+from tvm import te
from tvm import relay
from tvm.relay import testing
import sys
import tvm
+from tvm import te
def main():
- n = tvm.var('n')
- A = tvm.placeholder((n,), name='A')
- B = tvm.placeholder((n,), name='B')
- C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
- s = tvm.create_schedule(C.op)
+ n = te.var('n')
+ A = te.placeholder((n,), name='A')
+ B = te.placeholder((n,), name='B')
+ C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
+ s = tvm.te.create_schedule(C.op)
s[C].parallel(s[C].op.axis[0])
print(tvm.lower(s, [A, B, C], simple_mode=True))
tvm.build(s, [A, B, C], 'llvm --system-lib').save(osp.join(sys.argv[1], 'test.o'))
import sys
import tvm
+from tvm import te
from tvm.contrib import cc
def main():
- n = tvm.var('n')
- A = tvm.placeholder((n,), name='A')
- B = tvm.placeholder((n,), name='B')
- C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
- s = tvm.create_schedule(C.op)
+ n = te.var('n')
+ A = te.placeholder((n,), name='A')
+ B = te.placeholder((n,), name='B')
+ C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
+ s = tvm.te.create_schedule(C.op)
s[C].parallel(s[C].op.axis[0])
print(tvm.lower(s, [A, B, C], simple_mode=True))
obj_file = osp.join(sys.argv[1], 'test.o')
"""
import tvm
+from tvm import te
import subprocess
from tvm.contrib import util
from tvm.contrib import cc
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
import topi.testing
from tvm.contrib import cblas
-def verify_matmul_add(m, l, n, transa=False, transb=False, dtype=tvm.float32):
- bias = tvm.var('bias', dtype=dtype)
+def verify_matmul_add(m, l, n, transa=False, transb=False, dtype="float32"):
+ bias = te.var('bias', dtype=dtype)
ashape = (l, n) if transa else (n, l)
bshape = (m, l) if transb else (l, m)
- A = tvm.placeholder(ashape, name='A', dtype=dtype)
- B = tvm.placeholder(bshape, name='B', dtype=dtype)
+ A = te.placeholder(ashape, name='A', dtype=dtype)
+ B = te.placeholder(bshape, name='B', dtype=dtype)
C = cblas.matmul(A, B, transa, transb)
- D = tvm.compute(C.shape, lambda i, j: C[i,j] + bias, name="D")
- s = tvm.create_schedule(D.op)
+ D = te.compute(C.shape, lambda i, j: C[i,j] + bias, name="D")
+ s = te.create_schedule(D.op)
def get_numpy(a, b, bb, transa, transb):
if transa:
verify_matmul_add(1, 16, 3, False, False)
verify_matmul_add(1, 16, 3, True, True)
-def verify_batch_matmul(batch, m, l, n, transa=False, transb=False, iterative=False, dtype=tvm.float32):
+def verify_batch_matmul(batch, m, l, n, transa=False, transb=False, iterative=False, dtype="float32"):
ashape = (batch, l, n) if transa else (batch, n, l)
bshape = (batch, m, l) if transb else (batch, l, m)
- A = tvm.placeholder(ashape, name='A', dtype=dtype)
- B = tvm.placeholder(bshape, name='B', dtype=dtype)
+ A = te.placeholder(ashape, name='A', dtype=dtype)
+ B = te.placeholder(bshape, name='B', dtype=dtype)
C = cblas.batch_matmul(A, B, transa, transb)
- D = tvm.compute(C.shape, lambda k, i, j: C[k, i,j], name="D")
- s = tvm.create_schedule(D.op)
+ D = te.compute(C.shape, lambda k, i, j: C[k, i,j], name="D")
+ s = te.create_schedule(D.op)
def get_numpy(a, b, transa, transb):
if transa:
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
from tvm.contrib import cublas
from tvm.contrib import cublaslt
n = 1024
l = 128
m = 236
- A = tvm.placeholder((n, l), name='A', dtype=in_dtype)
- B = tvm.placeholder((l, m), name='B', dtype=in_dtype)
+ A = te.placeholder((n, l), name='A', dtype=in_dtype)
+ B = te.placeholder((l, m), name='B', dtype=in_dtype)
C = cublas.matmul(A, B, dtype=out_dtype)
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
def verify(target="cuda"):
if not tvm.runtime.enabled(target):
N = roundoff(n, 8)
N_out = roundoff(n, 32)
- A = tvm.placeholder((N, L), name='A', dtype=in_dtype)
- B = tvm.placeholder((m, L), name='B', dtype=in_dtype)
+ A = te.placeholder((N, L), name='A', dtype=in_dtype)
+ B = te.placeholder((m, L), name='B', dtype=in_dtype)
# C has CUBLASLT_ORDER_COL32 layout, thus a different shape
C = cublaslt.matmul(A, B, False, True, m, N_out, dtype=out_dtype)
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
def verify(target="cuda"):
if not tvm.runtime.enabled(target):
n = 1024
l = 128
m = 236
- A = tvm.placeholder((j, n, l), name='A', dtype=in_dtype)
- B = tvm.placeholder((j, l, m), name='B', dtype=in_dtype)
+ A = te.placeholder((j, n, l), name='A', dtype=in_dtype)
+ B = te.placeholder((j, l, m), name='B', dtype=in_dtype)
C = cublas.batch_matmul(A, B, dtype=out_dtype)
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
def verify(target="cuda"):
if not tvm.runtime.enabled(target):
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
from tvm.contrib import cudnn
import numpy as np
import topi.testing
xshape = [batch, height, weight, in_channel]
wshape = [out_channel, filter_h, filter_w, in_channel]
- X = tvm.placeholder(xshape, name='X', dtype=data_dtype)
- W = tvm.placeholder(wshape, name='W', dtype=data_dtype)
+ X = te.placeholder(xshape, name='X', dtype=data_dtype)
+ W = te.placeholder(wshape, name='W', dtype=data_dtype)
Y = cudnn.conv_forward(X,
W,
[pad_h, pad_w],
conv_dtype=conv_dtype,
algo=-1)
yshape = [x.value for x in Y.shape]
- s = tvm.create_schedule(Y.op)
+ s = te.create_schedule(Y.op)
def verify():
ctx = tvm.gpu(0)
xshape = [batch, in_channel, depth, height, weight]
wshape = [out_channel, in_channel, filter_d, filter_h, filter_w]
- X = tvm.placeholder(xshape, name='X', dtype=data_dtype)
- W = tvm.placeholder(wshape, name='W', dtype=data_dtype)
+ X = te.placeholder(xshape, name='X', dtype=data_dtype)
+ W = te.placeholder(wshape, name='W', dtype=data_dtype)
Y = cudnn.conv_forward(X,
W,
[pad_d, pad_h, pad_w],
algo=-1,
conv_dtype=conv_dtype)
yshape = [x.value for x in Y.shape]
- s = tvm.create_schedule(Y.op)
+ s = te.create_schedule(Y.op)
def verify():
ctx = tvm.gpu(0)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
from tvm.contrib.dlpack import to_pytorch_func
np.testing.assert_equal(y.asnumpy(), tvm_x.asnumpy())
np.testing.assert_equal(torch.utils.dlpack.from_dlpack(y.to_dlpack()).numpy(), tvm_x.asnumpy())
- n = tvm.convert(137)
+ n = tvm.runtime.convert(137)
xx = torch.rand(137,137)
yy = torch.rand(137,137)
zz2 = torch.empty(137,137)
zz = xx.mm(yy)
- XX = tvm.placeholder((n,n), name='X')
- YY = tvm.placeholder((n,n), name='Y')
+ XX = te.placeholder((n,n), name='X')
+ YY = te.placeholder((n,n), name='Y')
- k = tvm.reduce_axis((0, n), name='k')
- ZZ = tvm.compute((n,n), lambda i,j : tvm.sum(XX[i,k]*YY[k,j], axis=k))
- s = tvm.create_schedule(ZZ.op)
+ k = te.reduce_axis((0, n), name='k')
+ ZZ = te.compute((n,n), lambda i,j : te.sum(XX[i,k]*YY[k,j], axis=k))
+ s = te.create_schedule(ZZ.op)
f = tvm.build(s, [XX, YY, ZZ], target_host='llvm', name='f')
f_pytorch = to_pytorch_func(f)
# under the License.
import os
import tvm
+from tvm import te
import numpy as np
from tvm import rpc
from tvm.contrib import util, tflite_runtime
# under the License.
# pylint: disable=import-self, invalid-name, unused-argument, too-many-lines, len-as-condition
import tvm
+from tvm import te
import numpy as np
from topi.x86.tensor_intrin import dot_16x1x16_uint8_int8_int16
n = 128
k = 128
- X = tvm.placeholder((m, k), name='X', dtype="uint8")
- W = tvm.placeholder((n, k), name='W', dtype="int8")
+ X = te.placeholder((m, k), name='X', dtype="uint8")
+ W = te.placeholder((n, k), name='W', dtype="int8")
peak = 512/16*2*2*2
gops_per_mm = 2*n*m*k
return
ctx = tvm.context(target, 0)
- X = tvm.placeholder((m, k), name='X', dtype="uint8")
- W = tvm.placeholder((n, k), name='W', dtype="int8")
+ X = te.placeholder((m, k), name='X', dtype="uint8")
+ W = te.placeholder((n, k), name='W', dtype="int8")
pc = dot_16x1x16_uint8_int8_int16()
- ak = tvm.reduce_axis((0, k), name='k')
+ ak = te.reduce_axis((0, k), name='k')
- packedW = tvm.placeholder((n//128, 128*(k//2), 2), name='packedW', dtype="int8")
- t_fc = tvm.compute((m, n), lambda i, j: tvm.sum(X[i, ak].astype("int16") * packedW[j//128, (ak//2)*128+j%128, ak%2].astype("int16"), axis=ak), name="F")
+ packedW = te.placeholder((n//128, 128*(k//2), 2), name='packedW', dtype="int8")
+ t_fc = te.compute((m, n), lambda i, j: te.sum(X[i, ak].astype("int16") * packedW[j//128, (ak//2)*128+j%128, ak%2].astype("int16"), axis=ak), name="F")
- t_sch = tvm.create_schedule(t_fc.op)
+ t_sch = te.create_schedule(t_fc.op)
a_x, a_y = t_fc.op.axis
a_k, = t_fc.op.reduce_axis
# pylint: disable=import-self, invalid-name, unused-argument, too-many-lines, len-as-condition
import tvm
+from tvm import te
import numpy as np
from topi.x86.tensor_intrin import dot_16x1x16_uint8_int8_int32_cascadelake
from topi.x86.tensor_intrin import dot_16x1x16_uint8_int8_int32
n = 1024
k = 1024
- X = tvm.placeholder((m, k), name='X', dtype="uint8")
- W = tvm.placeholder((n, k), name='W', dtype="int8")
+ X = te.placeholder((m, k), name='X', dtype="uint8")
+ W = te.placeholder((n, k), name='W', dtype="int8")
peak = 280
print("Peak {} Gops/s".format(peak))
ctx = tvm.context(target, 0)
pc = dot_16x1x16_uint8_int8_int32_cascadelake()
- ak = tvm.reduce_axis((0, k), name='k')
- packedW = tvm.placeholder(
+ ak = te.reduce_axis((0, k), name='k')
+ packedW = te.placeholder(
(n // 16, 16 * (k // 4), 4), name='packedW', dtype="int8")
- t_fc = tvm.compute((m, n), lambda i, j: tvm.sum(X[i, ak].astype(
+ t_fc = te.compute((m, n), lambda i, j: te.sum(X[i, ak].astype(
"int32") * packedW[j / 16, (ak / 4) * 16 + j % 16, ak % 4].astype("int32"), axis=ak), name="F")
- t_sch = tvm.create_schedule(t_fc.op)
+ t_sch = te.create_schedule(t_fc.op)
a_x, a_y = t_fc.op.axis
a_k, = t_fc.op.reduce_axis
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
from tvm.contrib import miopen
import numpy as np
return
wshape = (out_channel, in_channel, filter_h, filter_w)
- X = tvm.placeholder(xshape, name='X')
- W = tvm.placeholder(wshape, name='W')
+ X = te.placeholder(xshape, name='X')
+ W = te.placeholder(wshape, name='W')
Y = miopen.conv2d_forward(X,
W,
stride_h,
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
from tvm.contrib import mps
n = 1024
l = 128
m = 256
- A = tvm.placeholder((n, l), name='A')
- B = tvm.placeholder((l, m), name='B')
+ A = te.placeholder((n, l), name='A')
+ B = te.placeholder((l, m), name='B')
C = mps.matmul(A, B)
- D = tvm.compute(
+ D = te.compute(
C.shape,
lambda *i: C(*i) + 1.
)
- s = tvm.create_schedule(D.op)
+ s = te.create_schedule(D.op)
yo, xo = D.op.axis
- block_y = tvm.thread_axis("blockIdx.y")
- block_x = tvm.thread_axis("blockIdx.x")
- thread_y = tvm.thread_axis("threadIdx.y")
- thread_x = tvm.thread_axis("threadIdx.x")
+ block_y = te.thread_axis("blockIdx.y")
+ block_x = te.thread_axis("blockIdx.x")
+ thread_y = te.thread_axis("threadIdx.y")
+ thread_x = te.thread_axis("threadIdx.x")
by, ty = s[D].split(yo, factor=16)
bx, tx = s[D].split(xo, factor=16)
s[D].bind(by, block_y)
kh = 3
kw = 3
stride = 2
- A = tvm.placeholder((n, h, w, ci), name="x")
- B = tvm.placeholder((co, kh, kw, ci), name="w")
+ A = te.placeholder((n, h, w, ci), name="x")
+ B = te.placeholder((co, kh, kw, ci), name="w")
C = mps.conv2d(A, B, 'SAME', 2)
- s1 = tvm.create_schedule(C.op)
+ s1 = te.create_schedule(C.op)
def verify(A, B, C, target="llvm"):
if not tvm.get_global_func("tvm.contrib.mps.conv2d", True):
import mxnet as mx
import topi
import tvm
+ from tvm import te
import numpy as np
from tvm.contrib.mxnet import to_mxnet_func
# build a TVM function through topi
n = 20
shape = (20,)
- scale = tvm.var("scale", dtype="float32")
- x = tvm.placeholder(shape)
- y = tvm.placeholder(shape)
+ scale = te.var("scale", dtype="float32")
+ x = te.placeholder(shape)
+ y = te.placeholder(shape)
z = topi.broadcast_add(x, y)
- zz = tvm.compute(shape, lambda *i: z(*i) * scale)
+ zz = te.compute(shape, lambda *i: z(*i) * scale)
target = tvm.target.cuda()
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
import scipy.signal
from topi.nn.util import get_pad_tuple
n = 1024
l = 128
m = 235
- bias = tvm.var('bias', dtype=tvm.float32)
- A = tvm.placeholder((l, ), name='A')
- B = tvm.placeholder((m, l), name='B')
+ bias = te.var('bias', dtype="float32")
+ A = te.placeholder((l, ), name='A')
+ B = te.placeholder((m, l), name='B')
C = nnpack.fully_connected_inference(A, B)
- D = tvm.compute(C.shape, lambda i: C[i] + bias, name="D")
- s = tvm.create_schedule(D.op)
+ D = te.compute(C.shape, lambda i: C[i] + bias, name="D")
+ s = te.create_schedule(D.op)
def verify(target="llvm"):
if not tvm.runtime.enabled(target):
bshape = (OC, )
oshape = (BATCH, OC, OH, OW)
- data = tvm.placeholder(dshape, name='data')
- kernel = tvm.placeholder(kshape, name='kernel')
- bias = tvm.placeholder(bshape, name='bias')
+ data = te.placeholder(dshape, name='data')
+ kernel = te.placeholder(kshape, name='kernel')
+ bias = te.placeholder(bshape, name='bias')
def verify(target="llvm",
algorithm=nnpack.ConvolutionAlgorithm.AUTO,
with_bias=True):
data, kernel, bias if with_bias else None,
[PAD, PAD, PAD, PAD], [STRIDE, STRIDE],
algorithm=algorithm)
- s = tvm.create_schedule(output.op)
+ s = te.create_schedule(output.op)
f = tvm.build(s, [data, kernel, bias, output], target)
bshape = (OC, )
oshape = (BATCH, OC, OH, OW)
- data = tvm.placeholder(dshape, name='data')
- kernel = tvm.placeholder(kshape, name='kernel')
- bias = tvm.placeholder(bshape, name='bias')
+ data = te.placeholder(dshape, name='data')
+ kernel = te.placeholder(kshape, name='kernel')
+ bias = te.placeholder(bshape, name='bias')
def verify(target="llvm",
algorithm=nnpack.ConvolutionAlgorithm.AUTO,
with_bias=True):
[PAD, PAD, PAD, PAD], [STRIDE, STRIDE],
algorithm=algorithm)
- s = tvm.create_schedule(output.op)
+ s = te.create_schedule(output.op)
f = tvm.build(s, [data, kernel, bias, output], target)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
from tvm.contrib import random
m = 1024
n = 1024
A = random.randint(-127, 128, size=(m, n), dtype='int32')
- s = tvm.create_schedule(A.op)
+ s = te.create_schedule(A.op)
def verify(target="llvm"):
if not tvm.runtime.enabled(target):
m = 1024
n = 1024
A = random.uniform(0, 1, size=(m, n))
- s = tvm.create_schedule(A.op)
+ s = te.create_schedule(A.op)
def verify(target="llvm"):
if not tvm.runtime.enabled(target):
m = 1024
n = 1024
A = random.normal(3, 4, size=(m, n))
- s = tvm.create_schedule(A.op)
+ s = te.create_schedule(A.op)
def verify(target="llvm"):
if not tvm.runtime.enabled(target):
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
from tvm.contrib import rocblas
n = 1024
l = 128
m = 235
- A = tvm.placeholder((n, l), name='A')
- B = tvm.placeholder((l, m), name='B')
+ A = te.placeholder((n, l), name='A')
+ B = te.placeholder((l, m), name='B')
C = rocblas.matmul(A, B)
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
def verify(target="rocm"):
if not tvm.runtime.enabled(target):
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import logging
import numpy as np
import time
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import logging
import numpy as np
import time
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
def test_sort():
n = 2
l = 5
m = 3
- data = tvm.placeholder((n, l, m), name='data')
- sort_num = tvm.placeholder((n, m), name="sort_num", dtype="int32")
+ data = te.placeholder((n, l, m), name='data')
+ sort_num = te.placeholder((n, m), name="sort_num", dtype="int32")
axis = 1
is_ascend = False
- out = tvm.extern(data.shape, [data, sort_num],
- lambda ins, outs: tvm.call_packed(
+ out = te.extern(data.shape, [data, sort_num],
+ lambda ins, outs: tvm.tir.call_packed(
"tvm.contrib.sort.argsort_nms", ins[0],
ins[1], outs[0], axis, is_ascend),
dtype='int32', name="sort_tensor")
ctx = tvm.cpu(0)
target = "llvm"
- s = tvm.create_schedule(out.op)
+ s = te.create_schedule(out.op)
f = tvm.build(s, [data, sort_num, out], target)
a = tvm.nd.array(np.array(input).astype(data.dtype), ctx)
b = tvm.nd.array(np.array(sort_num_input).astype(sort_num.dtype), ctx)
axis = 4
reduced_shape = (1, 2, 3, 4, 6)
is_ascend = True
- data = tvm.placeholder(dshape, name='data')
- sort_num = tvm.placeholder(reduced_shape, name="sort_num", dtype="int32")
- out = tvm.extern(data.shape, [data, sort_num],
- lambda ins, outs: tvm.call_packed(
+ data = te.placeholder(dshape, name='data')
+ sort_num = te.placeholder(reduced_shape, name="sort_num", dtype="int32")
+ out = te.extern(data.shape, [data, sort_num],
+ lambda ins, outs: tvm.tir.call_packed(
"tvm.contrib.sort.argsort_nms", ins[0],
ins[1], outs[0], axis, is_ascend),
dtype='int32', name="sort_tensor")
ctx = tvm.cpu(0)
target = "llvm"
- s = tvm.create_schedule(out.op)
+ s = te.create_schedule(out.op)
f = tvm.build(s, [data, sort_num, out], target)
np_data = np.random.uniform(size=dshape)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import tvm.contrib.sparse as tvmsp
import tvm.runtime.ndarray as _nd
import numpy as np
stype = 'csr'
target = 'llvm'
ctx = tvm.context(target, 0)
- m = tvm.size_var('m')
- n = tvm.size_var('n')
+ m = te.size_var('m')
+ n = te.size_var('n')
A = tvmsp.placeholder(shape=(m, n), name='A', dtype=dtype)
assert(A.stype == 'csr')
n = 3
a = np.maximum(np.random.uniform(size=(n,n)).astype(dtype)-.6, 0.)
a = tvmsp.array(a, ctx)
- A.data = tvm.placeholder(a.data.shape, dtype, name='A_data')
- Ab = tvm.decl_buffer(a.data.shape, dtype, name='A_data')
+ A.data = te.placeholder(a.data.shape, dtype, name='A_data')
+ Ab = tvm.tir.decl_buffer(a.data.shape, dtype, name='A_data')
binds = {A.data: Ab}
- C = tvm.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter')
- s = tvm.create_schedule(C.op)
+ C = te.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter')
+ s = te.create_schedule(C.op)
f = tvm.build(s, [A.data, C], target, binds=binds)
c = tvmsp.array(np.zeros((n,n), dtype), ctx)
c.data = tvm.nd.empty(a.data.shape, dtype)
stype = 'csr'
target = 'llvm'
ctx = tvm.context(target, 0)
- nr, nc, n = tvm.size_var('nr'), tvm.size_var('nc'), tvm.size_var('n')
+ nr, nc, n = te.size_var('nr'), te.size_var('nc'), te.size_var('n')
A = tvmsp.placeholder(shape=(nr, nc), nonzeros=n, name='A', dtype=dtype)
assert(A.stype == 'csr')
- C = tvm.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter')
- s = tvm.create_schedule(C.op)
+ C = te.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter')
+ s = te.create_schedule(C.op)
_nr, _nc = 3, 5
a = np.maximum(np.random.uniform(size=(_nr, _nc)).astype(dtype)-.6, 0.)
a = tvmsp.array(a, ctx)
assert a.data.dtype == a.dtype
Ab = namedtuple('CSRBuffer', ['data', 'indices', 'indptr'])
- Ab.data = tvm.decl_buffer(a.data.shape, a.data.dtype, name='A_data')
- Ab.indices = tvm.decl_buffer(a.data.shape, a.data.dtype, name='A_indices')
+ Ab.data = tvm.tir.decl_buffer(a.data.shape, a.data.dtype, name='A_data')
+ Ab.indices = tvm.tir.decl_buffer(a.data.shape, a.data.dtype, name='A_indices')
binds = {A.data: Ab.data, A.indices: Ab.indices}
f = tvm.build(s, [nr, A.data, C], target, binds=binds)
c = tvmsp.array(np.zeros((_nr, _nc), dtype), ctx)
stype = 'csr'
target = 'llvm'
ctx = tvm.context(target, 0)
- nr, nc, n = tvm.size_var('nr'), tvm.size_var('nc'), tvm.size_var('n')
+ nr, nc, n = te.size_var('nr'), te.size_var('nc'), te.size_var('n')
A = tvmsp.placeholder(shape=(nr, nc), nonzeros=n, name='A', dtype=dtype)
assert(A.stype == 'csr')
- C = tvm.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter')
- s = tvm.create_schedule(C.op)
+ C = te.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter')
+ s = te.create_schedule(C.op)
_nr, _nc = 3, 5
a = np.maximum(np.random.uniform(size=(_nr, _nc)).astype(dtype)-.6, 0.)
# convert to sparse array tuple
a = tvmsp.array(a_init, shape=source_array.shape, ctx=ctx)
assert a.data.dtype == a.dtype
Ab = namedtuple('CSRBuffer', ['data', 'indices', 'indptr'])
- Ab.data = tvm.decl_buffer(a.data.shape, a.data.dtype, name='A_data')
- Ab.indices = tvm.decl_buffer(a.data.shape, a.data.dtype, name='A_indices')
+ Ab.data = tvm.tir.decl_buffer(a.data.shape, a.data.dtype, name='A_data')
+ Ab.indices = tvm.tir.decl_buffer(a.data.shape, a.data.dtype, name='A_indices')
binds = {A.data: Ab.data, A.indices: Ab.indices}
f = tvm.build(s, [nr, A.data, C], target, binds=binds)
c = tvmsp.array(np.zeros((_nr, _nc), dtype), ctx)
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-import tvm
+from tvm import te
import numpy as np
import re
import topi
return not {'graphviz', 'ipython'} - {pkg.key for pkg in pkg_resources.working_set}
def test_dfg():
- A = tvm.placeholder((1024, 4096), dtype='float32', name='A')
+ A = te.placeholder((1024, 4096), dtype='float32', name='A')
B = topi.nn.softmax(A)
# confirm lower works
- s = tvm.create_schedule([B.op])
+ s = te.create_schedule([B.op])
def verify():
from tvm.contrib import tedd
findany(r"Stage_2:O_0 -> Tensor_2_0", str)
findany(r"Tensor_2_0 -> Stage_3:I_0", str)
findany(r"Stage_3:O_0 -> Tensor_3_0", str)
- findany(r"Tensor_2_0 -> Stage_4:I_0", str)
+ findany(r"Tensor_2_0 -> Stage_4:I_0", str)
findany(r"Tensor_3_0 -> Stage_4:I_1", str)
findany(r"Stage_4:O_0 -> Tensor_4_0", str)
if checkdepdency():
def test_itervar_relationship_graph():
- n = tvm.var("n")
- m = tvm.var("m")
- A = tvm.placeholder((n, m), name='A')
- k = tvm.reduce_axis((0, m), "k")
- B = tvm.compute((n, ), lambda i: tvm.sum(A[i, k], axis=k), name="B")
+ n = te.var("n")
+ m = te.var("m")
+ A = te.placeholder((n, m), name='A')
+ k = te.reduce_axis((0, m), "k")
+ B = te.compute((n, ), lambda i: te.sum(A[i, k], axis=k), name="B")
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
s[B].split(B.op.reduce_axis[0], factor=16)
def verify():
def test_schedule_tree():
- block_x = tvm.thread_axis('blockIdx.x')
- thread_x = tvm.thread_axis('threadIdx.x')
- n = tvm.var("n")
- m = tvm.var("m")
- l = tvm.var("l")
- A = tvm.placeholder((n, m, l), name='A')
- B = tvm.compute((n, m, l), lambda bi, bj, bk: A[bi, bj, bk] + 1, name='B')
- r = tvm.reduce_axis((0, m), "r")
- C = tvm.compute((n, m,),
- lambda ci, cj: tvm.sum(B[ci, cj, r], axis=r),
- name="C")
- s = tvm.create_schedule(C.op)
+ block_x = te.thread_axis('blockIdx.x')
+ thread_x = te.thread_axis('threadIdx.x')
+ n = te.var("n")
+ m = te.var("m")
+ l = te.var("l")
+ A = te.placeholder((n, m, l), name='A')
+ B = te.compute((n, m, l), lambda bi, bj, bk: A[bi, bj, bk] + 1, name='B')
+ r = te.reduce_axis((0, m), "r")
+ C = te.compute((n, m,),
+ lambda ci, cj: te.sum(B[ci, cj, r], axis=r),
+ name="C")
+ s = te.create_schedule(C.op)
s.cache_read(A, 'shared', [B])
s[B].vectorize(B.op.axis[-1])
s[C].reorder(C.op.reduce_axis[0], C.op.axis[0])
str = tedd.viz_schedule_tree(s, False, '', True)
findany(r"digraph \"Schedule Tree\"", str)
findany(r"subgraph cluster_legend", str)
- # Check the A_shared stage, including memory scope, itervars,
+ # Check the A_shared stage, including memory scope, itervars,
# and compute
findany(r"Stage_1.*A\.shared<br/>Scope: shared.+>0.+>" \
r"ax0\(kDataPar\).+>1.+ax1\(kDataPar\).+>2.+>ax2\(kDataPar\).+>" \
if __name__ == "__main__":
test_dfg()
test_itervar_relationship_graph()
- test_schedule_tree()
\ No newline at end of file
+ test_schedule_tree()
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
from tvm import rpc
from tvm.contrib import util, tflite_runtime
root = tf.Module()
root.const = tf.constant([1., 2.], tf.float32)
root.f = tf.function(lambda x: root.const * x)
-
+
input_signature = tf.TensorSpec(shape=[2, ], dtype=tf.float32)
concrete_func = root.f.get_concrete_function(input_signature)
converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
-
+
input_shape = input_details[0]['shape']
tflite_input = np.array(np.random.random_sample(input_shape), dtype=np.float32)
interpreter.set_tensor(input_details[0]['index'], tflite_input)
interpreter.invoke()
tflite_output = interpreter.get_tensor(output_details[0]['index'])
-
+
# inference via tvm tflite runtime
with open(tflite_model_path, 'rb') as model_fin:
runtime = tflite_runtime.create(model_fin.read(), tvm.cpu(0))
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
-
+
input_shape = input_details[0]['shape']
tflite_input = np.array(np.random.random_sample(input_shape), dtype=np.float32)
interpreter.set_tensor(input_details[0]['index'], tflite_input)
# under the License.
import numpy as np
import tvm
+from tvm import te
from tvm.contrib import graph_runtime
from tvm.relay.testing.config import ctx_list
from tvm import relay
from coremltools.models import datatypes
import tvm
+from tvm import te
from tvm.contrib import graph_runtime
import topi
import topi.testing
"""
import numpy as np
import tvm
+from tvm import te
from tvm.contrib import graph_runtime
from tvm.contrib.download import download_testdata
download_testdata.__test__ = False
# under the License.
import numpy as np
import tvm
+from tvm import te
from tvm import relay
from tvm.contrib import graph_runtime
from tvm.relay.testing.config import ctx_list
import operator
import tvm
+from tvm import te
from tvm.contrib import graph_runtime
from tvm.relay.testing.config import ctx_list
from tvm import relay
import mxnet as mx
import tvm
+from tvm import te
from tvm import relay
from tvm.relay import transform
import model_zoo
# under the License.
import tvm
+from tvm import te
import numpy as np
from tvm import relay
from tvm.contrib import graph_runtime
import topi
import topi.testing
import tvm
+from tvm import te
from tvm import relay
from tvm.contrib import graph_runtime
from tvm.relay.testing.config import ctx_list
import torch
from torch.nn import Module
import tvm
+from tvm import te
import torchvision
from tvm import relay
def test_vgg11_bn():
torch.set_grad_enabled(False)
verify_model("vgg11_bn")
-
+
#TODO: Need to update schedule in tophub file after PR #4787 updated workloads
def test_mobilenet_v2():
torch.set_grad_enabled(False)
from tensorflow.python.ops import init_ops
from distutils.version import LooseVersion
import tvm
+from tvm import te
from tvm import relay
import tvm.relay.testing.tf as tf_testing
in_data = tf.placeholder(tf.bool, (5, 7, 11), name="in_data")
tf.reduce_any(in_data, name="any")
compare_tf_with_tvm([np_data], ['in_data:0'], 'any:0')
-
+
def test_forward_reduce_max():
def check_max(ishape, axis, keepdims, dtype):
tf.reset_default_graph()
from functools import partial
import numpy as np
import tvm
+from tvm import te
from tvm import relay
import tensorflow as tf
from tensorflow.python.framework import constant_op
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
def lower(s, args, name="mydot"):
arg_list = []
for x in args:
- assert isinstance(x, tvm.tensor.Tensor)
- buf = tvm.decl_buffer(x.shape, dtype=x.dtype, name=x.op.name)
+ assert isinstance(x, te.tensor.Tensor)
+ buf = tvm.tir.decl_buffer(x.shape, dtype=x.dtype, name=x.op.name)
binds[x] = buf
arg_list.append(buf)
s = s.normalize()
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
- stmt = tvm.ir_pass.StorageFlatten(stmt, binds, 16)
- stmt = tvm.ir_pass.CanonicalSimplify(stmt)
- stmt = tvm.ir_pass.Simplify(stmt)
- fapi = tvm.ir_pass.MakeAPI(stmt, name, arg_list, 0, True)
- fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
+ stmt = tvm.tir.ir_pass.StorageFlatten(stmt, binds, 16)
+ stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
+ fapi = tvm.tir.ir_pass.MakeAPI(stmt, name, arg_list, 0, True)
+ fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi)
return fapi
def test_dot():
nn = 12
- n = tvm.convert(nn)
- A = tvm.placeholder((n,), name='A')
- B = tvm.placeholder((n,), name='B')
- k = tvm.reduce_axis((0, n), 'k')
- C = tvm.compute((1,), lambda _: tvm.sum(A[k] * B[k], axis=k), name='C')
- s = tvm.create_schedule(C.op)
+ n = tvm.runtime.convert(nn)
+ A = te.placeholder((n,), name='A')
+ B = te.placeholder((n,), name='B')
+ k = te.reduce_axis((0, n), 'k')
+ C = te.compute((1,), lambda _: te.sum(A[k] * B[k], axis=k), name='C')
+ s = te.create_schedule(C.op)
fapi = lower(s, [A, B, C])
def verify(target):
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
from tvm.contrib import nvcc
import numpy as np
import time
def test_exp():
# graph
- n = tvm.convert(1024)
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute(A.shape, lambda *i: tvm.exp(A(*i)), name='B')
- s = tvm.create_schedule(B.op)
+ n = tvm.runtime.convert(1024)
+ A = te.placeholder((n,), name='A')
+ B = te.compute(A.shape, lambda *i: te.exp(A(*i)), name='B')
+ s = te.create_schedule(B.op)
# create iter var and assign them tags.
num_thread = 8
bx, tx = s[B].split(B.op.axis[0], factor=num_thread)
- s[B].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[B].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[B].bind(bx, te.thread_axis("blockIdx.x"))
+ s[B].bind(tx, te.thread_axis("threadIdx.x"))
# one line to build the function.
def check_device(device, host="stackvm"):
def test_fmod():
# graph
def run(dtype):
- n = tvm.size_var('n')
- A = tvm.placeholder((n,), name='A', dtype=dtype)
- B = tvm.placeholder((n,), name='B', dtype=dtype)
- C = tvm.compute(A.shape, lambda *i: tvm.fmod(A(*i), B(*i)), name='C')
- s = tvm.create_schedule(C.op)
+ n = te.size_var('n')
+ A = te.placeholder((n,), name='A', dtype=dtype)
+ B = te.placeholder((n,), name='B', dtype=dtype)
+ C = te.compute(A.shape, lambda *i: te.fmod(A(*i), B(*i)), name='C')
+ s = te.create_schedule(C.op)
# create iter var and assign them tags.
num_thread = 8
bx, tx = s[C].split(C.op.axis[0], factor=num_thread)
return
target = tvm.target.create(device)
if "cpu" not in target.keys:
- s[C].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[C].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[C].bind(bx, te.thread_axis("blockIdx.x"))
+ s[C].bind(tx, te.thread_axis("threadIdx.x"))
fmod = tvm.build(s, [A, B, C], device, name="myfmod")
# launch the kernel.
def test_multiple_cache_write():
# graph
- n = tvm.convert(1024)
- A0 = tvm.placeholder((n,), name='A0', dtype = "float32")
- A1 = tvm.placeholder((n,), name='A1', dtype = "float32")
- B0, B1 = tvm.compute((n,),
+ n = tvm.runtime.convert(1024)
+ A0 = te.placeholder((n,), name='A0', dtype = "float32")
+ A1 = te.placeholder((n,), name='A1', dtype = "float32")
+ B0, B1 = te.compute((n,),
lambda *i: (A0(*i) + A1(*i), A0(*i) * A1(*i)),
name='B')
- C = tvm.compute((n,), lambda *i: B0(*i) + B1(*i),
+ C = te.compute((n,), lambda *i: B0(*i) + B1(*i),
name='C')
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
# create iter var and assign them tags.
num_thread = 8
B0_cache, B1_cache = s.cache_write([B0, B1], "local")
bx, tx = s[C].split(C.op.axis[0], factor=num_thread)
s[B0].compute_at(s[C], bx)
s[B0_cache].compute_at(s[C], bx)
- s[C].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[C].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[C].bind(bx, te.thread_axis("blockIdx.x"))
+ s[C].bind(tx, te.thread_axis("threadIdx.x"))
# one line to build the function.
def check_device(device, host="stackvm"):
if not tvm.runtime.enabled(host):
def test_log_pow_llvm():
# graph
- n = tvm.size_var('n')
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute(A.shape, lambda *i: tvm.power(tvm.log(A(*i)), 2.0), name='B')
- s = tvm.create_schedule(B.op)
+ n = te.size_var('n')
+ A = te.placeholder((n,), name='A')
+ B = te.compute(A.shape, lambda *i: te.power(te.log(A(*i)), 2.0), name='B')
+ s = te.create_schedule(B.op)
# create iter var and assign them tags.
bx, tx = s[B].split(B.op.axis[0], factor=32)
# one line to build the function.
def test_popcount():
def run(dtype):
# graph
- n = tvm.convert(1024)
- A = tvm.placeholder((n,), name='A', dtype=dtype)
- B = tvm.compute(A.shape, lambda *i: tvm.popcount(A(*i)), name='B')
- s = tvm.create_schedule(B.op)
+ n = tvm.runtime.convert(1024)
+ A = te.placeholder((n,), name='A', dtype=dtype)
+ B = te.compute(A.shape, lambda *i: tvm.tir.popcount(A(*i)), name='B')
+ s = te.create_schedule(B.op)
# simple schedule
num_thread = 8
bx, tx = s[B].split(B.op.axis[0], factor=num_thread)
return
target = tvm.target.create(device)
if "cpu" not in target.keys:
- s[B].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[B].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[B].bind(bx, te.thread_axis("blockIdx.x"))
+ s[B].bind(tx, te.thread_axis("threadIdx.x"))
func = tvm.build(s, [A, B], device)
# launch the kernel.
n = 1024
def test_add():
def run(dtype):
# graph
- n = tvm.size_var('n')
- A = tvm.placeholder((n,), name='A', dtype=dtype)
- B = tvm.placeholder((n,), name='B', dtype=dtype)
- bias = tvm.var("bias", dtype=dtype)
- scale = tvm.var("scale", dtype=dtype)
- C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
+ n = te.size_var('n')
+ A = te.placeholder((n,), name='A', dtype=dtype)
+ B = te.placeholder((n,), name='B', dtype=dtype)
+ bias = te.var("bias", dtype=dtype)
+ scale = te.var("scale", dtype=dtype)
+ C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
# schedule
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
# create iter var and assign them tags.
num_thread = 16
bx, x = s[C].split(C.op.axis[0], factor=num_thread*4)
tx, x = s[C].split(x, nparts=num_thread)
_, x = s[C].split(x, factor=4)
- s[C].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[C].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[C].bind(bx, te.thread_axis("blockIdx.x"))
+ s[C].bind(tx, te.thread_axis("threadIdx.x"))
s[C].vectorize(x)
# one line to build the function.
def try_warp_memory():
"""skip this in default test because it require higher arch"""
m = 128
- A = tvm.placeholder((m,), name='A')
- B = tvm.compute((m,), lambda i: A[i] + 3, name='B')
+ A = te.placeholder((m,), name='A')
+ B = te.compute((m,), lambda i: A[i] + 3, name='B')
warp_size = 32
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
AA = s.cache_read(A, "warp", [B])
xo, xi = s[B].split(B.op.axis[0], warp_size * 2)
xi0, xi1 = s[B].split(xi, factor=warp_size)
- tx = tvm.thread_axis("threadIdx.x")
+ tx = te.thread_axis("threadIdx.x")
s[B].bind(xi1, tx)
- s[B].bind(xo, tvm.thread_axis("blockIdx.x"))
+ s[B].bind(xo, te.thread_axis("blockIdx.x"))
s[AA].compute_at(s[B], xo)
xo, xi = s[AA].split(s[AA].op.axis[0], warp_size)
s[AA].bind(xi, tx)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
import os
def test_exp():
# graph
- n = tvm.convert(1024)
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute(A.shape, lambda *i: tvm.exp(A(*i)), name='B')
- s = tvm.create_schedule(B.op)
+ n = tvm.runtime.convert(1024)
+ A = te.placeholder((n,), name='A')
+ B = te.compute(A.shape, lambda *i: te.exp(A(*i)), name='B')
+ s = te.create_schedule(B.op)
# create iter var and assign them tags.
px, x = s[B].split(B.op.axis[0], nparts=1)
- s[B].bind(px, tvm.thread_axis("pipeline"))
+ s[B].bind(px, te.thread_axis("pipeline"))
# one line to build the function.
def check_device(device, host="llvm"):
def test_multi_kernel():
# graph
- n = tvm.convert(1024)
- A = tvm.placeholder((n,), name='A')
- B = tvm.placeholder((n,), name='B')
- C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
- D = tvm.compute(A.shape, lambda *i: A(*i) + C(*i), name='D')
- s = tvm.create_schedule(D.op)
+ n = tvm.runtime.convert(1024)
+ A = te.placeholder((n,), name='A')
+ B = te.placeholder((n,), name='B')
+ C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
+ D = te.compute(A.shape, lambda *i: A(*i) + C(*i), name='D')
+ s = te.create_schedule(D.op)
# create iter var and assign them tags.
px, x = s[C].split(C.op.axis[0], nparts=1)
- s[C].bind(px, tvm.thread_axis("pipeline"))
+ s[C].bind(px, te.thread_axis("pipeline"))
px, x = s[D].split(D.op.axis[0], nparts=1)
- s[D].bind(px, tvm.thread_axis("pipeline"))
+ s[D].bind(px, te.thread_axis("pipeline"))
# one line to build the function.
def check_device(device, host="llvm"):
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
import time
def test_gemm():
# graph
nn = 1024
- n = tvm.convert(nn)
+ n = tvm.runtime.convert(nn)
m = n
l = n
- A = tvm.placeholder((n, l), name='A')
- B = tvm.placeholder((m, l), name='B')
- k = tvm.reduce_axis((0, l), name='k')
- C = tvm.compute(
+ A = te.placeholder((n, l), name='A')
+ B = te.placeholder((m, l), name='B')
+ k = te.reduce_axis((0, l), name='k')
+ C = te.compute(
(n, m),
- lambda ii, jj: tvm.sum(A[ii, k] * B[jj, k], axis=k),
+ lambda ii, jj: te.sum(A[ii, k] * B[jj, k], axis=k),
name='CC')
# schedule
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
xtile, ytile = 32, 32
scale = 8
num_thread = 8
block_factor = scale * num_thread
- block_x = tvm.thread_axis("blockIdx.x")
- thread_x = tvm.thread_axis("threadIdx.x")
- block_y = tvm.thread_axis("blockIdx.y")
- thread_y = tvm.thread_axis("threadIdx.y")
+ block_x = te.thread_axis("blockIdx.x")
+ thread_x = te.thread_axis("threadIdx.x")
+ block_y = te.thread_axis("blockIdx.y")
+ thread_y = te.thread_axis("threadIdx.y")
CC = s.cache_write(C, "local")
AA = s.cache_read(A, "shared", [CC])
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
def test_reduce_prims():
def test_prim(reducer, np_reducer):
# graph
- n = tvm.size_var('n')
- m = tvm.size_var('m')
- A = tvm.placeholder((n, m), name='A')
- R = tvm.compute((n, ), lambda i: tvm.tir.Select((i > 1), 1, 0), name='R')
- k = tvm.reduce_axis((0, m))
- B = tvm.compute((n,), lambda i: reducer(A[i, k], axis=k, where=(R[i]==1)), name='B')
+ n = tvm.te.size_var('n')
+ m = tvm.te.size_var('m')
+ A = te.placeholder((n, m), name='A')
+ R = te.compute((n, ), lambda i: tvm.tir.Select((i > 1), 1, 0), name='R')
+ k = te.reduce_axis((0, m))
+ B = te.compute((n,), lambda i: reducer(A[i, k], axis=k, where=(R[i]==1)), name='B')
# schedule
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
# create iter var and assign them tags.
num_thread = 1
xo, xi = s[B].split(B.op.axis[0], factor=num_thread)
- s[B].bind(xo, tvm.thread_axis("blockIdx.x"))
- s[B].bind(xi, tvm.thread_axis("threadIdx.x"))
+ s[B].bind(xo, te.thread_axis("blockIdx.x"))
+ s[B].bind(xi, te.thread_axis("threadIdx.x"))
s[R].compute_inline()
# one line to build the function.
check_device("vulkan")
check_device("cuda")
check_device("opencl")
- test_prim(tvm.sum, np.sum)
- test_prim(tvm.min, np.amin)
- test_prim(tvm.max, np.amax)
+ test_prim(te.sum, np.sum)
+ test_prim(tvm.te.min, np.amin)
+ test_prim(tvm.te.max, np.amax)
def test_rfactor():
- n = tvm.convert(1027)
- A = tvm.placeholder((n,), name='A')
- k = tvm.reduce_axis((0, n))
- B = tvm.compute((1,), lambda i: tvm.sum(A[k], axis=k), name='B')
+ n = tvm.runtime.convert(1027)
+ A = te.placeholder((n,), name='A')
+ k = te.reduce_axis((0, n))
+ B = te.compute((1,), lambda i: te.sum(A[k], axis=k), name='B')
# schedule
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
kf, ki = s[B].split(k, nparts=4)
BF = s.rfactor(B, kf)
s[BF].parallel(BF.op.axis[0])
check_target()
def test_rfactor_factor_axis():
- n = tvm.convert(1027)
- A = tvm.placeholder((n,), name='A')
- k = tvm.reduce_axis((0, n))
- B = tvm.compute((1,), lambda i: tvm.sum(A[k], axis=k), name='B')
+ n = tvm.runtime.convert(1027)
+ A = te.placeholder((n,), name='A')
+ k = te.reduce_axis((0, n))
+ B = te.compute((1,), lambda i: te.sum(A[k], axis=k), name='B')
# schedule
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
kf, ki = s[B].split(k, nparts=4)
BF = s.rfactor(B, kf, 1)
s[BF].parallel(BF.op.axis[0])
def test_rfactor_threads():
nn = 1027
mm = 10
- n = tvm.convert(nn)
- m = tvm.convert(mm)
- A = tvm.placeholder((m, n), name='A')
- k = tvm.reduce_axis((0, n))
+ n = tvm.runtime.convert(nn)
+ m = tvm.runtime.convert(mm)
+ A = te.placeholder((m, n), name='A')
+ k = te.reduce_axis((0, n))
nthread = 16
- B = tvm.compute((m,), lambda i: tvm.sum(A[i, k], axis=k, where=(i>1)), name='B')
+ B = te.compute((m,), lambda i: te.sum(A[i, k], axis=k, where=(i>1)), name='B')
# schedule
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
ko, kf = s[B].split(k, factor=nthread)
BF = s.rfactor(B, kf)
bx, ty = s[B].split(s[B].op.axis[0], factor=nthread)
- s[B].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[B].bind(ty, tvm.thread_axis("threadIdx.y"))
+ s[B].bind(bx, te.thread_axis("blockIdx.x"))
+ s[B].bind(ty, te.thread_axis("threadIdx.y"))
tx = s[B].op.reduce_axis[0]
- thread_x = tvm.thread_axis("threadIdx.x")
+ thread_x = te.thread_axis("threadIdx.x")
s[B].bind(tx, thread_x)
s[BF].compute_at(s[B], tx)
s[B].set_store_predicate(thread_x.var.equal(0))
def test_rfactor_elemwise_threads():
n = 1025
m = 10
- A = tvm.placeholder((m, n), name='A')
- k = tvm.reduce_axis((0, n))
+ A = te.placeholder((m, n), name='A')
+ k = te.reduce_axis((0, n))
nthread = 16
- B = tvm.compute((m,), lambda i: tvm.sum(A[i, k], axis=k), name='B')
- BB = tvm.compute((m,), lambda i: B[i] + 1, name='BB')
- C = tvm.compute((m,), lambda i: BB[i] + 1, name='C')
+ B = te.compute((m,), lambda i: te.sum(A[i, k], axis=k), name='B')
+ BB = te.compute((m,), lambda i: B[i] + 1, name='BB')
+ C = te.compute((m,), lambda i: BB[i] + 1, name='C')
# schedule
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
s[BB].compute_inline()
bx, ty = s[C].split(s[C].op.axis[0], factor=nthread)
ko, kf = s[B].split(k, factor=nthread)
BF = s.rfactor(B, kf)
s[B].compute_at(s[C], ty)
- s[C].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[C].bind(ty, tvm.thread_axis("threadIdx.y"))
+ s[C].bind(bx, te.thread_axis("blockIdx.x"))
+ s[C].bind(ty, te.thread_axis("threadIdx.y"))
tx = s[B].op.reduce_axis[0]
- thread_x = tvm.thread_axis("threadIdx.x")
+ thread_x = te.thread_axis("threadIdx.x")
s[B].bind(tx, thread_x)
s[BF].compute_at(s[B], tx)
# Since thread_x is shared across reductions
return lhs, rhs
def fidentity(t0, t1):
- return tvm.const(-1, t0), tvm.min_value(t1)
+ return tvm.tir.const(-1, t0), tvm.te.min_value(t1)
- argmax = tvm.comm_reducer(fcombine,
+ argmax = te.comm_reducer(fcombine,
fidentity,
name='argmax')
- m = tvm.size_var('m')
- n = tvm.size_var('n')
- idx = tvm.placeholder((m, n), name='idx', dtype='int32')
- val = tvm.placeholder((m, n), name='val', dtype='float32')
- k = tvm.reduce_axis((0, n), 'k')
- T0, T1 = tvm.compute((m,), lambda i: argmax((idx[i,k], val[i,k]), axis=k), name='T')
- s = tvm.create_schedule(T0.op)
+ m = te.size_var('m')
+ n = te.size_var('n')
+ idx = te.placeholder((m, n), name='idx', dtype='int32')
+ val = te.placeholder((m, n), name='val', dtype='float32')
+ k = te.reduce_axis((0, n), 'k')
+ T0, T1 = te.compute((m,), lambda i: argmax((idx[i,k], val[i,k]), axis=k), name='T')
+ s = te.create_schedule(T0.op)
def check_target():
device = 'cpu'
return lhs, rhs
def fidentity(t0, t1):
- return tvm.const(-1, t0), tvm.min_value(t1)
+ return tvm.tir.const(-1, t0), tvm.te.min_value(t1)
- argmax = tvm.comm_reducer(fcombine,
+ argmax = te.comm_reducer(fcombine,
fidentity,
name='argmax')
nn = 1027
mm = 10
- n = tvm.convert(nn)
- m = tvm.convert(mm)
- A0 = tvm.placeholder((m, n), name='A0', dtype='int32')
- A1 = tvm.placeholder((m, n), name='A1', dtype='float32')
- k = tvm.reduce_axis((0, n))
- B0, B1 = tvm.compute((m,), lambda i: argmax((A0[i, k], A1[i, k]), axis=k), name='B')
+ n = tvm.runtime.convert(nn)
+ m = tvm.runtime.convert(mm)
+ A0 = te.placeholder((m, n), name='A0', dtype='int32')
+ A1 = te.placeholder((m, n), name='A1', dtype='float32')
+ k = te.reduce_axis((0, n))
+ B0, B1 = te.compute((m,), lambda i: argmax((A0[i, k], A1[i, k]), axis=k), name='B')
# schedule
- s = tvm.create_schedule(B0.op)
+ s = te.create_schedule(B0.op)
nthread = 16
ko, kf = s[B0].split(k, factor=nthread)
BF0, BF1 = s.rfactor(B0, kf)
bx, ty = s[B0].split(s[B0].op.axis[0], factor=nthread)
- s[B0].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[B0].bind(ty, tvm.thread_axis("threadIdx.y"))
+ s[B0].bind(bx, te.thread_axis("blockIdx.x"))
+ s[B0].bind(ty, te.thread_axis("threadIdx.y"))
tx = s[B0].op.reduce_axis[0]
- thread_x = tvm.thread_axis("threadIdx.x")
+ thread_x = te.thread_axis("threadIdx.x")
s[B0].bind(tx, thread_x)
s[BF0.op].compute_at(s[B0], tx)
s[B0].set_store_predicate(thread_x.var.equal(0))
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
def test_scan():
- m = tvm.size_var("m")
- n = tvm.size_var("n")
- X = tvm.placeholder((m, n), name="X")
- s_state = tvm.placeholder((m, n))
- s_init = tvm.compute((1, n), lambda _, i: X[0, i])
- s_update = tvm.compute((m, n), lambda t, i: s_state[t-1, i] + X[t, i])
- scan = tvm.scan(s_init, s_update, s_state)
+ m = te.size_var("m")
+ n = te.size_var("n")
+ X = te.placeholder((m, n), name="X")
+ s_state = te.placeholder((m, n))
+ s_init = te.compute((1, n), lambda _, i: X[0, i])
+ s_update = te.compute((m, n), lambda t, i: s_state[t-1, i] + X[t, i])
+ scan = tvm.te.scan(s_init, s_update, s_state)
# test scan + compute case
- res = tvm.compute((m, n), lambda i, j: scan[i, j])
+ res = te.compute((m, n), lambda i, j: scan[i, j])
# schedule
- s = tvm.create_schedule(res.op)
+ s = te.create_schedule(res.op)
num_thread = 256
- block_x = tvm.thread_axis(None, "blockIdx.x")
- thread_x = tvm.thread_axis((0, num_thread), "threadIdx.x")
+ block_x = te.thread_axis(None, "blockIdx.x")
+ thread_x = te.thread_axis((0, num_thread), "threadIdx.x")
xo, xi = s[s_init].split(s_init.op.axis[1], factor=num_thread)
s[s_init].bind(xo, block_x)
s[s_init].bind(xi, thread_x)
import time
import tvm
+from tvm import te
from tvm import autotvm
from tvm.autotvm.tuner import RandomTuner
"""An example template for testing"""
assert N == 1, "Only consider batch_size = 1 in this template"
- data = tvm.placeholder((N, CI, H, W), name='data')
- kernel = tvm.placeholder((CO, CI, KH, KW), name='kernel')
+ data = te.placeholder((N, CI, H, W), name='data')
+ kernel = te.placeholder((CO, CI, KH, KW), name='kernel')
- rc = tvm.reduce_axis((0, CI), name='rc')
- ry = tvm.reduce_axis((0, KH), name='ry')
- rx = tvm.reduce_axis((0, KW), name='rx')
+ rc = te.reduce_axis((0, CI), name='rc')
+ ry = te.reduce_axis((0, KH), name='ry')
+ rx = te.reduce_axis((0, KW), name='rx')
- conv = tvm.compute(
+ conv = te.compute(
(N, CO, H - KH + 1, W - KW + 1),
- lambda nn, ff, yy, xx: tvm.sum(
+ lambda nn, ff, yy, xx: te.sum(
data[nn, rc, yy + ry, xx + rx] * kernel[ff, rc, ry, rx],
axis=[rc, ry, rx]), tag="conv2d_nchw")
- s = tvm.create_schedule([conv.op])
+ s = te.create_schedule([conv.op])
output = conv
OL = s.cache_write(conv, 'local')
bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x)
kernel_scope = n # this is the scope to attach global config inside this kernel
- s[output].bind(bf, tvm.thread_axis("blockIdx.z"))
- s[output].bind(by, tvm.thread_axis("blockIdx.y"))
- s[output].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[output].bind(vf, tvm.thread_axis("vthread"))
- s[output].bind(vy, tvm.thread_axis("vthread"))
- s[output].bind(vx, tvm.thread_axis("vthread"))
- s[output].bind(tf, tvm.thread_axis("threadIdx.z"))
- s[output].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[output].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[output].bind(bf, te.thread_axis("blockIdx.z"))
+ s[output].bind(by, te.thread_axis("blockIdx.y"))
+ s[output].bind(bx, te.thread_axis("blockIdx.x"))
+ s[output].bind(vf, te.thread_axis("vthread"))
+ s[output].bind(vy, te.thread_axis("vthread"))
+ s[output].bind(vx, te.thread_axis("vthread"))
+ s[output].bind(tf, te.thread_axis("threadIdx.z"))
+ s[output].bind(ty, te.thread_axis("threadIdx.y"))
+ s[output].bind(tx, te.thread_axis("threadIdx.x"))
s[output].reorder(n, bf, by, bx, vf, vy, vx, tf, ty, tx, fi, yi, xi)
s[OL].compute_at(s[output], tx)
tz, fused = s[load].split(fused, nparts=cfg["tile_f"].size[2])
ty, fused = s[load].split(fused, nparts=cfg["tile_y"].size[2])
tx, fused = s[load].split(fused, nparts=cfg["tile_x"].size[2])
- s[load].bind(tz, tvm.thread_axis("threadIdx.z"))
- s[load].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[load].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[load].bind(tz, te.thread_axis("threadIdx.z"))
+ s[load].bind(ty, te.thread_axis("threadIdx.y"))
+ s[load].bind(tx, te.thread_axis("threadIdx.x"))
# tune unroll
cfg.define_knob("auto_unroll_max_step", [0, 512, 1500])
# under the License.
import numpy as np
import tvm
+from tvm import te
from tvm import autotvm
from tvm.autotvm.task.space import FallbackConfigEntity
from tvm.contrib import nnpack
in_height = in_width = in_size
- A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A')
- W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W')
- bias = tvm.placeholder((num_filter, 1, 1), name='bias')
+ A = te.placeholder((batch, in_channel, in_height, in_width), name='A')
+ W = te.placeholder((num_filter, in_channel, kernel, kernel), name='W')
+ bias = te.placeholder((num_filter, 1, 1), name='bias')
a_shape = get_const_tuple(A.shape)
w_shape = get_const_tuple(W.shape)
# under the License.
from collections import namedtuple
import tvm
+from tvm import te
from tvm import relay
from tvm.relay import quantize as qtz
import mxnet as mx
import numpy as np
import tvm
+from tvm import te
from tvm.contrib import graph_runtime
from tvm import relay
from tvm.runtime import container
prof_res = np.array(ftimer("main", data).results) * 1000
print("Mean vm inference time (std dev): %.2f ms (%.2f ms)" %
(np.mean(prof_res), np.std(prof_res)))
-
+
return result.asnumpy().astype(dtype)
# random input
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
from tvm import relay
from tvm.relay.backend.interpreter import ConstructorValue
from tvm.relay import create_executor
import pytest
import tvm
+from tvm import te
from tvm import relay
from tvm.relay.loops import while_loop
from tvm.relay.testing import run_infer_type as infer_type
# under the License.
import numpy as np
import tvm
+from tvm import te
import tvm.testing
from tvm import relay
from tvm import autotvm
return strategy
def _create_record(task_name, dshape, wshape, target, cost):
- args = [tvm.placeholder(dshape), tvm.placeholder(wshape), (1, 1), (1, 1, 1, 1),
+ args = [te.placeholder(dshape), te.placeholder(wshape), (1, 1), (1, 1, 1, 1),
(1, 1), 'float32']
task = autotvm.task.create(task_name, args, target)
cfg = autotvm.ConfigEntity(0, None, {}, [])
return relay.backend.compile_engine.get_valid_implementations(
relay.op.get("nn.conv2d"),
out.attrs,
- [tvm.placeholder(dshape), tvm.placeholder(wshape)],
+ [te.placeholder(dshape), te.placeholder(wshape)],
out.checked_type,
target)
return relay.backend.compile_engine.select_implementation(
relay.op.get("nn.conv2d"),
out.attrs,
- [tvm.placeholder(dshape), tvm.placeholder(wshape)],
+ [te.placeholder(dshape), te.placeholder(wshape)],
out.checked_type,
target,
use_autotvm)
import numpy as np
import tvm
+from tvm import te
from tvm import relay
from tvm.contrib import graph_runtime
from tvm.relay.scope_builder import ScopeBuilder
# under the License.
import numpy as np
import tvm
+from tvm import te
import tvm.testing
from tvm import nd
from tvm import relay
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
from tvm import relay
from tvm.relay.testing import resnet
from tvm.relay import transform
import numpy as np
import tvm
+from tvm import te
from tvm import relay
from tvm.contrib.nvcc import have_fp16
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
from tvm import relay
def check_type_err(expr, msg):
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
from tvm import relay
from tvm.relay import ExprFunctor, ExprMutator, ExprVisitor
import numpy as np
import tvm
+from tvm import te
import tvm.relay.testing
import tvm.relay.transform
from tvm import relay
import numpy as np
import tvm
+from tvm import te
import tvm.runtime._ffi_api
from tvm import relay
from tvm.contrib import util
# under the License.
import tvm
+from tvm import te
from tvm import relay
from tvm.relay.analysis import detect_feature
from tvm.relay.transform import gradient
# under the License.
""" test bind function."""
import tvm
+from tvm import te
from tvm import relay
# under the License.
"""Tests for module functionality."""
import tvm
+from tvm import te
from tvm import relay
from tvm.relay.prelude import Prelude
from tvm.relay.testing import add_nat_definitions
""" test ir"""
import pytest
import tvm
+from tvm import te
from tvm import relay
from tvm.tir.expr import *
from tvm.relay import op
# Types
def test_tensor_type():
- shape = tvm.convert([1, 2, 3])
+ shape = tvm.runtime.convert([1, 2, 3])
dtype = 'float32'
tt = relay.TensorType(shape, dtype)
assert tt.dtype == dtype
def test_func_type():
- type_params = tvm.convert([])
- type_constraints = tvm.convert([]) # TODO: fill me in
- arg_types = tvm.convert([])
+ type_params = tvm.runtime.convert([])
+ type_constraints = tvm.runtime.convert([]) # TODO: fill me in
+ arg_types = tvm.runtime.convert([])
ret_type = relay.TensorType((1, 2, 3), 'float32')
tf = relay.FuncType(arg_types, ret_type, type_params, type_constraints)
assert tf.type_params == type_params
def test_tuple_type():
tp = relay.TypeVar('tp', relay.TypeKind.Type)
- tf = relay.FuncType(tvm.convert([]), None, tvm.convert([]), tvm.convert([]))
- tt = relay.TensorType(tvm.convert([1, 2, 3]), 'float32')
- fields = tvm.convert([tp, tf, tt])
+ tf = relay.FuncType(tvm.runtime.convert([]), None, tvm.runtime.convert([]), tvm.runtime.convert([]))
+ tt = relay.TensorType(tvm.runtime.convert([1, 2, 3]), 'float32')
+ fields = tvm.runtime.convert([tp, tf, tt])
tup_ty = relay.TupleType(fields)
assert tup_ty.fields == fields
def test_type_relation():
tp = relay.TypeVar('tp', relay.TypeKind.Type)
- tf = relay.FuncType(tvm.convert([]), None, tvm.convert([]), tvm.convert([]))
- tt = relay.TensorType(tvm.convert([1, 2, 3]), 'float32')
- args = tvm.convert([tp, tf, tt])
+ tf = relay.FuncType(tvm.runtime.convert([]), None, tvm.runtime.convert([]), tvm.runtime.convert([]))
+ tt = relay.TensorType(tvm.runtime.convert([1, 2, 3]), 'float32')
+ args = tvm.runtime.convert([tp, tf, tt])
num_inputs = 2
func = tvm.ir.EnvFunc.get("tvm.relay.type_relation.Broadcast")
def test_tuple():
- fields = tvm.convert([])
+ fields = tvm.runtime.convert([])
tup = relay.Tuple(fields)
assert tup.fields == fields
assert tup.span == None
def test_function():
param_names = ['a', 'b', 'c', 'd']
- params = tvm.convert([relay.Var(n) for n in param_names])
- ret_type = relay.TupleType(tvm.convert([]))
- body = relay.Tuple(tvm.convert([]))
- type_params = tvm.convert([])
+ params = tvm.runtime.convert([relay.Var(n) for n in param_names])
+ ret_type = relay.TupleType(tvm.runtime.convert([]))
+ body = relay.Tuple(tvm.runtime.convert([]))
+ type_params = tvm.runtime.convert([])
fn = relay.Function(params, body, ret_type, type_params)
fn = fn.set_attribute("test_attribute", tvm.tir.StringImm("value"))
assert fn.params == params
@pytest.mark.skip(reason="AttrsEqualHandler doesn't handle Map so far.")
def test_function_attrs():
param_names = ['a', 'b', 'c', 'd']
- params = tvm.convert([relay.var(n, shape=(5, 2)) for n in param_names])
- ret_type = relay.TupleType(tvm.convert([]))
- body = relay.Tuple(tvm.convert([]))
- type_params = tvm.convert([])
+ params = tvm.runtime.convert([relay.var(n, shape=(5, 2)) for n in param_names])
+ ret_type = relay.TupleType(tvm.runtime.convert([]))
+ body = relay.Tuple(tvm.runtime.convert([]))
+ type_params = tvm.runtime.convert([])
fn = relay.Function(params, body, ret_type, type_params)
model_params = {}
for param in params[:1]:
def test_call():
op = relay.Var('f')
arg_names = ['a', 'b', 'c', 'd']
- args = tvm.convert([relay.Var(n) for n in arg_names])
+ args = tvm.runtime.convert([relay.Var(n) for n in arg_names])
call = relay.Call(op, args, None, None)
assert call.op == op
assert call.args == args
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
from tvm import relay
from tvm.relay.analysis import graph_equal, assert_graph_equal
from tvm.relay.analysis import alpha_equal, assert_alpha_equal
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
from tvm import relay
import tvm.relay.testing
import numpy as np
def test_meta_data():
- n, c, h, w = tvm.size_var("n"), 10, 224, 224
+ n, c, h, w = te.size_var("n"), 10, 224, 224
x = relay.var("x", shape=(n, c, h, w))
w = relay.var("w")
z = relay.nn.conv2d(x, w,
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
from tvm import relay
from tvm.relay.analysis import well_formed
from tvm.relay.prelude import Prelude
# under the License.
import tvm
+from tvm import te
from tvm import relay
import json
# specific language governing permissions and limitations
# under the License
import tvm
+from tvm import te
import numpy as np
from tvm import relay
from tvm.relay import memory_alloc
import pytest
import tvm
+from tvm import te
from tvm import relay
from tvm.relay.testing import check_grad, ctx_list, run_infer_type
from tvm.relay.transform import gradient
import topi
import topi.testing
import tvm
+from tvm import te
from tvm import relay
from tvm.relay.testing import check_grad, ctx_list, run_infer_type
from tvm.relay.transform import gradient
data = np.random.rand(*x_shape).astype("float32")
y_shape = topi.util.get_const_tuple(fwd_func.ret_type.shape)
out_grad = np.ones(shape=y_shape)
- ref_grad = topi.testing.pool_grad_nchw(data, out_grad, pool_size=(x_shape[2], x_shape[3]),
- strides=(1, 1), padding=[0, 0, 0, 0], pool_type='avg',
+ ref_grad = topi.testing.pool_grad_nchw(data, out_grad, pool_size=(x_shape[2], x_shape[3]),
+ strides=(1, 1), padding=[0, 0, 0, 0], pool_type='avg',
ceil_mode=False)
for target, ctx in ctx_list():
import pytest
import tvm
+from tvm import te
from tvm import relay
from tvm.relay.testing import check_grad, ctx_list, run_infer_type
from tvm.relay.transform import gradient
import numpy as np
import pytest
import tvm
+from tvm import te
import scipy
from tvm import relay
from tvm.relay import transform
def check_binary_op(opfunc, ref, dtype):
# TODO(@jroesch): this piece of code improperly uses type variables.
- n = tvm.var("n")
+ n = te.var("n")
s1 = (5, n, 5)
s2 = (n, 1)
t1 = relay.TensorType(s1)
def test_expand_dims_infer_type():
for dtype in ['float16', 'float32']:
- n, t, d = tvm.size_var("n"), tvm.size_var("t"), 100
+ n, t, d = te.size_var("n"), te.size_var("t"), 100
x = relay.var("x", shape=(n, t, d), dtype=dtype)
y = relay.expand_dims(x, axis=2)
assert "axis=2" in y.astext()
def test_concatenate():
for dtype in ['float16', 'float32']:
- n, t, d = tvm.size_var("n"), tvm.size_var("t"), 100
+ n, t, d = te.size_var("n"), te.size_var("t"), 100
x = relay.var("x", shape=(n, t, d))
y = relay.var("y", shape=(n, t, d))
z = relay.concatenate((x, y), axis=-1)
assert "axis=" in z.astext()
zz = run_infer_type(z)
assert zz.checked_type == relay.TensorType((n, t, 200))
-
+
x = relay.exp(x)
z = relay.concatenate((x, y), axis=2)
zz = run_infer_type(z)
assert zz.checked_type == relay.TensorType((n, t, 200))
-
+
z = relay.concatenate((x, y), axis=1)
zz = run_infer_type(z)
assert zz.checked_type == relay.TensorType((n, t + t, 100))
-
+
# check shape mismatches (the following case is expected to raise tvm._ffi.base.TVMError).
try:
x = relay.var('p1', shape=(2, 5))
pass
else:
assert False
-
+
x = relay.var("x", shape=(10, 5), dtype=dtype)
y = relay.var("y", shape=(10, 5), dtype=dtype)
t = relay.var("z", shape=(), dtype=dtype)
y_data = np.random.rand(10, 5).astype(dtype)
t_data = np.random.uniform(size=()).astype(dtype)
ref_res = np.concatenate((x_data, y_data), axis=1) + t_data
-
+
for target, ctx in ctx_list():
if dtype == 'float16' and target == 'cuda' and not have_fp16(tvm.gpu(0).compute_version):
continue
def test_dropout():
for dtype in ['float16', 'float32']:
- n, t, d = tvm.size_var("n"), tvm.size_var("t"), tvm.size_var("d")
+ n, t, d = te.size_var("n"), te.size_var("t"), te.size_var("d")
input_ty = relay.TensorType((n, t, d), dtype)
x = relay.var("x", input_ty)
y = relay.nn.dropout(x, rate=0.75)
center=False, scale=False)
yy = run_infer_type(y.astuple())
assert "center=" in yy.astext()
- assert yy.checked_type == relay.ty.TupleType(tvm.convert([
+ assert yy.checked_type == relay.ty.TupleType(tvm.runtime.convert([
relay.TensorType((3, 2, 1), dtype),
relay.TensorType((2,), dtype),
relay.TensorType((2,), dtype)
y = relay.nn.batch_norm(data, gamma, beta, moving_mean, moving_var,
axis=0, center=False, scale=False)
yy = run_infer_type(y.astuple())
- assert yy.checked_type == relay.ty.TupleType(tvm.convert([
+ assert yy.checked_type == relay.ty.TupleType(tvm.runtime.convert([
relay.ty.TensorType((3, 2, 1), dtype),
relay.ty.TensorType((3,), dtype),
relay.ty.TensorType((3,), dtype)
y = relay.nn.batch_norm(data, gamma, beta, moving_mean, moving_var,
axis=-1, center=False, scale=False)
yy = run_infer_type(y.astuple())
- assert yy.checked_type == relay.ty.TupleType(tvm.convert([
+ assert yy.checked_type == relay.ty.TupleType(tvm.runtime.convert([
relay.ty.TensorType((1, 2, 3), dtype),
relay.ty.TensorType((3,), dtype),
relay.ty.TensorType((3,), dtype)
# Dense accuracy for float16 is poor
if dtype == 'float16':
return
- n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w")
+ n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w")
x = relay.var("x", relay.TensorType((n, c, h, w), dtype))
w = relay.var("w", relay.TensorType((2, w), dtype))
y = relay.nn.dense(x, w, units=2)
yy = run_infer_type(y)
assert yy.checked_type == relay.TensorType((n, c, h, 2), dtype)
- n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), 2
+ n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), 2
x = relay.var("x", relay.TensorType((n, c, h, w), dtype))
- wh, ww = tvm.size_var("wh"), tvm.size_var("ww")
+ wh, ww = te.size_var("wh"), te.size_var("ww")
w = relay.var("w", relay.TensorType((ww, wh), dtype))
y = relay.nn.dense(x, w)
yy = run_infer_type(y)
assert yy.checked_type == relay.TensorType((n, c, h, ww), dtype)
- n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), 2
+ n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), 2
x = relay.var("x", relay.TensorType((n, c, h, w), dtype))
w = relay.var("w", relay.IncompleteType())
y = relay.nn.dense(x, w, units=2)
data_dtype = 'uint8'
weight_dtype = 'int8'
out_dtype = 'uint8'
- n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w")
+ n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w")
x = relay.var("x", relay.TensorType((n, c, h, w), data_dtype))
w = relay.var("w", relay.TensorType((2, w), weight_dtype))
y = relay.nn.dense(x, w, units=2, out_dtype=out_dtype)
def test_bitserial_dense():
- m, k = tvm.size_var("m"), tvm.size_var("k")
+ m, k = te.size_var("m"), te.size_var("k")
x = relay.var("x", relay.TensorType((m, k), "int16"))
w = relay.var("w", relay.TensorType((k, 32), "int16"))
y = relay.nn.bitserial_dense(x, w, units=32)
"""
import numpy as np
import tvm
+from tvm import te
import topi.testing
from tvm import relay
from tvm.relay import transform
tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
def test_slice_like():
- d1, d2, d3, d4 = tvm.var("d1"), tvm.var("d2"), tvm.var("d3"), tvm.var("d4")
+ d1, d2, d3, d4 = te.var("d1"), te.var("d2"), te.var("d3"), te.var("d4")
verify_slice_like(data=(d1, d2, d3), slice_like=(1, 2, 3), axes=None, output=(1, 2, 3))
verify_slice_like(data=(1, 2, 3), slice_like=(d1, d2, d3), axes=None, output=(d1, d2, d3))
verify_slice_like(data=(d2, d3, d4), slice_like=(d1, d2, d3), axes=(1,2), output=(d2, d2, d3))
tvm.testing.assert_allclose(z.asnumpy(), z_np, rtol=1e-5)
def test_batch_matmul():
- b, m, n, k = tvm.size_var("b"), tvm.size_var("m"), tvm.size_var("n"), tvm.size_var("k")
+ b, m, n, k = te.size_var("b"), te.size_var("m"), te.size_var("n"), te.size_var("k")
x = relay.var("x", relay.TensorType((b, m, k), "float32"))
y = relay.var("y", relay.TensorType((b, n, k), "float32"))
z = relay.nn.batch_matmul(x, y)
"""
import numpy as np
import tvm
+from tvm import te
from tvm import autotvm
from tvm import relay
from tvm.relay import transform
def test_conv1d_infer_type():
# symbolic in batch dimension
- n, c, w = tvm.var("n"), 10, 224
+ n, c, w = te.var("n"), 10, 224
x = relay.var("x", relay.ty.TensorType((n, c, w), "float32"))
w = relay.var("w")
y = relay.nn.conv1d(x, w,
(2, 10, 3), "float32")
# infer by shape of w, mixed precision
- n, c, w = tvm.var("n"), 10, 224
+ n, c, w = te.var("n"), 10, 224
x = relay.var("x", relay.TensorType((n, c, w), "int8"))
w = relay.var("w", relay.TensorType((2, 10, 3), "int8"))
y = relay.nn.conv1d(x, w, out_dtype="int32")
(n, 2, 222), "int32")
# infer shape in case of different dtypes for input and weight.
- n, c, w = tvm.var("n"), 10, 224
+ n, c, w = te.var("n"), 10, 224
x = relay.var("x", relay.TensorType((n, c, w), "uint8"))
w = relay.var("w", relay.TensorType((2, 10, 3), "int8"))
y = relay.nn.conv1d(x, w, out_dtype="int32")
def test_conv2d_infer_type():
# symbolic in batch dimension
- n, c, h, w = tvm.size_var("n"), 10, 224, 224
+ n, c, h, w = te.size_var("n"), 10, 224, 224
x = relay.var("x", relay.ty.TensorType((n, c, h, w), "float32"))
w = relay.var("w")
y = relay.nn.conv2d(x, w,
(2, 10, 3, 3), "float32")
# infer by shape of w, mixed precision
- n, c, h, w = tvm.size_var("n"), 10, 224, 224
+ n, c, h, w = te.size_var("n"), 10, 224, 224
x = relay.var("x", relay.TensorType((n, c, h, w), "int8"))
w = relay.var("w", relay.TensorType((2, 10, 3, 3), "int8"))
y = relay.nn.conv2d(x, w, out_dtype="int32")
(n, 2, 222, 222), "int32")
# infer shape in case of different dtypes for input and weight.
- n, c, h, w = tvm.size_var("n"), 10, 224, 224
+ n, c, h, w = te.size_var("n"), 10, 224, 224
x = relay.var("x", relay.TensorType((n, c, h, w), "uint8"))
w = relay.var("w", relay.TensorType((2, 10, 3, 3), "int8"))
y = relay.nn.conv2d(x, w, out_dtype="int32")
def test_conv3d_infer_type():
# symbolic in batch dimension
- n, c, d, h, w = tvm.size_var("n"), 10, 224, 224, 224
+ n, c, d, h, w = te.size_var("n"), 10, 224, 224, 224
x = relay.var("x", relay.ty.TensorType((n, c, d, h, w), "float32"))
w = relay.var("w")
y = relay.nn.conv3d(x, w,
(2, 10, 3, 3, 3), "float32")
# infer by shape of w, mixed precision
- n, c, d, h, w = tvm.size_var("n"), 10, 224, 224, 224
+ n, c, d, h, w = te.size_var("n"), 10, 224, 224, 224
x = relay.var("x", relay.TensorType((n, c, d, h, w), "int8"))
w = relay.var("w", relay.TensorType((2, 10, 3, 3, 3), "int8"))
y = relay.nn.conv3d(x, w, out_dtype="int32")
(n, 2, 222, 222, 222), "int32")
# infer shape in case of different dtypes for input and weight.
- n, c, d, h, w = tvm.size_var("n"), 10, 224, 224, 224
+ n, c, d, h, w = te.size_var("n"), 10, 224, 224, 224
x = relay.var("x", relay.TensorType((n, c, d, h, w), "uint8"))
w = relay.var("w", relay.TensorType((2, 10, 3, 3, 3), "int8"))
y = relay.nn.conv3d(x, w, out_dtype="int32")
def test_conv2d_transpose_infer_type():
# symbolic in batch dimension
- n, c, h, w = tvm.size_var("n"), 10, 10, 12
+ n, c, h, w = te.size_var("n"), 10, 10, 12
x = relay.var("x", relay.TensorType((n, c, h, w), "float32"))
w = relay.var("w", relay.IncompleteType())
y = relay.nn.conv2d_transpose(x, w,
(10, 15, 3, 3), "float32")
# infer by shape of w, mixed precision
- n, h, w, c = tvm.size_var("n"), 10, 10, 12
+ n, h, w, c = te.size_var("n"), 10, 10, 12
x = relay.var("x", relay.TensorType((n, h, w, c), "float32"))
w = relay.var("w", relay.TensorType((12, 11, 5, 5), "float32"))
y = relay.nn.conv2d_transpose(x, w,
def test_upsampling_infer_type():
- n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w")
- scale = tvm.const(2.0, "float64")
+ n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w")
+ scale = tvm.tir.const(2.0, "float64")
x = relay.var("x", relay.TensorType((n, c, h, w), "float32"))
y = relay.nn.upsampling(x, scale_h=2, scale_w=2, layout="NCHW", method="bilinear")
"method=\"BINLINEAR\"" in y.astext()
yy = run_infer_type(y)
- assert yy.checked_type == relay.TensorType((n, c, tvm.tir.Cast("int32", tvm.round(h*scale)),
- tvm.tir.Cast("int32", tvm.round(w*scale))),
+ assert yy.checked_type == relay.TensorType((n, c, tvm.tir.Cast("int32", te.round(h*scale)),
+ tvm.tir.Cast("int32", te.round(w*scale))),
"float32")
- n, c = tvm.size_var("n"), tvm.size_var("c")
+ n, c = te.size_var("n"), te.size_var("c")
x = relay.var("x", relay.TensorType((n, c, 100, 200), "float32"))
y = relay.nn.upsampling(x, scale_h=2, scale_w=2, layout="NCHW", method="bilinear")
yy = run_infer_type(y)
assert yy.checked_type == relay.TensorType((n, c, 200, 400), "float32")
def test_upsampling3d_infer_type():
- n, c, d, h, w = tvm.size_var("n"), tvm.size_var("c"),\
- tvm.size_var("d"), tvm.size_var("h"), tvm.size_var("w")
- scale = tvm.const(2.0, "float64")
+ n, c, d, h, w = te.size_var("n"), te.size_var("c"),\
+ te.size_var("d"), te.size_var("h"), te.size_var("w")
+ scale = tvm.tir.const(2.0, "float64")
x = relay.var("x", relay.TensorType((n, c, d, h, w), "float32"))
y = relay.nn.upsampling3d(x, scale_d=2, scale_h=2, scale_w=2, layout="NCDHW", method="trilinear")
yy = run_infer_type(y)
- assert yy.checked_type == relay.TensorType((n, c, tvm.tir.Cast("int32", tvm.round(d*scale)),
- tvm.tir.Cast("int32", tvm.round(h*scale)),
- tvm.tir.Cast("int32", tvm.round(w*scale))),
+ assert yy.checked_type == relay.TensorType((n, c, tvm.tir.Cast("int32", te.round(d*scale)),
+ tvm.tir.Cast("int32", te.round(h*scale)),
+ tvm.tir.Cast("int32", te.round(w*scale))),
"float32")
- n, c = tvm.size_var("n"), tvm.size_var("c")
+ n, c = te.size_var("n"), te.size_var("c")
x = relay.var("x", relay.TensorType((n, c, 100, 100, 200), "float32"))
y = relay.nn.upsampling3d(x, scale_d=2, scale_h=2, scale_w=2, layout="NCDHW", method="trilinear")
yy = run_infer_type(y)
assert yy.checked_type == relay.TensorType((n, c, 200, 200, 400), "float32")
def _test_pool2d(opfunc, reffunc):
- n, c, h, w = tvm.size_var("n"), 10, 224, 224
+ n, c, h, w = te.size_var("n"), 10, 224, 224
x = relay.var("x", relay.TensorType((n, c, h, w), "float32"))
y = opfunc(x, pool_size=(1, 1))
assert "pool_size=" in y.astext()
tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
def _test_pool2d_int(opfunc, reffunc, dtype):
- n, c, h, w = tvm.size_var("n"), 10, 224, 224
+ n, c, h, w = te.size_var("n"), 10, 224, 224
x = relay.var("x", relay.TensorType((n, c, h, w), dtype))
y = opfunc(x, pool_size=(1, 1))
assert "pool_size=" in y.astext()
tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
def _test_global_pool2d(opfunc, reffunc):
- n, c, h, w = tvm.size_var("n"), tvm.size_var("c"), 224, 224
+ n, c, h, w = te.size_var("n"), te.size_var("c"), 224, 224
x = relay.var("x", relay.TensorType((n, h, w, c), "float32"))
y = opfunc(x, layout="NHWC")
yy = run_infer_type(y)
assert yy.checked_type == relay.TensorType((n, 1, 1, c), "float32")
- n, c, h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w")
+ n, c, h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w")
x = relay.var("x", relay.TensorType((n, c, h, w), "float32"))
y = opfunc(x)
yy = run_infer_type(y)
def test_pool1d():
def _test_pool1d(opfunc):
- n, c, w = tvm.var("n"), 10, 224
+ n, c, w = te.var("n"), 10, 224
x = relay.var("x", relay.TensorType((n, c, w), "float32"))
y = opfunc(x, pool_size=(1,))
assert "pool_size=" in y.astext()
def test_pool3d():
def _test_pool3d(opfunc, padding=(0, 0, 0, 0, 0, 0), out_shape=(1, 3, 16, 16, 16)):
- n, c, d, h, w = tvm.size_var("n"), 10, 5, 224, 224
+ n, c, d, h, w = te.size_var("n"), 10, 5, 224, 224
x = relay.var("x", relay.TensorType((n, c, d, h, w), "float32"))
y = opfunc(x, pool_size=(1, 1, 1))
assert "pool_size=" in y.astext()
tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
def test_flatten_infer_type():
- d1, d2, d3, d4 = tvm.size_var("d1"), tvm.size_var("d2"), tvm.size_var("d3"), tvm.size_var("d4")
+ d1, d2, d3, d4 = te.size_var("d1"), te.size_var("d2"), te.size_var("d3"), te.size_var("d4")
x = relay.var("x", relay.TensorType((d1, d2, d3, d4), "float32"))
y = relay.nn.batch_flatten(x)
yy = run_infer_type(y)
assert yy.checked_type == relay.TensorType((3, 6, 9, 12), "float32")
# some symbolic values
- n, c, h, w = tvm.size_var("n"), 2, 3, tvm.size_var("w")
+ n, c, h, w = te.size_var("n"), 2, 3, te.size_var("w")
t = relay.var("t", relay.TensorType((n, c, h, w), "float32"))
y = relay.nn.pad(t, ((1, 1), (2, 2), (3, 3), (4, 4)))
yy = run_infer_type(y)
_test_run('int32')
def test_lrn():
- n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w")
+ n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w")
x = relay.var("x", shape=(n, c , h, w))
y = relay.nn.lrn(x, size=10, axis=2, bias=0.5, alpha=.00001, beta=0.75)
"alpha=" in y.astext()
tvm.testing.assert_allclose(op_res2.asnumpy(), ref_res, rtol=1e-5)
def test_l2_normalize():
- n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w")
+ n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w")
x = relay.var("x", shape=(n, c , h, w))
y = relay.nn.l2_normalize(x, eps=0.001, axis=[1])
"axis=" in y.astext()
def _test_upsampling(layout, method, align_corners=False):
- n, c, h, w = tvm.size_var("n"), 16, 32, 32
+ n, c, h, w = te.size_var("n"), 16, 32, 32
scale_h = 2.0
scale_w = 2.0
dtype = "float32"
_test_upsampling("NHWC", "bilinear", True)
def _test_upsampling3d(layout, method, coordinate_transformation_mode="half_pixel"):
- n, c, d, h, w = tvm.size_var("n"), 8, 16, 16, 16
+ n, c, d, h, w = te.size_var("n"), 8, 16, 16, 16
scale_d = 2.0
scale_h = 2.0
scale_w = 2.0
def test_bitserial_conv2d_infer_type():
# Basic shape test with ambiguous batch.
- n, c, h, w = tvm.size_var("n"), 32, 224, 224
+ n, c, h, w = te.size_var("n"), 32, 224, 224
x = relay.var("x", relay.ty.TensorType((n, c, h, w), "int16"))
w = relay.var("w", relay.ty.TensorType((32, 32, 3, 3), "int16"))
y = relay.nn.bitserial_conv2d(
import numpy as np
import pytest
import tvm
+from tvm import te
from tvm import relay
from tvm.relay import create_executor, transform
from tvm.relay.testing import ctx_list, check_grad, run_infer_type
def test_transpose_infer_type():
- n, t, d = tvm.size_var("n"), tvm.size_var("t"), 100
+ n, t, d = te.size_var("n"), te.size_var("t"), 100
x = relay.var("x", relay.TensorType((n, t, d), "float32"))
y = relay.transpose(x, axes=(1, 0, 2))
assert "axes=" in y.astext()
assert zz.checked_type == relay.TensorType((1, 6), "float32")
# symbolic shape
- n, c, h, w = tvm.size_var("n"), 2, 3, tvm.size_var("w")
+ n, c, h, w = te.size_var("n"), 2, 3, te.size_var("w")
x = relay.var("x", relay.TensorType((n, c, h, w), "float32"))
y = relay.var("y", relay.TensorType((1, 8, 8), "float32"))
z = relay.reshape_like(x, y)
yy = run_infer_type(y)
assert yy.checked_type == relay.TensorType(oshape, "float32")
- d1, d2, d3 = tvm.var("d1"), tvm.var("d2"), tvm.var("d3")
- d4, d5, d6 = tvm.var("d4"), tvm.var("d5"), tvm.var("d6")
+ d1, d2, d3 = te.var("d1"), te.var("d2"), te.var("d3")
+ d4, d5, d6 = te.var("d4"), te.var("d5"), te.var("d6")
verify_take((d1,), (1,), (1,), 0)
verify_take((4,), (d1, d2), (d1, d2))
verify_take((3, 3, 3), (1, d2), (1, d2))
yy = run_infer_type(y.astuple())
assert yy.checked_type == ret_type
- idxd = tvm.indexdiv
+ idxd = tvm.tir.indexdiv
- d1, d2, d3, d4 = tvm.var("d1"), tvm.var("d2"), tvm.var("d3"), tvm.var("d4")
- axis = tvm.var("axis")
+ d1, d2, d3, d4 = te.var("d1"), te.var("d2"), te.var("d3"), te.var("d4")
+ axis = te.var("axis")
verify_split((5, 5, 2, 2), 5,
- relay.ty.TupleType(tvm.convert([
+ relay.ty.TupleType(tvm.runtime.convert([
relay.ty.TensorType((5, 1, 2, 2), "float32"),
relay.ty.TensorType((5, 1, 2, 2), "float32"),
relay.ty.TensorType((5, 1, 2, 2), "float32"),
relay.ty.TensorType((5, 1, 2, 2), "float32")])),
axis=1)
verify_split((5, 5, 2, 2), 5,
- relay.ty.TupleType(tvm.convert([
+ relay.ty.TupleType(tvm.runtime.convert([
relay.ty.TensorType((1, 5, 2, 2), "float32"),
relay.ty.TensorType((1, 5, 2, 2), "float32"),
relay.ty.TensorType((1, 5, 2, 2), "float32"),
relay.ty.TensorType((1, 5, 2, 2), "float32")])),
axis=0)
verify_split((d1, d2, d3, d4), 4,
- relay.ty.TupleType(tvm.convert([
+ relay.ty.TupleType(tvm.runtime.convert([
relay.ty.TensorType((d1, d2, idxd(d3, 4), d4), "float32"),
relay.ty.TensorType((d1, d2, idxd(d3, 4), d4), "float32"),
relay.ty.TensorType((d1, d2, idxd(d3, 4), d4), "float32"),
relay.ty.TensorType((d1, d2, idxd(d3, 4), d4), "float32")])),
axis=2)
verify_split((d1, d2, d3, d4), 2,
- relay.ty.TupleType(tvm.convert([
+ relay.ty.TupleType(tvm.runtime.convert([
relay.ty.TensorType((idxd(d1, 2), d2, d3, d4), "float32"),
relay.ty.TensorType((idxd(d1, 2), d2, d3, d4), "float32")])),
axis=0)
verify_split((d1, d2, d3, d4), (2, 4, 7),
- relay.ty.TupleType(tvm.convert([
+ relay.ty.TupleType(tvm.runtime.convert([
relay.ty.TensorType((d1, 2, d3, d4), "float32"),
relay.ty.TensorType((d1, 2, d3, d4), "float32"),
relay.ty.TensorType((d1, 3, d3, d4), "float32"),
assert yy.checked_type == relay.TensorType((1, 2, 3), "float32")
# symbolic shape
- n, c, h, w = tvm.size_var("n"), 2, 3, tvm.size_var("w")
+ n, c, h, w = te.size_var("n"), 2, 3, te.size_var("w")
base = relay.var("base", relay.TensorType((n, c, h, w), "float32"))
fill = relay.var("fill", relay.TensorType((), "float32"))
y = relay.full_like(base, fill)
def test_infer_type_leaky_relu():
- n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w")
+ n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w")
x = relay.var("x", relay.TensorType((n, c, h, w), "float32"))
y = relay.nn.leaky_relu(x, alpha=0.1)
"alpha=0.1" in y.astext()
def test_infer_type_prelu():
- n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w")
+ n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w")
verify_infer_type_prelu((n, c, h, w), (c,), 1, (n, c, h, w))
verify_infer_type_prelu((n, h, w, c), (c,), 3, (n, h, w, c))
verify_infer_type_prelu((n, c, h, w), None, 1, (n, c, h, w))
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
from tvm import relay
from tvm.relay import transform
def test_binary_op():
def check_binary_op(opfunc, ref):
- n = tvm.size_var("n")
+ n = te.size_var("n")
t1 = relay.TensorType((5, n, 5))
t2 = relay.TensorType((n, 1))
x = relay.var("x", t1)
return func(data, axis=axis).reshape(out_shape)
return _wrapper
- d1, d2, d3, d4 = tvm.var("d1"), tvm.var("d2"), tvm.var("d3"), tvm.var("d4")
+ d1, d2, d3, d4 = te.var("d1"), te.var("d2"), te.var("d3"), te.var("d4")
for func in [[relay.sum, np.sum],
[relay.max, np.max],
[relay.min, np.min],
op_res = intrp.evaluate(func)(x_data)
tvm.testing.assert_allclose(op_res.asnumpy(), ref_res)
- d1, d2, d3, d4 = tvm.var("d1"), tvm.var("d2"), tvm.var("d3"), tvm.var("d4")
+ d1, d2, d3, d4 = te.var("d1"), te.var("d2"), te.var("d3"), te.var("d4")
verify((d1, d2, 3), [None, None, 1], [None, None, 2], None, (d1, d2, 1), False)
verify((3, 4, 3), [0, 0, 0], [4, -5, 4], [1, -1, 2], (3, 1, 2))
verify((3, 4, 3), [1, 1, 0], [4, 4, 3], [2, 1, 1], (1, 3, 3))
import math
import numpy as np
import tvm
+from tvm import te
from tvm import relay
from tvm.relay import transform
from tvm.relay.testing import ctx_list, run_infer_type
def test_resize_infer_type():
- n, c, h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w")
+ n, c, h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w")
x = relay.var("x", relay.TensorType((n, c, h, w), "int8"))
- th, tw = tvm.var("th"), tvm.var("tw")
+ th, tw = te.var("th"), te.var("tw")
z = relay.image.resize(x, (th, tw))
zz = run_infer_type(z)
assert zz.checked_type == relay.TensorType((n, c, th, tw), "int8")
x = relay.var("x", relay.TensorType(dshape, "float32"))
verify_multibox_prior(x, dshape, ref_res, sizes, ratios, steps, offsets,
check_size=True)
- y = relay.var("y", relay.TensorType((tvm.size_var("n"), 3, 56, 56), "float32"))
+ y = relay.var("y", relay.TensorType((te.size_var("n"), 3, 56, 56), "float32"))
verify_multibox_prior(x, dshape, ref_res, sizes, ratios, steps, offsets,
check_size=True, check_type_only=True)
ref_res = get_ref_result(dshape, clip=False)
x = relay.var("x", relay.TensorType(dshape, "float32"))
verify_multibox_prior(x, dshape, ref_res, clip=False)
- y = relay.var("y", relay.TensorType((tvm.size_var("n"), 24, 32, 32), "float32"))
+ y = relay.var("y", relay.TensorType((te.size_var("n"), 24, 32, 32), "float32"))
verify_multibox_prior(x, dshape, ref_res, clip=False, check_type_only=True)
np_indices_result = np.array([[3, 0, -1, -1, -1]])
num_anchors = 5
- dshape = (tvm.size_var("n"), num_anchors, 6)
+ dshape = (te.size_var("n"), num_anchors, 6)
verify_nms(np_data, np_valid_count, dshape, np_result, np_indices_result,
force_suppress=True, top_k=2, check_type_only=True)
dshape = (1, num_anchors, 6)
[1, 0.7, 30, 60, 50, 80], [-1, -1, -1, -1, -1, -1],
[-1, -1, -1, -1, -1, -1]]])
np_indices_result = np.array([[3, 0, 1, -1, -1]])
- dshape = (tvm.size_var("n"), num_anchors, 6)
+ dshape = (te.size_var("n"), num_anchors, 6)
verify_nms(np_data, np_valid_count, dshape, np_result,
np_indices_result, check_type_only=True)
dshape = (1, num_anchors, 6)
cls_prob=cls_prob, loc_pred=loc_pred, anchor=anchors)
ret = run_infer_type(mtl.astuple())
ref_type = relay.ty.TupleType(
- tvm.convert([
+ tvm.runtime.convert([
relay.ty.TensorType((1, num_anchors, 6), "float32"),
relay.ty.TensorType((1, ), "int")
]))
def test_threshold():
num_anchors = 5
num_classes = 5
- n = tvm.size_var("n")
+ n = te.size_var("n")
cls_prob = relay.var(
"cls_prob",
relay.ty.TensorType((n, num_anchors, num_classes), "float32"))
variances=variances)
ret = run_infer_type(ret.astuple())
ref_type = relay.ty.TupleType(
- tvm.convert([
+ tvm.runtime.convert([
relay.ty.TensorType((n, num_anchors, 6), "float32"),
relay.ty.TensorType((n, ), "int")
]))
assert "stride=" in z.astext()
assert zz.checked_type == relay.ty.TensorType(out_shape, "float32")
- n, c, h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w")
- idxd = tvm.indexdiv
+ n, c, h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w")
+ idxd = tvm.tir.indexdiv
verify_yolo_reorg((n, c, 20, 20), 10, (n, c*10*10, 2, 2))
verify_yolo_reorg((n, c, h, w), 2, (n, c*2*2, idxd(h, 2), idxd(w, 2)))
"""
import numpy as np
import tvm
+from tvm import te
from tvm import relay
from tvm.relay.testing import ctx_list
# under the License.
import tvm
+from tvm import te
import numpy as np
from tvm import relay
from tvm.contrib import graph_runtime
# under the License.
import tvm
+from tvm import te
import numpy as np
from tvm import relay
from tvm.contrib import graph_runtime
# under the License.
import tvm
+from tvm import te
import numpy as np
from tvm import relay
from tvm.relay import transform
# under the License.
import tvm
+from tvm import te
import numpy as np
from tvm import relay
from tvm.contrib import graph_runtime
# under the License.
import tvm
+from tvm import te
import numpy as np
from tvm import relay
from tvm.contrib import graph_runtime
# under the License.
import tvm
+from tvm import te
import numpy as np
from tvm import relay
from tvm.contrib import graph_runtime
# under the License.
import tvm
+from tvm import te
import numpy as np
from tvm import relay
from tvm.contrib import graph_runtime
# under the License.
import tvm
+from tvm import te
import numpy as np
from tvm import relay
from tvm.contrib import graph_runtime
import os
import numpy as np
import tvm
+from tvm import te
import json
import base64
from tvm._ffi.base import py_str
# under the License.
import numpy as np
import tvm
+from tvm import te
from tvm import relay
from tvm.relay import analysis
from tvm.relay.testing import run_opt_pass
# function types are the only way to put type params
# in eq map
- ft1 = relay.FuncType(tvm.convert([]), t1, tvm.convert([t1]), tvm.convert([]))
- ft2 = relay.FuncType(tvm.convert([]), t3, tvm.convert([t3]), tvm.convert([]))
+ ft1 = relay.FuncType(tvm.runtime.convert([]), t1, tvm.runtime.convert([t1]), tvm.runtime.convert([]))
+ ft2 = relay.FuncType(tvm.runtime.convert([]), t3, tvm.runtime.convert([t3]), tvm.runtime.convert([]))
# actually an invalid type because t2 is of the wrong kind
- ft3 = relay.FuncType(tvm.convert([]), t2, tvm.convert([t2]), tvm.convert([]))
+ ft3 = relay.FuncType(tvm.runtime.convert([]), t2, tvm.runtime.convert([t2]), tvm.runtime.convert([]))
assert ft1 == ft2
assert ft1 != ft3 # kinds still do not match
broadcast = tvm.ir.EnvFunc.get("tvm.relay.type_relation.Broadcast")
identity = tvm.ir.EnvFunc.get("tvm.relay.type_relation.Identity")
- tr1 = relay.TypeRelation(broadcast, tvm.convert([tp1, tp3]), 1, None)
- tr2 = relay.TypeRelation(broadcast, tvm.convert([tp2, tp4]), 1, None)
- tr3 = relay.TypeRelation(identity, tvm.convert([tp1, tp3]), 1, None)
+ tr1 = relay.TypeRelation(broadcast, tvm.runtime.convert([tp1, tp3]), 1, None)
+ tr2 = relay.TypeRelation(broadcast, tvm.runtime.convert([tp2, tp4]), 1, None)
+ tr3 = relay.TypeRelation(identity, tvm.runtime.convert([tp1, tp3]), 1, None)
- ft = relay.FuncType(tvm.convert([t1, t2]), tp1,
- tvm.convert([tp1, tp3]),
- tvm.convert([tr1]))
- translate_vars = relay.FuncType(tvm.convert([t1, t2]), tp1,
- tvm.convert([tp2, tp4]),
- tvm.convert([tr2]))
+ ft = relay.FuncType(tvm.runtime.convert([t1, t2]), tp1,
+ tvm.runtime.convert([tp1, tp3]),
+ tvm.runtime.convert([tr1]))
+ translate_vars = relay.FuncType(tvm.runtime.convert([t1, t2]), tp1,
+ tvm.runtime.convert([tp2, tp4]),
+ tvm.runtime.convert([tr2]))
assert ft == translate_vars
- different_args = relay.FuncType(tvm.convert([t1]), tp1,
- tvm.convert([tp1, tp3]),
- tvm.convert([tr1]))
+ different_args = relay.FuncType(tvm.runtime.convert([t1]), tp1,
+ tvm.runtime.convert([tp1, tp3]),
+ tvm.runtime.convert([tr1]))
assert ft != different_args
- different_order = relay.FuncType(tvm.convert([t2, t1]), tp1,
- tvm.convert([tp1, tp3]),
- tvm.convert([tr1]))
+ different_order = relay.FuncType(tvm.runtime.convert([t2, t1]), tp1,
+ tvm.runtime.convert([tp1, tp3]),
+ tvm.runtime.convert([tr1]))
assert ft != different_order
- no_rel = relay.FuncType(tvm.convert([t1, t2]), tp1,
- tvm.convert([tp1, tp3]),
- tvm.convert([]))
+ no_rel = relay.FuncType(tvm.runtime.convert([t1, t2]), tp1,
+ tvm.runtime.convert([tp1, tp3]),
+ tvm.runtime.convert([]))
assert ft != no_rel
- more_vars = relay.FuncType(tvm.convert([t1, t2]), tp2,
- tvm.convert([tp1, tp2, tp3]),
- tvm.convert([tr1]))
+ more_vars = relay.FuncType(tvm.runtime.convert([t1, t2]), tp2,
+ tvm.runtime.convert([tp1, tp2, tp3]),
+ tvm.runtime.convert([tr1]))
assert ft != more_vars
- all_the_vars = relay.FuncType(tvm.convert([t1, t2]), tp1,
- tvm.convert([tp1, tp2, tp3, tp4]),
- tvm.convert([tr1, tr2]))
+ all_the_vars = relay.FuncType(tvm.runtime.convert([t1, t2]), tp1,
+ tvm.runtime.convert([tp1, tp2, tp3, tp4]),
+ tvm.runtime.convert([tr1, tr2]))
assert ft != all_the_vars
- different_rel = relay.FuncType(tvm.convert([t1, t2]), tp1,
- tvm.convert([tp1, tp3]),
- tvm.convert([tr3]))
+ different_rel = relay.FuncType(tvm.runtime.convert([t1, t2]), tp1,
+ tvm.runtime.convert([tp1, tp3]),
+ tvm.runtime.convert([tr3]))
assert ft != different_rel
- more_rels = relay.FuncType(tvm.convert([t1, t2]), tp1,
- tvm.convert([tp1, tp3]),
- tvm.convert([tr1, tr3]))
+ more_rels = relay.FuncType(tvm.runtime.convert([t1, t2]), tp1,
+ tvm.runtime.convert([tp1, tp3]),
+ tvm.runtime.convert([tr1, tr3]))
assert ft != more_rels
tp1 = relay.TypeVar("v1", relay.TypeKind.Type)
tp2 = relay.TypeVar("v2", relay.TypeKind.Type)
- tup1 = relay.TupleType(tvm.convert([t1, t2, tp1]))
- tup2 = relay.TupleType(tvm.convert([t1, t2, tp1]))
- tup3 = relay.TupleType(tvm.convert([t2, t1, tp1]))
- tup4 = relay.TupleType(tvm.convert([t1, t2, tp2]))
+ tup1 = relay.TupleType(tvm.runtime.convert([t1, t2, tp1]))
+ tup2 = relay.TupleType(tvm.runtime.convert([t1, t2, tp1]))
+ tup3 = relay.TupleType(tvm.runtime.convert([t2, t1, tp1]))
+ tup4 = relay.TupleType(tvm.runtime.convert([t1, t2, tp2]))
# as long as types are alpha-equal and in same order,
# tuples should be alpha-equal
attr1_same = tvm.ir.make_node("attrs.TestAttrs", name="attr", padding=(3,4))
attr2 = tvm.ir.make_node("attrs.TestAttrs", name="attr", padding=(3,4,4))
- tr = relay.TypeRelation(broadcast, tvm.convert([t1, t2]), 1, attr1)
- same = relay.TypeRelation(broadcast, tvm.convert([t1, t2]), 1, attr1)
- diff_func = relay.TypeRelation(identity, tvm.convert([t1, t2]), 1, attr1)
- diff_order = relay.TypeRelation(broadcast, tvm.convert([t2, t1]), 1, attr1)
- diff_args = relay.TypeRelation(broadcast, tvm.convert([t2, t3]), 1, attr1)
- diff_attr = relay.TypeRelation(broadcast, tvm.convert([t1, t2]), 1, attr2)
- same_attr = relay.TypeRelation(broadcast, tvm.convert([t1, t2]), 1, attr1_same)
+ tr = relay.TypeRelation(broadcast, tvm.runtime.convert([t1, t2]), 1, attr1)
+ same = relay.TypeRelation(broadcast, tvm.runtime.convert([t1, t2]), 1, attr1)
+ diff_func = relay.TypeRelation(identity, tvm.runtime.convert([t1, t2]), 1, attr1)
+ diff_order = relay.TypeRelation(broadcast, tvm.runtime.convert([t2, t1]), 1, attr1)
+ diff_args = relay.TypeRelation(broadcast, tvm.runtime.convert([t2, t3]), 1, attr1)
+ diff_attr = relay.TypeRelation(broadcast, tvm.runtime.convert([t1, t2]), 1, attr2)
+ same_attr = relay.TypeRelation(broadcast, tvm.runtime.convert([t1, t2]), 1, attr1_same)
- bigger = relay.TypeRelation(identity, tvm.convert([t1, t3, t2]), 2, attr1)
- diff_num_inputs = relay.TypeRelation(identity, tvm.convert([t1, t3, t2]), 1, attr2)
+ bigger = relay.TypeRelation(identity, tvm.runtime.convert([t1, t3, t2]), 2, attr1)
+ diff_num_inputs = relay.TypeRelation(identity, tvm.runtime.convert([t1, t3, t2]), 1, attr2)
# func, number of args, input count, and order should be the same
assert tr == same
import pytest
import tvm
+from tvm import te
from tvm import relay
from tvm.relay import transform, analysis
from tvm.relay.testing.temp_op_attr import TempOpAttr
import numpy as np
import tvm
+from tvm import te
from tvm import relay
from tvm.contrib import graph_runtime
from tvm.relay.expr_functor import ExprMutator
import pytest
import tvm
+from tvm import te
from tvm import relay
from tvm.relay import testing
# under the License.
import tvm
+from tvm import te
import tvm.relay as relay
import tvm.relay.transform as _transform
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
from tvm import relay
from tvm.relay.analysis import check_kind
import pytest
def test_tuple_kind():
# only contain type kinds
tp = relay.TypeVar('tp', relay.TypeKind.Type)
- tt = relay.TensorType(tvm.convert([1, 2, 3]), 'float32')
- tf = relay.FuncType(tvm.convert([]), tt, tvm.convert([]), tvm.convert([]))
- fields = tvm.convert([tp, tf, tt])
+ tt = relay.TensorType(tvm.runtime.convert([1, 2, 3]), 'float32')
+ tf = relay.FuncType(tvm.runtime.convert([]), tt, tvm.runtime.convert([]), tvm.runtime.convert([]))
+ fields = tvm.runtime.convert([tp, tf, tt])
tup_ty = relay.TupleType(fields)
assert check_kind(tup_ty) == relay.TypeKind.Type
tp1 = relay.TypeVar('tp1', relay.TypeKind.Type)
tp2 = relay.TypeVar('tp2', relay.TypeKind.Type)
- shape = tvm.convert([1, 2, 3])
+ shape = tvm.runtime.convert([1, 2, 3])
dtype = 'float32'
tensor_type = relay.TensorType(shape, dtype)
- tr = relay.TypeRelation(None, tvm.convert([tensor_type, tp1]) , 1, None)
+ tr = relay.TypeRelation(None, tvm.runtime.convert([tensor_type, tp1]) , 1, None)
- type_params = tvm.convert([tp1, tp2])
- type_constraints = tvm.convert([tr])
- arg_types = tvm.convert([tp1, tensor_type])
- ret_type = relay.TupleType(tvm.convert([tp2, tensor_type]))
+ type_params = tvm.runtime.convert([tp1, tp2])
+ type_constraints = tvm.runtime.convert([tr])
+ arg_types = tvm.runtime.convert([tp1, tensor_type])
+ ret_type = relay.TupleType(tvm.runtime.convert([tp2, tensor_type]))
tf = relay.FuncType(arg_types, ret_type, type_params, type_constraints)
assert check_kind(tf) == relay.TypeKind.Type
def test_ref_kind():
# only contain type kinds
- tt = relay.TensorType(tvm.convert([1, 2, 3]), 'float32')
- ft = relay.FuncType(tvm.convert([]), tt, tvm.convert([]), tvm.convert([]))
+ tt = relay.TensorType(tvm.runtime.convert([1, 2, 3]), 'float32')
+ ft = relay.FuncType(tvm.runtime.convert([]), tt, tvm.runtime.convert([]), tvm.runtime.convert([]))
rt1 = relay.RefType(tt)
assert check_kind(rt1) == relay.TypeKind.Type
def test_relation_kind():
# only have type kinds for arguments
tp = relay.TypeVar('tp', relay.TypeKind.Type)
- tt = relay.TensorType(tvm.convert([1, 2, 3]), 'float32')
- tf = relay.FuncType(tvm.convert([]), tt, tvm.convert([]), tvm.convert([]))
- args = tvm.convert([tf, tt, tp])
+ tt = relay.TensorType(tvm.runtime.convert([1, 2, 3]), 'float32')
+ tf = relay.FuncType(tvm.runtime.convert([]), tt, tvm.runtime.convert([]), tvm.runtime.convert([]))
+ args = tvm.runtime.convert([tf, tt, tp])
tr = relay.TypeRelation(None, args, 2, None)
assert check_kind(tr) == relay.TypeKind.Constraint
tp1 = relay.TypeVar('tp1', relay.TypeKind.ShapeVar)
tp2 = relay.TypeVar('tp2', relay.TypeKind.BaseType)
tp3 = relay.TypeVar('tp3', relay.TypeKind.Constraint)
- fields = tvm.convert([tp1, tp2, tp3])
+ fields = tvm.runtime.convert([tp1, tp2, tp3])
tup_ty = relay.TupleType(fields)
check_kind(tup_ty)
tp2 = relay.TypeVar('tp2', relay.TypeKind.BaseType)
tp3 = relay.TypeVar('tp3', relay.TypeKind.Constraint)
- type_params = tvm.convert([tp1, tp2, tp3])
- type_constraints = tvm.convert([])
- arg_types = tvm.convert([tp1, tp2])
+ type_params = tvm.runtime.convert([tp1, tp2, tp3])
+ type_constraints = tvm.runtime.convert([])
+ arg_types = tvm.runtime.convert([tp1, tp2])
ret_type = tp3
tf = relay.FuncType(arg_types, ret_type, type_params, type_constraints)
tp1 = relay.TypeVar('tp1', relay.TypeKind.ShapeVar)
tp2 = relay.TypeVar('tp2', relay.TypeKind.BaseType)
tp3 = relay.TypeVar('tp3', relay.TypeKind.Constraint)
- args = tvm.convert([tp1, tp2, tp3])
+ args = tvm.runtime.convert([tp1, tp2, tp3])
func = tvm.ir.EnvFunc.get("tvm.relay.type_relation.Broadcast")
tr = relay.TypeRelation(func, args, 2, None)
def test_func_with_invalid_ret_type():
tp1 = relay.TypeVar('tp1', relay.TypeKind.Type)
tp2 = relay.TypeVar('tp2', relay.TypeKind.ShapeVar)
- tf = relay.FuncType(tvm.convert([tp1]), tp2, tvm.convert([tp1, tp2]), tvm.convert([]))
+ tf = relay.FuncType(tvm.runtime.convert([tp1]), tp2, tvm.runtime.convert([tp1, tp2]), tvm.runtime.convert([]))
check_kind(tf)
def test_func_with_invalid_arg_types():
tp1 = relay.TypeVar('tp1', relay.TypeKind.ShapeVar)
tp2 = relay.TypeVar('tp2', relay.TypeKind.Type)
- tf = relay.FuncType(tvm.convert([tp1]), tp2, tvm.convert([tp1, tp2]), tvm.convert([]))
+ tf = relay.FuncType(tvm.runtime.convert([tp1]), tp2, tvm.runtime.convert([tp1, tp2]), tvm.runtime.convert([]))
check_kind(tf)
def test_func_with_invalid_tuple():
tp1 = relay.TypeVar('tp1', relay.TypeKind.ShapeVar)
- ret_type = relay.TupleType(tvm.convert([tp1, tp1, tp1]))
+ ret_type = relay.TupleType(tvm.runtime.convert([tp1, tp1, tp1]))
- tf = relay.FuncType(tvm.convert([]), ret_type, tvm.convert([tp1]), tvm.convert([]))
+ tf = relay.FuncType(tvm.runtime.convert([]), ret_type, tvm.runtime.convert([tp1]), tvm.runtime.convert([]))
check_kind(tf)
tp3 = relay.TypeVar('tp3', relay.TypeKind.Constraint)
func = tvm.ir.EnvFunc.get("tvm.relay.type_relation.Identity")
- tr = relay.TypeRelation(func, tvm.convert([tp2, tp3]), 1, None)
+ tr = relay.TypeRelation(func, tvm.runtime.convert([tp2, tp3]), 1, None)
- tf = relay.FuncType(tvm.convert([tp1]), tp1, tvm.convert([tp1, tp2, tp3]), tvm.convert([tr]))
+ tf = relay.FuncType(tvm.runtime.convert([tp1]), tp1, tvm.runtime.convert([tp1, tp2, tp3]), tvm.runtime.convert([tr]))
check_kind(tf)
@pytest.mark.xfail(raises=tvm.error.TVMError)
def test_tuple_with_invalid_func():
- tensor_type = relay.TensorType(tvm.convert([1, 2, 3]), 'float32')
+ tensor_type = relay.TensorType(tvm.runtime.convert([1, 2, 3]), 'float32')
tp1 = relay.TypeVar('tp1', relay.TypeKind.ShapeVar)
- tf = relay.FuncType(tvm.convert([]), tp1, tvm.convert([tp1]), tvm.convert([]))
+ tf = relay.FuncType(tvm.runtime.convert([]), tp1, tvm.runtime.convert([tp1]), tvm.runtime.convert([]))
- tup_ty = relay.TupleType(tvm.convert([tensor_type, tf]))
+ tup_ty = relay.TupleType(tvm.runtime.convert([tensor_type, tf]))
check_kind(tup_ty)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
from tvm import relay
from tvm.relay import transform
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
from tvm import relay
from tvm.relay import transform
# under the License.
"""Test alter op layout pass"""
import tvm
+from tvm import te
from tvm import relay
from tvm.relay.op import register_alter_op_layout
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
from tvm import relay
from tvm.relay import Function, transform
from tvm.relay.analysis import alpha_equal, graph_equal, free_vars, assert_alpha_equal
class env:
def __init__(self):
- self.shape = tvm.convert([1, 2, 3])
+ self.shape = tvm.runtime.convert([1, 2, 3])
self.tt = relay.TensorType(self.shape, "float32")
self.int32 = relay.TensorType([], "int32")
self.float32 = relay.TensorType([], "float32")
# under the License.
"""Test eliminate common subexpr pass"""
import tvm
+from tvm import te
from tvm import relay
from tvm.relay.op import register_alter_op_layout
import numpy as np
import tvm
+from tvm import te
from tvm import relay
import tvm.relay.transform as _transform
# under the License.
import numpy as np
import tvm
+from tvm import te
from tvm import relay
from tvm.relay import transform
from tvm.relay.build_module import bind_params_by_name
raise RuntimeError()
# the fold constant should work on any context.
- with tvm.build_config(add_lower_pass=[(0, fail)]):
+ with tvm.target.build_config(add_lower_pass=[(0, fail)]):
with tvm.target.create("cuda"):
zz = run_opt_pass(before(), transform.FoldConstant())
zexpected = run_opt_pass(expected(), transform.InferType())
import numpy as np
import tvm
+from tvm import te
from tvm import relay
from tvm.relay import transform
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
from tvm import relay
from tvm.relay import transform
from tvm.relay.testing import run_opt_pass
import numpy as np
import tvm
+from tvm import te
from tvm import relay
from tvm.relay.analysis import free_vars, free_type_vars, assert_alpha_equal
from tvm.relay import create_executor, transform
import pytest
import tvm
+from tvm import te
from tvm import relay
from tvm.relay import transform
"""Test legalize pass"""
import numpy as np
import tvm
+from tvm import te
from tvm import relay
from tvm.contrib import graph_runtime
"""Unit tests for MAC counter."""
import numpy as np
import tvm
+from tvm import te
from tvm import relay
from tvm.relay import analysis, transform
data2 = relay.var("data2", shape=dshape2)
gemm = relay.nn.dense(data1, data2)
func = relay.Function([data1, data2],
- relay.Tuple(tvm.convert([gemm])))
+ relay.Tuple(tvm.runtime.convert([gemm])))
func = run_opt_pass(func, transform.InferType())
compute_count = analysis.get_total_mac_number(func)
expect_count = n * m * k
channels=output_channel,
kernel_size=(kh, kw),
padding=(h_padding, w_padding))
- func = relay.Function([data, weight], relay.Tuple(tvm.convert([conv2d])))
+ func = relay.Function([data, weight], relay.Tuple(tvm.runtime.convert([conv2d])))
func = run_opt_pass(func, transform.InferType())
compute_count = analysis.get_total_mac_number(func)
expect_count = batch_size * input_channel * oh * ow * output_channel * kh * kw
weight_dense)
func = relay.Function([data1, data2, weight_conv, weight_dense],
- relay.Tuple(tvm.convert([conv2d_1, conv2d_2,
+ relay.Tuple(tvm.runtime.convert([conv2d_1, conv2d_2,
dense_1, add, flattened])))
# alter the CONV 2D data layout to test
func = run_opt_pass(func, transform.AlterOpLayout())
groups=64)
add = relay.add(depthwise_conv2d_1, depthwise_conv2d_2)
func = relay.Function([data1, data2, weight_conv],
- relay.Tuple(tvm.convert([depthwise_conv2d_1,
+ relay.Tuple(tvm.runtime.convert([depthwise_conv2d_1,
depthwise_conv2d_2,
add])))
func = run_opt_pass(func, transform.InferType())
kernel_size=(kh, kw),
padding=(h_padding, w_padding))
func = relay.Function([data, weight],
- relay.Tuple(tvm.convert([conv2d_transpose])))
+ relay.Tuple(tvm.runtime.convert([conv2d_transpose])))
func = run_opt_pass(func, transform.InferType())
compute_count = analysis.get_total_mac_number(func)
expect_count = batch_size * input_channel * oh * ow * output_channel * kh * kw
import pytest
import tvm
+from tvm import te
from tvm import relay
from tvm.relay import ExprFunctor
from tvm.relay import Function, Call
import numpy as np
import tvm
+from tvm import te
from tvm import relay
from tvm.relay.analysis import alpha_equal, assert_alpha_equal
from tvm.relay.prelude import Prelude
import pytest
import tvm
+from tvm import te
import tvm.relay.testing
import tvm.relay.transform as transform
from tvm import relay
"""Test legalize pass"""
import numpy as np
import tvm
+from tvm import te
from tvm import relay
from tvm.contrib import graph_runtime
# under the License.
import pytest
import tvm
+from tvm import te
from tvm import relay
from tvm.relay import transform
from tvm.relay.prelude import Prelude
# under the License.
import numpy as np
import tvm
+from tvm import te
from tvm import relay
from tvm.relay.analysis import alpha_equal, detect_feature
from tvm.relay import op, create_executor, transform
# under the License.
import numpy as np
import tvm
+from tvm import te
from tvm import relay
from tvm.relay.analysis import alpha_equal, detect_feature
from tvm.relay.transform import to_cps, un_cps
# under the License.
import numpy as np
import tvm
+from tvm import te
from tvm import relay
from tvm.relay import op, create_executor, transform, Feature
from tvm.relay.analysis import detect_feature
# under the License.
import tvm
+from tvm import te
from tvm import relay
from tvm.relay.prelude import Prelude
from tvm.relay.analysis import unmatched_cases
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
from tvm import relay
from tvm.relay.analysis import (free_vars, free_type_vars,
bound_vars, bound_type_vars,
# under the License.
import numpy as np
import tvm
+from tvm import te
from tvm import relay
from tvm.relay.testing import to_python, run_as_python
from tvm.relay.prelude import Prelude
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
from tvm import relay
from tvm.relay import TypeFunctor, TypeMutator, TypeVisitor
from tvm.relay.analysis import assert_graph_equal
def test_func_type():
tv = TypeVar('tv')
- tt = relay.TensorType(tvm.convert([1, 2, 3]), 'float32')
+ tt = relay.TensorType(tvm.runtime.convert([1, 2, 3]), 'float32')
ft = FuncType([tt], tt, type_params=[tv])
check_visit(ft)
for expressions.
"""
import tvm
+from tvm import te
from tvm import relay
from tvm.relay import op, transform, analysis
from tvm.relay.analysis import assert_alpha_equal
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
from tvm import relay
import pytest
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
from tvm import relay
from tvm.relay import transform
import pytest
import tvm
+from tvm import te
from tvm import runtime
from tvm import relay
from tvm.relay.scope_builder import ScopeBuilder
import numpy as np
import tvm
+from tvm import te
from tvm.runtime import vm as _vm
from tvm.relay import vm as rly_vm
from tvm import relay
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
class CanonicalChecker:
def __init__(self):
def verify(self, data, expected):
res = self.analyzer.canonical_simplify(data)
- assert tvm.ir_pass.Equal(res, expected), "\ndata={}\nres={}\nexpected={}".format(data, res, expected)
+ assert tvm.tir.ir_pass.Equal(res, expected), "\ndata={}\nres={}\nexpected={}".format(data, res, expected)
def test_mul_sum_simplify():
ck = CanonicalChecker()
- x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z")
+ x, y, z = te.var("x"), te.var("y"), te.var("z")
ck.verify(2 + (3 * x + z + y + 1) * 4 + x,
x * 13 + z * 4 + y * 4 +6)
ck.verify(x * 3 - 4 * x + 1, 1 - x)
ck.verify(y + x * 3 - 5 * x + 1 + y, y * 2 + 1 - x * 2)
- tdiv = tvm.truncdiv
- tmod = tvm.truncmod
+ tdiv = tvm.tir.truncdiv
+ tmod = tvm.tir.truncmod
# trucdiv
ck.verify(tdiv(x + y + x + y * 3, 2), y * 2 + x)
ck.verify(tmod(x + y + x + y * 3, 2), 0)
# floordiv
- fld = tvm.floordiv
- flm = tvm.floormod
+ fld = tvm.te.floordiv
+ flm = tvm.te.floormod
ck.verify(flm(x + x + y * 3, 2), flm(y * 3, 2))
ck.verify(fld(x + y + x + y * 3, 2), y * 2 + x)
ck.verify(flm(x + y + x + y * 3, 2), 0)
def test_split_index_simplify():
ck = CanonicalChecker()
- x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z")
+ x, y, z = te.var("x"), te.var("y"), te.var("z")
# trucdiv
- tdiv = tvm.truncdiv
- tmod = tvm.truncmod
+ tdiv = tvm.tir.truncdiv
+ tmod = tvm.tir.truncmod
# split div const
ck.verify(tdiv(x, 3) *3 + tmod(x, 3), x)
ck.verify(tdiv(x * 4 + y, 2) * 2 + tmod(x * 4 + y, 2), x * 4 + y)
# floordiv
- fld = tvm.floordiv
- flm = tvm.floormod
+ fld = tvm.te.floordiv
+ flm = tvm.te.floormod
ck.verify(fld(x, 3) * 3 + flm(x, 3), x)
ck.verify(fld(x, 6) * 6 + flm(fld(x, 3), 2) * 3 + flm(x, 3), x)
ck.verify(fld(fld(flm(x, 16), 2) * 2, 4), fld(flm(x, 16), 4))
def test_div_simplify():
ck = CanonicalChecker()
- x = tvm.var("x")
- tdiv = tvm.truncdiv
+ x = te.var("x")
+ tdiv = tvm.tir.truncdiv
# truc div
ck.verify(tdiv(16+48*x,16), x*3 + 1)
ck.verify(tdiv(17 + 47 * x, 16), tdiv(x * 47 + 17, 16))
# floordiv
- fld = tvm.floordiv
+ fld = tvm.te.floordiv
ck.analyzer.update(x, tvm.arith.ConstIntBound(-1000, 10000), True)
ck.verify(fld(16+48*x, 16), x*3 + 1)
ck.verify(fld(17+48*x, 16), x * 3 + 1)
def test_floormod_simplify():
ck = CanonicalChecker()
- flm = tvm.floormod
- x, y = tvm.var("x"), tvm.var("y")
+ flm = tvm.te.floormod
+ x, y = te.var("x"), te.var("y")
ck.verify(flm(flm((x*4) + y - 466036, 24528) - 24512, 16),
flm((x*4) + y + 12, 16))
def test_canonical_mixed():
ck = CanonicalChecker()
- x = tvm.var("x")
- z = tvm.const(3, "int32")
- tdiv = tvm.truncdiv
- tmod = tvm.truncmod
+ x = te.var("x")
+ z = tvm.tir.const(3, "int32")
+ tdiv = tvm.tir.truncdiv
+ tmod = tvm.tir.truncmod
ck.verify(tdiv(x, (z*z)) - tdiv(x, (z*z)), 0)
ck.verify(tdiv(x, (z+z)) - tdiv(x, (z+z)), 0)
ck.verify(x - 2 < 3, x < 5)
- ck.verify(tvm.max(x, 1) - tvm.max(x, 1), 0)
- ck.verify(tvm.min(x, 1) - tvm.min(x, 1), 0)
+ ck.verify(tvm.te.max(x, 1) - tvm.te.max(x, 1), 0)
+ ck.verify(tvm.te.min(x, 1) - tvm.te.min(x, 1), 0)
ck.verify(x * x - x * x, 0)
- fld = tvm.floordiv
+ fld = tvm.te.floordiv
ck.verify(fld(x, (z*z)) - fld(x, (z*z)), 0)
ck.verify(fld(x, (z+z)) - fld(x, (z+z)), 0)
def test_reduce_combiner_simplify():
ck = CanonicalChecker()
- dummy = tvm.var('dummy')
- comm_reducer = tvm.comm_reducer
- prod = comm_reducer(lambda x, y: x*y, lambda t0: tvm.const(1, t0))
+ dummy = te.var('dummy')
+ comm_reducer = te.comm_reducer
+ prod = comm_reducer(lambda x, y: x*y, lambda t0: tvm.tir.const(1, t0))
sum_or_prod = comm_reducer(
lambda x, y: tvm.tir.Select(dummy < 0,
x + y, x*y),
lambda t0: tvm.tir.Select(dummy < 0,
- tvm.const(0, t0), tvm.const(1, t0)))
+ tvm.tir.const(0, t0), tvm.tir.const(1, t0)))
sum_and_prod = comm_reducer(
lambda x, y: (x[0] + y[0],
x[1]*y[1]),
- lambda t0, t1: (tvm.const(0, t0),
- tvm.const(5, t0) - tvm.const(4, t0)))
+ lambda t0, t1: (tvm.tir.const(0, t0),
+ tvm.tir.const(5, t0) - tvm.tir.const(4, t0)))
some_reducer1 = comm_reducer(
lambda x, y: (x[0] + y[0],
x[0] + y[0] + x[1] + y[1],
x[0]*y[2] + y[0]*x[2],
x[1] + y[2],
4.0),
- lambda t0, t1, t2, t3, t4: (tvm.const(0, t0),
- tvm.const(1, t1),
- tvm.const(2, t2),
- tvm.const(3, t3),
- tvm.const(4, t4)))
-
- k = tvm.reduce_axis((0, 10), name="k")
- A = tvm.placeholder((10,), name='A')
+ lambda t0, t1, t2, t3, t4: (tvm.tir.const(0, t0),
+ tvm.tir.const(1, t1),
+ tvm.tir.const(2, t2),
+ tvm.tir.const(3, t3),
+ tvm.tir.const(4, t4)))
+
+ k = te.reduce_axis((0, 10), name="k")
+ A = te.placeholder((10,), name='A')
# Test that SimplifyCombiner makes use of vranges
ck.analyzer.update(dummy, tvm.arith.ConstIntBound(-10, -4))
- ck.verify(sum_or_prod(A[k], k), tvm.sum(A[k], k))
+ ck.verify(sum_or_prod(A[k], k), te.sum(A[k], k))
ck.analyzer.update(dummy, tvm.arith.ConstIntBound(5, 9), True)
ck.verify(sum_or_prod(A[k], k), prod(A[k], k))
ck.analyzer.update(dummy, tvm.arith.ConstIntBound(-10, 100), True)
- ck.verify(sum_and_prod((A[k], A[10-k]), k)[0], tvm.sum(A[k], k))
+ ck.verify(sum_and_prod((A[k], A[10-k]), k)[0], te.sum(A[k], k))
ck.verify(sum_and_prod((A[k], A[10-k]), k)[1], prod(A[10-k], k))
reference_simplified_sources = [[A[0]],
# Check that the remaining components are the expected ones.
for lhs, rhs in zip(simplified.source, reference_simplified_sources[j]):
- assert tvm.ir_pass.Equal(lhs, rhs)
+ assert tvm.tir.ir_pass.Equal(lhs, rhs)
# Test that components with side effects are not removed
side_effect = lambda *xs: tvm.tir.Call("int32", "dummy", xs, tvm.tir.Call.Intrinsic, None, 0)
ck.verify(sum_and_prod((A[k], side_effect(A[10-k])), k)[0],
sum_and_prod((A[k], side_effect(A[10-k])), k)[0])
ck.verify(sum_and_prod((side_effect(A[k]), A[10-k]), k)[0],
- tvm.sum(side_effect(A[k]), k))
+ te.sum(side_effect(A[k]), k))
def test_reduce_simplify():
ck = CanonicalChecker()
- k = tvm.reduce_axis((0, 10), name="k")
- j = tvm.reduce_axis((-5, 3), name="j")
- A = tvm.placeholder((10,), name='A')
- ck.verify(tvm.sum(tvm.tir.Select(k + j < 12, k + j, 0), [k, j]),
- tvm.sum(k + j, [k, j]))
- ck.verify(tvm.sum(A[3], []), A[3])
+ k = te.reduce_axis((0, 10), name="k")
+ j = te.reduce_axis((-5, 3), name="j")
+ A = te.placeholder((10,), name='A')
+ ck.verify(te.sum(tvm.tir.Select(k + j < 12, k + j, 0), [k, j]),
+ te.sum(k + j, [k, j]))
+ ck.verify(te.sum(A[3], []), A[3])
# The rule below is not typical, removed for now
- ck.verify(tvm.sum(tvm.div(k, 10), k), tvm.sum(tvm.const(0, "int32"), k))
+ ck.verify(te.sum(te.div(k, 10), k), te.sum(tvm.tir.const(0, "int32"), k))
def test_simplify_if_then_else():
ck = CanonicalChecker()
- x = tvm.var("x")
- y = tvm.var("y")
- tdiv = tvm.truncdiv
- tmod = tvm.truncmod
+ x = te.var("x")
+ y = te.var("y")
+ tdiv = tvm.tir.truncdiv
+ tmod = tvm.tir.truncmod
# simplification that takes condition into account.
- res = tvm.if_then_else((x * 4 + y) >= 466036,
- tvm.if_then_else(24512 <= tmod(((x*4) + y) - 466036, 24528),
+ res = tvm.tir.if_then_else((x * 4 + y) >= 466036,
+ tvm.tir.if_then_else(24512 <= tmod(((x*4) + y) - 466036, 24528),
tmod(tmod(((x*4) + y) - 466036, 24528) -24512, 16),
x), y)
- res2 = tvm.if_then_else((x * 4) >= 466036 - y,
- tvm.if_then_else(24512 <= tmod(((x*4) + y) - 466036, 24528),
+ res2 = tvm.tir.if_then_else((x * 4) >= 466036 - y,
+ tvm.tir.if_then_else(24512 <= tmod(((x*4) + y) - 466036, 24528),
tmod(tmod(((x*4) + y) - 466036, 24528) -24512, 16),
x), y)
- expected = tvm.if_then_else(
+ expected = tvm.tir.if_then_else(
tvm.tir.LE(466036, (x * 4 + y)),
- tvm.if_then_else(tvm.tir.LE(24512, tmod(((x*4) + y) - 4, 24528)),
+ tvm.tir.if_then_else(tvm.tir.LE(24512, tmod(((x*4) + y) - 4, 24528)),
tmod(((x*4) + y) - 4, 16),
x), y)
ck.verify(res, expected)
ck.verify(res2, expected)
# can only simplify if condition
- res = tvm.tir.Select(tvm.all(x >= -1, y >= 0), tmod(x + y + 100, 3), tmod(x + 100, 3))
- expected = tvm.tir.Select(tvm.all(x >= -1, y >= 0), tmod(x + y + 1, 3), tmod(x + 100, 3))
+ res = tvm.tir.Select(tvm.tir.all(x >= -1, y >= 0), tmod(x + y + 100, 3), tmod(x + 100, 3))
+ expected = tvm.tir.Select(tvm.tir.all(x >= -1, y >= 0), tmod(x + y + 1, 3), tmod(x + 100, 3))
ck.verify(res, ck.analyzer.canonical_simplify(expected))
res = tvm.tir.Select(x >= 10,
- tvm.if_then_else(tdiv(x, 3) > 2, x, 0), 0)
+ tvm.tir.if_then_else(tdiv(x, 3) > 2, x, 0), 0)
expected = tvm.tir.Select(x >= 10, x, 0)
ck.verify(res, ck.analyzer.canonical_simplify(expected))
res = tvm.tir.Select(x >= 10,
- tvm.if_then_else(tdiv(x, 3) < 2, x, 0), 0)
+ tvm.tir.if_then_else(tdiv(x, 3) < 2, x, 0), 0)
ck.verify(res, 0)
def test_complex_cases():
ck = CanonicalChecker()
- x = tvm.var("x")
- y = tvm.var("y")
- tdiv = tvm.truncdiv
- tmod = tvm.truncmod
+ x = te.var("x")
+ y = te.var("y")
+ tdiv = tvm.tir.truncdiv
+ tmod = tvm.tir.truncmod
res2 = (tdiv(tdiv(tmod(x*128 + y, 1296),36)*2 + 1,2)*36 +
tdiv(tmod((x*128) + y, 36)*2 + 1,2)
- tmod((x*128) + y, 1296) + 1)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_dtype_bound():
analyzer = tvm.arith.Analyzer()
- x = tvm.var("x", dtype="int64")
+ x = te.var("x", dtype="int64")
bd = analyzer.const_int_bound(x)
assert bd.min_value == bd.NEG_INF
assert bd.max_value == bd.POS_INF
- x = tvm.var("x", dtype="int8")
+ x = te.var("x", dtype="int8")
bd = analyzer.const_int_bound(x)
assert bd.min_value == -128
assert bd.max_value == 127
- x = tvm.var("x", dtype="uint8")
+ x = te.var("x", dtype="uint8")
bd = analyzer.const_int_bound(x)
assert bd.min_value == 0
assert bd.max_value == 255
def test_cast_bound():
analyzer = tvm.arith.Analyzer()
- x = tvm.var("x", dtype="int8")
- tmod = tvm.truncmod
+ x = te.var("x", dtype="int8")
+ tmod = tvm.tir.truncmod
bd = analyzer.const_int_bound(tmod(x, 3).astype("uint32"))
assert bd.min_value == 0
assert bd.max_value == 2
def test_add_sub_bound():
analyzer = tvm.arith.Analyzer()
- x, y = tvm.var("x", "int64"), tvm.var("y", "int64")
+ x, y = te.var("x", "int64"), te.var("y", "int64")
bd = analyzer.const_int_bound(x + y)
assert bd.min_value == bd.NEG_INF
assert bd.max_value == bd.POS_INF
def test_mul_bound():
analyzer = tvm.arith.Analyzer()
- x, y = tvm.var("x"), tvm.var("y")
+ x, y = te.var("x"), te.var("y")
analyzer.update(x, tvm.arith.ConstIntBound(-2, 4))
analyzer.update(y, tvm.arith.ConstIntBound(4, 10))
def test_truncdiv_bound():
analyzer = tvm.arith.Analyzer()
- x, y = tvm.var("x"), tvm.var("y")
- tdiv = tvm.truncdiv
+ x, y = te.var("x"), te.var("y")
+ tdiv = tvm.tir.truncdiv
analyzer.update(x, tvm.arith.ConstIntBound(-9, 4))
analyzer.update(y, tvm.arith.ConstIntBound(4, 10))
def test_truncmod_bound():
analyzer = tvm.arith.Analyzer()
- x, y = tvm.var("x"), tvm.var("y")
+ x, y = te.var("x"), te.var("y")
- tmod = tvm.truncmod
+ tmod = tvm.tir.truncmod
analyzer.update(x, tvm.arith.ConstIntBound(-9, 4))
analyzer.update(y, tvm.arith.ConstIntBound(4, 10))
def test_floordiv_bound():
analyzer = tvm.arith.Analyzer()
- x, y = tvm.var("x"), tvm.var("y")
- fld = tvm.floordiv
+ x, y = te.var("x"), te.var("y")
+ fld = tvm.te.floordiv
analyzer.update(x, tvm.arith.ConstIntBound(-9, 4))
analyzer.update(y, tvm.arith.ConstIntBound(4, 10))
bd = analyzer.const_int_bound(fld(x, y))
def test_floormod_bound():
analyzer = tvm.arith.Analyzer()
- x, y = tvm.var("x"), tvm.var("y")
- flm = tvm.floormod
+ x, y = te.var("x"), te.var("y")
+ flm = tvm.te.floormod
analyzer.update(x, tvm.arith.ConstIntBound(-9, 4))
analyzer.update(y, tvm.arith.ConstIntBound(4, 10))
def test_min_max_bound():
analyzer = tvm.arith.Analyzer()
- x, y = tvm.var("x"), tvm.var("y")
+ x, y = te.var("x"), te.var("y")
analyzer.update(x, tvm.arith.ConstIntBound(-9, 11))
analyzer.update(y, tvm.arith.ConstIntBound(4, 10))
- bd = analyzer.const_int_bound(tvm.min(x, y))
+ bd = analyzer.const_int_bound(tvm.te.min(x, y))
assert bd.min_value == -9
assert bd.max_value == 10
analyzer.update(x, tvm.arith.ConstIntBound(bd.NEG_INF, bd.POS_INF), override=True)
analyzer.update(y, tvm.arith.ConstIntBound(4, 10), override=True)
- bd = analyzer.const_int_bound(tvm.min(x, y))
+ bd = analyzer.const_int_bound(tvm.te.min(x, y))
assert bd.min_value == bd.NEG_INF
assert bd.max_value == 10
- bd = analyzer.const_int_bound(tvm.max(x, y))
+ bd = analyzer.const_int_bound(tvm.te.max(x, y))
assert bd.min_value == 4
assert bd.max_value == bd.POS_INF
analyzer.update(x, tvm.arith.ConstIntBound(1, bd.POS_INF), override=True)
analyzer.update(y, tvm.arith.ConstIntBound(4, 10), override=True)
- bd = analyzer.const_int_bound(tvm.max(x, y))
+ bd = analyzer.const_int_bound(tvm.te.max(x, y))
assert bd.min_value == 4
assert bd.max_value == bd.POS_INF
def test_select_bound():
analyzer = tvm.arith.Analyzer()
- x, y = tvm.var("x"), tvm.var("y")
+ x, y = te.var("x"), te.var("y")
analyzer.update(x, tvm.arith.ConstIntBound(-9, 11))
analyzer.update(y, tvm.arith.ConstIntBound(4, 10))
def test_shift_and_bound():
analyzer = tvm.arith.Analyzer()
- x, y = tvm.var("x"), tvm.var("y")
+ x, y = te.var("x"), te.var("y")
analyzer.update(x, tvm.arith.ConstIntBound(-9, 11))
analyzer.update(y, tvm.arith.ConstIntBound(2, 10))
def test_mix_index_bound():
analyzer = tvm.arith.Analyzer()
- x, y = tvm.var("x"), tvm.var("y")
- tdiv = tvm.truncdiv
- tmod = tvm.truncmod
+ x, y = te.var("x"), te.var("y")
+ tdiv = tvm.tir.truncdiv
+ tmod = tvm.tir.truncmod
analyzer.update(x, tvm.arith.ConstIntBound(0, 24 - 1))
analyzer.update(y, tvm.arith.ConstIntBound(0, 3 - 1))
def test_size_var_bound():
analyzer = tvm.arith.Analyzer()
- x = tvm.size_var("x")
+ x = te.size_var("x")
bd = analyzer.const_int_bound(x)
assert bd.min_value == 0
assert bd.max_value == bd.POS_INF
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def assert_expr_equal(a, b):
- res = tvm.ir_pass.Simplify(a - b)
+ res = tvm.tir.ir_pass.Simplify(a - b)
equal = isinstance(res, tvm.tir.IntImm) and res.value == 0
if not equal:
raise ValueError("{} and {} are not equal".format(a, b))
def test_deduce():
- a = tvm.var('a')
- b = tvm.var('b')
- c = tvm.var('c')
- d = tvm.var('d')
+ a = te.var('a')
+ b = te.var('b')
+ c = te.var('c')
+ d = te.var('d')
b_s = tvm.arith.IntervalSet(2, 3)
c_s = tvm.arith.IntervalSet(10, 15)
d_s = tvm.arith.IntervalSet(-3, -1)
- zero = tvm.const(0, "int32")
+ zero = tvm.tir.const(0, "int32")
- fdiv = tvm.floordiv
+ fdiv = tvm.te.floordiv
e0 = (-b)*a+c-d
res0 = tvm.arith.deduce_bound(a, e0>=0, {b: b_s, c: c_s, d: d_s}, {})
assert_expr_equal(res1.max_value, ans1)
- e2 = (tvm.max(5, a * 4) < 0)
+ e2 = (tvm.te.max(5, a * 4) < 0)
res2 = tvm.arith.deduce_bound(a, e2, {b: b_s, c: c_s, d: d_s}, {})
assert str(res2.max_value) == "neg_inf"
assert str(res2.min_value) == "pos_inf"
# expression containing variable a is on rhs
- e2 = (zero < tvm.max(5, a * 4))
+ e2 = (zero < tvm.te.max(5, a * 4))
res2 = tvm.arith.deduce_bound(a, e2, {b: b_s, c: c_s, d: d_s}, {})
assert str(res2.max_value) == "neg_inf"
assert str(res2.min_value) == "pos_inf"
e3 = (-b)+a*c-d
res3 = tvm.arith.deduce_bound(a, e3>=0, {b: b_s, c: c_s, d: d_s}, {b: b_s, d: d_s})
ans3 = fdiv(2,c)+1
- assert str(tvm.ir_pass.Simplify(res3.min_value)) == str(ans3)
+ assert str(tvm.tir.ir_pass.Simplify(res3.min_value)) == str(ans3)
res3 = tvm.arith.deduce_bound(a, zero <= e3, {b: b_s, c: c_s, d: d_s}, {b: b_s, d: d_s})
- assert str(tvm.ir_pass.Simplify(res3.min_value)) == str(ans3)
+ assert str(tvm.tir.ir_pass.Simplify(res3.min_value)) == str(ans3)
# tests for `EQ` op
res4 = tvm.arith.deduce_bound(a, a == b, {}, {})
def test_check():
- a = tvm.var('a')
- b = tvm.var('b')
- c = tvm.var('c')
- d = tvm.var('d')
+ a = te.var('a')
+ b = te.var('b')
+ c = te.var('c')
+ d = te.var('d')
b_s = tvm.arith.IntervalSet(2, 3)
c_s = tvm.arith.IntervalSet(5, 7)
def test_deduce_basic():
def test_basic(a1, a2, coff):
- a = tvm.var('a')
- b = tvm.var('b')
+ a = te.var('a')
+ b = te.var('b')
b_s = tvm.arith.IntervalSet(a1, a2)
e0 = b + a*coff + 3
res1 = tvm.arith.deduce_bound(a, e0<17, {b: b_s}, {b: b_s})
[x, y] = [res1.max_value, b_s.max_value] if coff > 0 else [res1.min_value, b_s.min_value]
- assert (tvm.ir_pass.Simplify((x * coff + 3 + y) < 17)).value == 1
+ assert (tvm.tir.ir_pass.Simplify((x * coff + 3 + y) < 17)).value == 1
# expression containing variable a is on rhs
- res1 = tvm.arith.deduce_bound(a, tvm.const(17, "int32") < e0, {b: b_s}, {b: b_s})
+ res1 = tvm.arith.deduce_bound(a, tvm.tir.const(17, "int32") < e0, {b: b_s}, {b: b_s})
[x, y] = [res1.max_value, b_s.max_value] if coff < 0 else [res1.min_value, b_s.min_value]
- assert (tvm.ir_pass.Simplify((x * coff + 3 + y) > 17)).value == 1
+ assert (tvm.tir.ir_pass.Simplify((x * coff + 3 + y) > 17)).value == 1
# expression containing variable a is on rhs
- res1 = tvm.arith.deduce_bound(a, tvm.const(17, "int32")>= e0, {b: b_s}, {b: b_s})
+ res1 = tvm.arith.deduce_bound(a, tvm.tir.const(17, "int32")>= e0, {b: b_s}, {b: b_s})
[x, y] = [res1.max_value, b_s.max_value] if coff > 0 else [res1.min_value, b_s.min_value]
- assert (tvm.ir_pass.Simplify((x * coff + 3 + y) <= 17)).value == 1
+ assert (tvm.tir.ir_pass.Simplify((x * coff + 3 + y) <= 17)).value == 1
res1 = tvm.arith.deduce_bound(a, e0>=17, {b: b_s}, {b: b_s})
[x, y] = [res1.max_value, b_s.max_value] if coff < 0 else [res1.min_value, b_s.min_value]
- assert (tvm.ir_pass.Simplify((x * coff + 3 + y) >= 17)).value == 1
+ assert (tvm.tir.ir_pass.Simplify((x * coff + 3 + y) >= 17)).value == 1
test_basic(0, 4, 4)
test_basic(1, 5, 4)
def test_deduce_complex():
def test_complex(a1, a2, coff):
- a = tvm.var('a')
- b = tvm.var('b')
+ a = te.var('a')
+ b = te.var('b')
b_s = tvm.arith.IntervalSet(a1, a2)
e0 = (b*3 + a* coff) * 4
res1 = tvm.arith.deduce_bound(a, e0<63, {b: b_s}, {b: b_s})
[t, x] = [res1.max_value, b_s.max_value] if coff > 0 else [res1.min_value, b_s.min_value]
- assert (tvm.ir_pass.Simplify(((x*3 + t* coff) * 4) < 63)).value == 1
+ assert (tvm.tir.ir_pass.Simplify(((x*3 + t* coff) * 4) < 63)).value == 1
# expression containing variable a is on rhs
- res1 = tvm.arith.deduce_bound(a, tvm.const(63, "int32")>= e0, {b: b_s}, {b: b_s})
+ res1 = tvm.arith.deduce_bound(a, tvm.tir.const(63, "int32")>= e0, {b: b_s}, {b: b_s})
[t, x] = [res1.max_value, b_s.max_value] if coff > 0 else [res1.min_value, b_s.min_value]
- assert (tvm.ir_pass.Simplify(((x*3 + t* coff) * 4) <= 63)).value == 1
+ assert (tvm.tir.ir_pass.Simplify(((x*3 + t* coff) * 4) <= 63)).value == 1
res1 = tvm.arith.deduce_bound(a, e0>63, {b: b_s}, {b: b_s})
[t, x] = [res1.max_value, b_s.max_value] if coff < 0 else [res1.min_value, b_s.min_value]
- assert (tvm.ir_pass.Simplify(((x*3 + t* coff) * 4) > 63)).value == 1
+ assert (tvm.tir.ir_pass.Simplify(((x*3 + t* coff) * 4) > 63)).value == 1
# expression containing variable a is on rhs
- res1 = tvm.arith.deduce_bound(a, tvm.const(63, "int32") <= e0, {b: b_s}, {b: b_s})
+ res1 = tvm.arith.deduce_bound(a, tvm.tir.const(63, "int32") <= e0, {b: b_s}, {b: b_s})
[t, x] = [res1.max_value, b_s.max_value] if coff < 0 else [res1.min_value, b_s.min_value]
- assert (tvm.ir_pass.Simplify(((x*3 + t* coff) * 4) >= 63)).value == 1
+ assert (tvm.tir.ir_pass.Simplify(((x*3 + t* coff) * 4) >= 63)).value == 1
test_complex(0, 4, 4)
test_complex(0, 4, -4)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_basic():
- a = tvm.var("a")
- b = tvm.var("b")
- c = tvm.var("c")
- m = tvm.arith.detect_clip_bound(tvm.all(a * 1 < b * 6,
+ a = te.var("a")
+ b = te.var("b")
+ c = te.var("c")
+ m = tvm.arith.detect_clip_bound(tvm.tir.all(a * 1 < b * 6,
a - 1 > 0), [a])
- assert tvm.ir_pass.Simplify(m[1] - (b * 6 - 1)).value == 0
+ assert tvm.tir.ir_pass.Simplify(m[1] - (b * 6 - 1)).value == 0
assert m[0].value == 2
- m = tvm.arith.detect_clip_bound(tvm.all(a * 1 < b * 6,
+ m = tvm.arith.detect_clip_bound(tvm.tir.all(a * 1 < b * 6,
a - 1 > 0), [a, b])
assert len(m) == 0
- m = tvm.arith.detect_clip_bound(tvm.all(a + 10 * c <= 20,
+ m = tvm.arith.detect_clip_bound(tvm.tir.all(a + 10 * c <= 20,
b - 1 > 0), [a, b])
- assert tvm.ir_pass.Simplify(m[1] - (20 - 10 * c)).value == 0
- assert tvm.ir_pass.Simplify(m[2] - 2).value == 0
+ assert tvm.tir.ir_pass.Simplify(m[1] - (20 - 10 * c)).value == 0
+ assert tvm.tir.ir_pass.Simplify(m[2] - 2).value == 0
if __name__ == "__main__":
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_basic():
- a = tvm.var("a")
- b = tvm.var("b")
+ a = te.var("a")
+ b = te.var("b")
m = tvm.arith.detect_linear_equation(a * 4 + b * 6 + 7, [a])
assert m[0].value == 4
- assert tvm.ir_pass.Simplify(m[1] - (b * 6 + 7)).value == 0
+ assert tvm.tir.ir_pass.Simplify(m[1] - (b * 6 + 7)).value == 0
m = tvm.arith.detect_linear_equation(a * 4 * (a+1) + b * 6 + 7, [a])
assert len(m) == 0
m = tvm.arith.detect_linear_equation(a * 4 + (a+1) + b * 6 + 7, [a])
assert m[0].value == 5
- assert tvm.ir_pass.Simplify(m[1] - (b * 6 + 7 + 1)).value == 0
+ assert tvm.tir.ir_pass.Simplify(m[1] - (b * 6 + 7 + 1)).value == 0
m = tvm.arith.detect_linear_equation(a * b + 7, [a])
assert m[0] == b
m = tvm.arith.detect_linear_equation(b * 7, [])
assert len(m) == 1
- assert tvm.ir_pass.Simplify(m[0] - b * 7).value == 0
+ assert tvm.tir.ir_pass.Simplify(m[0] - b * 7).value == 0
def test_multivariate():
- v = [tvm.var("v%d" % i) for i in range(4)]
- b = tvm.var("b")
+ v = [te.var("v%d" % i) for i in range(4)]
+ b = te.var("b")
m = tvm.arith.detect_linear_equation(v[0] * (b + 4) + v[0] + v[1] * 8, v)
- assert(tvm.ir_pass.Equal(tvm.ir_pass.Simplify(m[0]), b + 5))
+ assert(tvm.tir.ir_pass.Equal(tvm.tir.ir_pass.Simplify(m[0]), b + 5))
assert(m[1].value == 8)
m = tvm.arith.detect_linear_equation(v[0] * (b + 4) + v[0] + v[1] * 8 * v[2], v)
m = tvm.arith.detect_linear_equation((v[0] - v[1]), [v[2]])
assert(m[0].value == 0)
- assert(tvm.ir_pass.Simplify(m[1] - (v[0] - v[1])).value == 0)
+ assert(tvm.tir.ir_pass.Simplify(m[1] - (v[0] - v[1])).value == 0)
m = tvm.arith.detect_linear_equation((v[0] - v[1]), [])
assert(len(m) == 1)
- assert(tvm.ir_pass.Simplify(m[0] - (v[0] - v[1])).value == 0)
+ assert(tvm.tir.ir_pass.Simplify(m[0] - (v[0] - v[1])).value == 0)
if __name__ == "__main__":
test_basic()
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_domain_touched():
- i = tvm.var('i')
- j = tvm.var('j')
- n = tvm.convert(100)
- m = tvm.var('m')
- a = tvm.placeholder((n, m), name = 'a')
- b = tvm.placeholder((n, m), name = 'b')
+ i = te.var('i')
+ j = te.var('j')
+ n = tvm.runtime.convert(100)
+ m = te.var('m')
+ a = te.placeholder((n, m), name = 'a')
+ b = te.placeholder((n, m), name = 'b')
ir = tvm.tir.For(
i, 0, n, 0, 0,
tvm.tir.For(j, 0, m, 0, 0,
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
class IntSetChecker:
return "\ndata={}\ndmap={}\nres={}\nexpected={}".format(data, dmap, res, expected)
def equal(x, y):
res = self.analyzer.canonical_simplify(x - y)
- return tvm.ir_pass.Equal(res, 0)
+ return tvm.tir.ir_pass.Equal(res, 0)
assert equal(res.min_value, expected[0]), err_msg()
assert equal(res.max_value, expected[1]), err_msg()
def test_add_sub():
ck = IntSetChecker()
- x, y = tvm.var("x"), tvm.var("y")
+ x, y = te.var("x"), te.var("y")
ck.verify(x + y, {x : tvm.arith.IntervalSet(0, 10)}, (y, 10 + y))
ck.verify(x + y,
{x : tvm.arith.IntervalSet(0, 10), y : tvm.arith.IntervalSet(1, 11)},
def test_mul_div():
ck = IntSetChecker()
- x, y = tvm.var("x"), tvm.var("y")
+ x, y = te.var("x"), te.var("y")
- tdiv = tvm.truncdiv
+ tdiv = tvm.tir.truncdiv
ck.analyzer.update(y, tvm.arith.ConstIntBound(1, 100), override=True)
ck.verify(x * y, {x : tvm.arith.IntervalSet(0, 10)}, (0, 10 * y))
ck.verify(x * 2, {x : tvm.arith.IntervalSet(1, 10)}, (2, 20))
ck.verify(tdiv(x, y), {x : tvm.arith.IntervalSet(0, 10)}, (0, tdiv(10, y)))
ck.verify(tdiv(x, 2), {x : tvm.arith.IntervalSet(1, 10)}, (0, 5))
- fld = tvm.floordiv
+ fld = tvm.te.floordiv
ck.verify(fld(x, y), {x : tvm.arith.IntervalSet(0, 10)}, (0, fld(10, y)))
ck.verify(fld(x, 2), {x : tvm.arith.IntervalSet(-1, 10)}, (-1, 5))
def test_mod():
ck = IntSetChecker()
- x, y = tvm.var("x"), tvm.var("y")
- tmod = tvm.truncmod
+ x, y = te.var("x"), te.var("y")
+ tmod = tvm.tir.truncmod
ck.analyzer.update(y, tvm.arith.ConstIntBound(1, 100), override=True)
ck.verify(tmod(x, y), {x : tvm.arith.IntervalSet(0, 10)}, (0, y - 1))
ck.verify(tmod(x, 10), {x : tvm.arith.IntervalSet(1, 10)}, (0, 9))
- flm = tvm.floormod
+ flm = tvm.te.floormod
ck.verify(flm(x, 10), {x : tvm.arith.IntervalSet(-10, 10)}, (0, 9))
def test_max_min():
ck = IntSetChecker()
- x, y = tvm.var("x"), tvm.var("y")
- ck.verify(tvm.max(x, x + 1), {x : tvm.arith.IntervalSet(0, 10)}, (1, 11))
- ck.verify(tvm.min(x - 1, x + 1), {x : tvm.arith.IntervalSet(0, 10)}, (-1, 9))
- ck.verify(tvm.min(x, y), {}, (tvm.min(x, y), tvm.min(x, y)))
- ck.verify(tvm.max(x, y), {}, (tvm.max(x, y), tvm.max(x, y)))
+ x, y = te.var("x"), te.var("y")
+ ck.verify(tvm.te.max(x, x + 1), {x : tvm.arith.IntervalSet(0, 10)}, (1, 11))
+ ck.verify(tvm.te.min(x - 1, x + 1), {x : tvm.arith.IntervalSet(0, 10)}, (-1, 9))
+ ck.verify(tvm.te.min(x, y), {}, (tvm.te.min(x, y), tvm.te.min(x, y)))
+ ck.verify(tvm.te.max(x, y), {}, (tvm.te.max(x, y), tvm.te.max(x, y)))
def test_select():
ck = IntSetChecker()
- x, y = tvm.var("x"), tvm.var("y")
+ x, y = te.var("x"), te.var("y")
ck.verify(tvm.tir.Select(x > 0, x - 1, x + 1),
{x : tvm.arith.IntervalSet(0, 10)}, (-1, 11))
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_cast():
analyzer = tvm.arith.Analyzer()
- x = tvm.var("x", dtype="int8")
+ x = te.var("x", dtype="int8")
m = analyzer.modular_set((x * 3).astype("uint32"))
assert m.coeff == 3
assert m.base == 0
def test_add_sub():
analyzer = tvm.arith.Analyzer()
- x, y = tvm.var("x", "int64"), tvm.var("y", "int64")
+ x, y = te.var("x", "int64"), te.var("y", "int64")
m = analyzer.modular_set(x * 6 + y * 4)
assert m.coeff == 2
assert m.base == 0
def test_mul():
analyzer = tvm.arith.Analyzer()
- x, y = tvm.var("x"), tvm.var("y")
+ x, y = te.var("x"), te.var("y")
m = analyzer.modular_set((x * 4 + 2) * (y * 6 + 1))
assert m.coeff == 4
assert m.base == 2
def test_div_shift():
analyzer = tvm.arith.Analyzer()
- x, y = tvm.var("x"), tvm.var("y")
+ x, y = te.var("x"), te.var("y")
# not sure if x is non-negative
- tdiv = tvm.truncdiv
+ tdiv = tvm.tir.truncdiv
m = analyzer.modular_set(tdiv(x * 4 + 2, 2))
assert m.coeff == 1
assert m.base == 0
m = analyzer.modular_set((x * 4 + 2) >> 1)
assert m.coeff == 2
assert m.base == 1
- fld = tvm.floordiv
+ fld = tvm.te.floordiv
m = analyzer.modular_set(fld(x * 4 + 2, 2))
assert m.coeff == 2
assert m.base == 1
def test_min_max_select():
analyzer = tvm.arith.Analyzer()
- x, y = tvm.var("x"), tvm.var("y")
- m = analyzer.modular_set(tvm.min(x * 3, y * 9))
+ x, y = te.var("x"), te.var("y")
+ m = analyzer.modular_set(tvm.te.min(x * 3, y * 9))
assert m.coeff == 3
assert m.base == 0
- m = analyzer.modular_set(tvm.max(x * 3 + 1, y * 9 + 4))
+ m = analyzer.modular_set(tvm.te.max(x * 3 + 1, y * 9 + 4))
assert m.coeff == 3
assert m.base == 1
def test_mix_index():
- a = tvm.var("a")
- b = tvm.var("b")
+ a = te.var("a")
+ b = te.var("b")
analyzer = tvm.arith.Analyzer()
- tdiv = tvm.truncdiv
+ tdiv = tvm.tir.truncdiv
m = analyzer.modular_set(a * 4 + b * 6 + 7)
assert m.coeff == 2
assert m.base == 1
assert m.coeff == 3
assert m.base == 2
- m = analyzer.modular_set(a * 12 + tvm.min(b * 3 * 7, 2))
+ m = analyzer.modular_set(a * 12 + tvm.te.min(b * 3 * 7, 2))
assert m.coeff == 1
assert m.base == 0
def test_constraint_scope():
- a = tvm.var("a")
- b = tvm.var("b")
+ a = te.var("a")
+ b = te.var("b")
analyzer = tvm.arith.Analyzer()
- tmod = tvm.truncmod
+ tmod = tvm.tir.truncmod
with analyzer.constraint_scope(tmod(b, 4) == 2):
m = analyzer.modular_set(b + 1)
assert m.base == 0
def test_intersect():
- a = tvm.var("a")
+ a = te.var("a")
analyzer = tvm.arith.Analyzer()
- tmod = tvm.truncmod
+ tmod = tvm.tir.truncmod
with analyzer.constraint_scope(tmod(a, 4) == 1):
with analyzer.constraint_scope(tmod(a, 3) == 1):
m = analyzer.modular_set(a)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
class RewriteChecker:
def __init__(self):
def verify(self, data, expected):
res = self.analyzer.rewrite_simplify(data)
- assert tvm.ir_pass.Equal(res, expected), "data={}, res={}, expected={}".format(data, res, expected)
+ assert tvm.tir.ir_pass.Equal(res, expected), "data={}, res={}, expected={}".format(data, res, expected)
def test_vector_simplify():
ck = RewriteChecker()
- x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z")
+ x, y, z = te.var("x"), te.var("y"), te.var("z")
# Add rules
ck.verify(tvm.tir.Ramp(x, 1, 4) + tvm.tir.Ramp(y, 2, 4),
tvm.tir.Ramp(x + y, 3, 4))
tvm.tir.Ramp(x * 2, 8, 4))
## DivMod rules
- tdiv = tvm.truncdiv
- tmod = tvm.truncmod
+ tdiv = tvm.tir.truncdiv
+ tmod = tvm.tir.truncmod
# trunc div
ck.verify(tdiv(y.astype("int32x2"), x.astype("int32x2")),
tdiv(y, x).astype("int32x2"))
tmod(tvm.tir.Ramp(1, 15, 4), 8))
# floor div
- fld = tvm.floordiv
- flm = tvm.floormod
+ fld = tvm.te.floordiv
+ flm = tvm.te.floormod
ck.analyzer.update(x, tvm.arith.ConstIntBound(-10, 1000), override=True)
ck.verify(fld(y.astype("int32x2"), x.astype("int32x2")),
fld(y, x).astype("int32x2"))
flm(tvm.tir.Ramp(1, 15, 4), 8))
# Min/Max rules
- vx = tvm.var("vx", dtype="int32x2")
- vc = tvm.var("vc", dtype="uint1")
- ck.verify(tvm.min(y.astype("int32x2"), x.astype("int32x2")),
- tvm.min(y, x).astype("int32x2"))
- ck.verify(tvm.min(tvm.min(vx, y.astype("int32x2")), x.astype("int32x2")),
- tvm.min(vx, tvm.min(y, x).astype("int32x2")))
- ck.verify(tvm.max(y.astype("int32x2"), x.astype("int32x2")),
- tvm.max(y, x).astype("int32x2"))
- ck.verify(tvm.max(tvm.max(vx, y.astype("int32x2")), x.astype("int32x2")),
- tvm.max(vx, tvm.max(y, x).astype("int32x2")))
+ vx = te.var("vx", dtype="int32x2")
+ vc = te.var("vc", dtype="uint1")
+ ck.verify(tvm.te.min(y.astype("int32x2"), x.astype("int32x2")),
+ tvm.te.min(y, x).astype("int32x2"))
+ ck.verify(tvm.te.min(tvm.te.min(vx, y.astype("int32x2")), x.astype("int32x2")),
+ tvm.te.min(vx, tvm.te.min(y, x).astype("int32x2")))
+ ck.verify(tvm.te.max(y.astype("int32x2"), x.astype("int32x2")),
+ tvm.te.max(y, x).astype("int32x2"))
+ ck.verify(tvm.te.max(tvm.te.max(vx, y.astype("int32x2")), x.astype("int32x2")),
+ tvm.te.max(vx, tvm.te.max(y, x).astype("int32x2")))
## Logical rules
ck.verify(y.astype("int32x2").equal(x.astype("int32x2")),
def test_select_simplify():
ck = RewriteChecker()
- x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z")
+ x, y, z = te.var("x"), te.var("y"), te.var("z")
# Add rules
ck.verify(tvm.tir.Select(x < 0, y, 0) + tvm.tir.Select(x < 0, 1, z),
tvm.tir.Select(x < 0, y + 1, z))
tvm.tir.Select(x < 0, 0, z - y))
ck.verify(tvm.tir.Select(x < 0, y, z) - z,
tvm.tir.Select(x < 0, y - z, 0))
- ck.verify(tvm.min(tvm.tir.Select(x < 0, y, 0), tvm.tir.Select(x < 0, 1, z)),
- tvm.tir.Select(x < 0, tvm.min(y, 1), tvm.min(0, z)))
- ck.verify(tvm.max(tvm.tir.Select(x < 0, y, 0), tvm.tir.Select(x < 0, 1, z)),
- tvm.tir.Select(x < 0, tvm.max(y, 1), tvm.max(0, z)))
+ ck.verify(tvm.te.min(tvm.tir.Select(x < 0, y, 0), tvm.tir.Select(x < 0, 1, z)),
+ tvm.tir.Select(x < 0, tvm.te.min(y, 1), tvm.te.min(0, z)))
+ ck.verify(tvm.te.max(tvm.tir.Select(x < 0, y, 0), tvm.tir.Select(x < 0, 1, z)),
+ tvm.tir.Select(x < 0, tvm.te.max(y, 1), tvm.te.max(0, z)))
ck.verify(tvm.tir.Select(x * 3 + 1 != 0, y, z), y)
ck.verify(tvm.tir.Select(x * 3 + 1 == 0, y, z), z)
def test_add_index_simplify():
ck = RewriteChecker()
- x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z")
+ x, y, z = te.var("x"), te.var("y"), te.var("z")
ck.verify(x + (y - x), y)
ck.verify(x - (y + 1) + (y + 1), x)
ck.verify((x - 10) + (10 - z), x - z)
ck.verify((x - y) + (z - x), z - y)
- ck.verify(tvm.min(x, y - z) + z, tvm.min(x + z, y))
- ck.verify(tvm.min(x - z, y) + z, tvm.min(x, y + z))
- ck.verify(tvm.max(x, y - 10) + 10, tvm.max(x + 10, y))
- ck.verify(tvm.max(x - 11, y) + 11, tvm.max(x, y + 11))
+ ck.verify(tvm.te.min(x, y - z) + z, tvm.te.min(x + z, y))
+ ck.verify(tvm.te.min(x - z, y) + z, tvm.te.min(x, y + z))
+ ck.verify(tvm.te.max(x, y - 10) + 10, tvm.te.max(x + 10, y))
+ ck.verify(tvm.te.max(x - 11, y) + 11, tvm.te.max(x, y + 11))
- ck.verify(tvm.max(x, y * 2) + tvm.min(x, y * 2), x + y * 2);
- ck.verify(tvm.min(x, y * 2) + tvm.max(x, y * 2), x + y * 2);
+ ck.verify(tvm.te.max(x, y * 2) + tvm.te.min(x, y * 2), x + y * 2);
+ ck.verify(tvm.te.min(x, y * 2) + tvm.te.max(x, y * 2), x + y * 2);
- ck.verify(tvm.max(x, y + 2) + (-2), tvm.max(x + (-2), y));
- ck.verify(tvm.min(x, y + 2) + (-2), tvm.min(x + (-2), y));
- ck.verify(tvm.min(x + 2, y + 3) + (-2), tvm.min(x, y + 1));
+ ck.verify(tvm.te.max(x, y + 2) + (-2), tvm.te.max(x + (-2), y));
+ ck.verify(tvm.te.min(x, y + 2) + (-2), tvm.te.min(x + (-2), y));
+ ck.verify(tvm.te.min(x + 2, y + 3) + (-2), tvm.te.min(x, y + 1));
- ck.verify(tvm.max(0, 1 - x * 4) + x * 4, tvm.max(x * 4, 1))
- ck.verify(tvm.max(2 - x * 4, 0) + x * 4, tvm.max(x * 4, 2))
+ ck.verify(tvm.te.max(0, 1 - x * 4) + x * 4, tvm.te.max(x * 4, 1))
+ ck.verify(tvm.te.max(2 - x * 4, 0) + x * 4, tvm.te.max(x * 4, 2))
- ck.verify(tvm.min(0, 1 - x * 4) + x * 4, tvm.min(x * 4, 1))
- ck.verify(tvm.min(2 - x * 4, 0) + x * 4, tvm.min(x * 4, 2))
+ ck.verify(tvm.te.min(0, 1 - x * 4) + x * 4, tvm.te.min(x * 4, 1))
+ ck.verify(tvm.te.min(2 - x * 4, 0) + x * 4, tvm.te.min(x * 4, 2))
ck.verify(x * y + x * 10, x * (y + 10))
ck.verify(y * x + x * 10, x * (y + 10))
ck.verify(x + 2 + 3 + 4 + x * 3, x * 4 + 9);
# DivMod rules
- tdiv = tvm.truncdiv
- tmod = tvm.truncmod
+ tdiv = tvm.tir.truncdiv
+ tmod = tvm.tir.truncmod
# trunc div
ck.verify(y * tmod(x, 8) + 10 * tmod(x, 8), tmod(x, 8) * (y + 10))
ck.analyzer.update(x, tvm.arith.ConstIntBound(-1, 1000), override=True)
ck.verify(tdiv(x, 8) * 8 + tmod(x, 8), x)
# floor div
- fld = tvm.floordiv
- flm = tvm.floormod
+ fld = tvm.te.floordiv
+ flm = tvm.te.floormod
ck.verify(y * flm(x, 8) + 10 * flm(x, 8), flm(x, 8) * (y + 10))
ck.verify(fld(x, 8) * 8 + flm(x, 8), x)
def test_sub_index_simplify():
ck = RewriteChecker()
- x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z")
+ x, y, z = te.var("x"), te.var("y"), te.var("z")
ck.verify(x + y - y, x)
ck.verify(x + y - x, y)
ck.verify(x - (y + x), 0 - y)
ck.verify(x - (x + y), 0 - y)
- ck.verify(tvm.min(x, y) - x, tvm.min(0, y - x))
- ck.verify(tvm.min(x, y) - y, tvm.min(x - y, 0))
- ck.verify(tvm.max(x, y) - x, tvm.max(0, y - x))
- ck.verify(tvm.max(x, y) - y, tvm.max(x - y, 0))
+ ck.verify(tvm.te.min(x, y) - x, tvm.te.min(0, y - x))
+ ck.verify(tvm.te.min(x, y) - y, tvm.te.min(x - y, 0))
+ ck.verify(tvm.te.max(x, y) - x, tvm.te.max(0, y - x))
+ ck.verify(tvm.te.max(x, y) - y, tvm.te.max(x - y, 0))
- ck.verify(x - tvm.min(x, y), tvm.max(0, x - y))
- ck.verify(y - tvm.min(x, y), tvm.max(y - x, 0))
- ck.verify(x - tvm.max(x, y), tvm.min(0, x - y))
- ck.verify(y - tvm.max(x, y), tvm.min(y - x, 0))
+ ck.verify(x - tvm.te.min(x, y), tvm.te.max(0, x - y))
+ ck.verify(y - tvm.te.min(x, y), tvm.te.max(y - x, 0))
+ ck.verify(x - tvm.te.max(x, y), tvm.te.min(0, x - y))
+ ck.verify(y - tvm.te.max(x, y), tvm.te.min(y - x, 0))
# mul coefficient folding
ck.verify(x - x, 0)
ck.verify((x + y) - (z + x), y - z)
ck.verify((y + x) - (z + x), y - z)
- ck.verify(tvm.min(x + y, z) - x, tvm.min(y, z - x))
- ck.verify(tvm.min(y + x, z) - x, tvm.min(y, z - x))
- ck.verify(tvm.min(z, x + y) - x, tvm.min(z - x, y))
- ck.verify(tvm.min(z, y + x) - x, tvm.min(z - x, y))
+ ck.verify(tvm.te.min(x + y, z) - x, tvm.te.min(y, z - x))
+ ck.verify(tvm.te.min(y + x, z) - x, tvm.te.min(y, z - x))
+ ck.verify(tvm.te.min(z, x + y) - x, tvm.te.min(z - x, y))
+ ck.verify(tvm.te.min(z, y + x) - x, tvm.te.min(z - x, y))
- ck.verify(tvm.max(x + y, z) - x, tvm.max(y, z - x))
- ck.verify(tvm.max(y + x, z) - x, tvm.max(y, z - x))
- ck.verify(tvm.max(z, x + y) - x, tvm.max(z - x, y))
- ck.verify(tvm.max(z, y + x) - x, tvm.max(z - x, y))
+ ck.verify(tvm.te.max(x + y, z) - x, tvm.te.max(y, z - x))
+ ck.verify(tvm.te.max(y + x, z) - x, tvm.te.max(y, z - x))
+ ck.verify(tvm.te.max(z, x + y) - x, tvm.te.max(z - x, y))
+ ck.verify(tvm.te.max(z, y + x) - x, tvm.te.max(z - x, y))
- ck.verify(x - tvm.min(x + y, z), tvm.max(0 - y, x - z))
- ck.verify(x - tvm.min(y + x, z), tvm.max(0 - y, x - z))
- ck.verify(x - tvm.min(z, x + y), tvm.max(x - z, 0 - y))
- ck.verify(x - tvm.min(z, y + x), tvm.max(x - z, 0 - y))
+ ck.verify(x - tvm.te.min(x + y, z), tvm.te.max(0 - y, x - z))
+ ck.verify(x - tvm.te.min(y + x, z), tvm.te.max(0 - y, x - z))
+ ck.verify(x - tvm.te.min(z, x + y), tvm.te.max(x - z, 0 - y))
+ ck.verify(x - tvm.te.min(z, y + x), tvm.te.max(x - z, 0 - y))
- ck.verify(tvm.min(x, y) - tvm.min(y, x), 0)
- ck.verify(tvm.max(x, y) - tvm.max(y, x), 0)
- ck.verify(tvm.min(x, y) - tvm.min(x + 10, y + 10), -10)
- ck.verify(tvm.min(x + 10, y + 1) - tvm.min(x, y - 9), 10)
+ ck.verify(tvm.te.min(x, y) - tvm.te.min(y, x), 0)
+ ck.verify(tvm.te.max(x, y) - tvm.te.max(y, x), 0)
+ ck.verify(tvm.te.min(x, y) - tvm.te.min(x + 10, y + 10), -10)
+ ck.verify(tvm.te.min(x + 10, y + 1) - tvm.te.min(x, y - 9), 10)
# DivMod patterns
# trunc div
- tdiv = tvm.truncdiv
- tmod = tvm.truncmod
+ tdiv = tvm.tir.truncdiv
+ tmod = tvm.tir.truncmod
ck.analyzer.update(x, tvm.arith.ConstIntBound(0, 1000), override=True)
ck.verify(x - tdiv(x, 3) * 3, tmod(x, 3))
ck.verify(tdiv(y - z, 3) * 6 - 2 * y, (0 - tmod(y - z, 3) - z) * 2)
# floor div
- fld = tvm.floordiv
- flm = tvm.floormod
+ fld = tvm.te.floordiv
+ flm = tvm.te.floormod
ck.analyzer.update(x, tvm.arith.ConstIntBound(-1000, 1000), override=True)
ck.analyzer.update(y, tvm.arith.ConstIntBound(-1000, 1000), override=True)
ck.verify(x - fld(x, 3) * 3, flm(x, 3))
def test_mul_index_simplify():
ck = RewriteChecker()
- x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z")
+ x, y, z = te.var("x"), te.var("y"), te.var("z")
ck.verify((x + 2) * 3, x * 3 + 6)
ck.verify((x * 2) * 3, x * 6)
- ck.verify(tvm.min(x, y) * tvm.max(x, y), x * y)
- ck.verify(tvm.max(x, y) * tvm.min(x, y), x * y)
+ ck.verify(tvm.te.min(x, y) * tvm.te.max(x, y), x * y)
+ ck.verify(tvm.te.max(x, y) * tvm.te.min(x, y), x * y)
ck.verify((x - y) * (-2), (y - x) * 2)
def test_div_index_simplify():
ck = RewriteChecker()
- x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z")
- tdiv = tvm.truncdiv
- tmod = tvm.truncmod
+ x, y, z = te.var("x"), te.var("y"), te.var("z")
+ tdiv = tvm.tir.truncdiv
+ tmod = tvm.tir.truncmod
ck.verify(tdiv(x, x), 1)
ck.analyzer.update(x, tvm.arith.ConstIntBound(0, 1000), override=True)
ck.verify(tdiv(x * 4, 2), x * 2)
ck.verify(tdiv(x * 4 + y, 2), x * 2 + tdiv(y, 2))
- ck.verify(tdiv(tvm.min(x * 6, y), 2), tvm.min(x * 3, tdiv(y, 2)))
- ck.verify(tdiv(tvm.max(x * 6, y), 2), tvm.max(x * 3, tdiv(y, 2)))
+ ck.verify(tdiv(tvm.te.min(x * 6, y), 2), tvm.te.min(x * 3, tdiv(y, 2)))
+ ck.verify(tdiv(tvm.te.max(x * 6, y), 2), tvm.te.max(x * 3, tdiv(y, 2)))
ck.verify(tdiv(y + x * 4, 2), tdiv(y, 2) + x * 2)
- ck.verify(tdiv(tvm.min(y, x * 6), 2), tvm.min(tdiv(y, 2), x * 3))
- ck.verify(tdiv(tvm.max(y, x * 6), 2), tvm.max(tdiv(y, 2), x * 3))
+ ck.verify(tdiv(tvm.te.min(y, x * 6), 2), tvm.te.min(tdiv(y, 2), x * 3))
+ ck.verify(tdiv(tvm.te.max(y, x * 6), 2), tvm.te.max(tdiv(y, 2), x * 3))
# 3-operands
ck.verify(tdiv(x * 6 + y + z, 2), x * 3 + tdiv(y + z, 2))
def test_floordiv_index_simplify():
# short name for floordiv
- fld = tvm.floordiv
+ fld = tvm.te.floordiv
ck = RewriteChecker()
- x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z")
+ x, y, z = te.var("x"), te.var("y"), te.var("z")
ck.verify(fld(fld(x, 2), 3), fld(x, 6))
ck.verify(fld(fld(x, 2) + 1, 3), fld(x + 2, 6))
ck.verify(fld(x * 4, 2), x * 2)
ck.verify(fld(x * 4 + y, 2), x * 2 + fld(y, 2))
- ck.verify(fld(tvm.min(x * 6, y), 2), tvm.min(x * 3, fld(y, 2)))
- ck.verify(fld(tvm.max(x * 6, y), 2), tvm.max(x * 3, fld(y, 2)))
+ ck.verify(fld(tvm.te.min(x * 6, y), 2), tvm.te.min(x * 3, fld(y, 2)))
+ ck.verify(fld(tvm.te.max(x * 6, y), 2), tvm.te.max(x * 3, fld(y, 2)))
ck.verify(fld(y + x * 4, 2), fld(y, 2) + x * 2)
- ck.verify(fld(tvm.min(y, x * 6), 2), tvm.min(fld(y, 2), x * 3))
- ck.verify(fld(tvm.max(y, x * 6), 2), tvm.max(fld(y, 2), x * 3))
+ ck.verify(fld(tvm.te.min(y, x * 6), 2), tvm.te.min(fld(y, 2), x * 3))
+ ck.verify(fld(tvm.te.max(y, x * 6), 2), tvm.te.max(fld(y, 2), x * 3))
# 3-operands
ck.verify(fld(x * 6 + y + z, 2), x * 3 + fld(y + z, 2))
def test_mod_index_simplify():
ck = RewriteChecker()
- x, y, nx, ny, z = tvm.var("x"), tvm.var("y"), tvm.var("nx"), tvm.var("ny"), tvm.var("z")
+ x, y, nx, ny, z = te.var("x"), te.var("y"), te.var("nx"), te.var("ny"), te.var("z")
ck.analyzer.update(x, tvm.arith.ConstIntBound(0, 1000), override=True)
ck.analyzer.update(y, tvm.arith.ConstIntBound(0, 1000), override=True)
ck.analyzer.update(nx, tvm.arith.ConstIntBound(-1000, 0), override=True)
ck.analyzer.update(ny, tvm.arith.ConstIntBound(-1000, 0), override=True)
- tdiv = tvm.truncdiv
- tmod = tvm.truncmod
+ tdiv = tvm.tir.truncdiv
+ tmod = tvm.tir.truncmod
ck.verify(tmod(x * 10, 2), 0)
ck.verify(tmod(x * 10 + y, 2), tmod(y, 2))
def test_floormod_index_simplify():
# short name for floormod
- flm = tvm.floormod
+ flm = tvm.te.floormod
ck = RewriteChecker()
- x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z")
+ x, y, z = te.var("x"), te.var("y"), te.var("z")
ck = RewriteChecker()
- x, y, nx, ny, z = tvm.var("x"), tvm.var("y"), tvm.var("nx"), tvm.var("ny"), tvm.var("z")
+ x, y, nx, ny, z = te.var("x"), te.var("y"), te.var("nx"), te.var("ny"), te.var("z")
ck.verify(flm(x * 10, 2), 0)
ck.verify(flm(x * 10 + y, 2), flm(y, 2))
def test_min_index_simplify():
ck = RewriteChecker()
- x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z")
- fld = tvm.floordiv
- flm = tvm.floormod
- tdiv = tvm.truncdiv
- tmod = tvm.truncmod
+ x, y, z = te.var("x"), te.var("y"), te.var("z")
+ fld = tvm.te.floordiv
+ flm = tvm.te.floormod
+ tdiv = tvm.tir.truncdiv
+ tmod = tvm.tir.truncmod
# const int bound
- ck.verify(tvm.min(tmod(x, 2), tmod(y, 2) + 10), tmod(x, 2))
- ck.verify(tvm.min(flm(x, 2), flm(y, 2) + 10), flm(x, 2))
-
- ck.verify(tvm.min(x + 1, x + 10), x + 1)
- ck.verify(tvm.min(x + 111, x + 10), x + 10)
- ck.verify(tvm.min(x + 1, x), x)
- ck.verify(tvm.min(x, x + 2), x)
- ck.verify(tvm.min(1 - x, 2 - x), 1 - x)
- ck.verify(tvm.min(3 - x, 2 - x), 2 - x)
-
- ck.verify(tvm.min(tvm.max(x, y), tvm.min(x, y)), tvm.min(x, y))
- ck.verify(tvm.min(tvm.max(x, y), tvm.min(y, x)), tvm.min(x, y))
-
- ck.verify(tvm.min(tvm.max(x, y), x), x)
- ck.verify(tvm.min(tvm.max(y, x), x), x)
- ck.verify(tvm.min(tvm.min(x, y), x), tvm.min(x, y))
- ck.verify(tvm.min(tvm.min(x, y), y), tvm.min(x, y))
-
- ck.verify(tvm.min(x, tvm.max(x, y)), x)
- ck.verify(tvm.min(x, tvm.max(y, x)), x)
- ck.verify(tvm.min(x, tvm.min(x, y)), tvm.min(x, y))
- ck.verify(tvm.min(y, tvm.min(x, y)), tvm.min(x, y))
-
- ck.verify(tvm.min(tvm.min(tvm.min(x, y), z), y),
- tvm.min(tvm.min(x, y), z))
- ck.verify(tvm.min(tvm.min(tvm.min(tvm.min(x, y), z), x * 2), y),
- tvm.min(tvm.min(tvm.min(x, y), z), x * 2))
- ck.verify(tvm.min(tvm.min(tvm.min(tvm.min(tvm.min(x, y), z), x * 2), z * 2), y),
- tvm.min(tvm.min(tvm.min(tvm.min(x, y), z), x * 2), z * 2))
-
- ck.verify(tvm.min(tvm.max(x, y), tvm.max(x, z)), tvm.max(tvm.min(y, z), x))
- ck.verify(tvm.min(tvm.max(x, y), tvm.max(z, x)), tvm.max(tvm.min(y, z), x))
- ck.verify(tvm.min(tvm.max(y, x), tvm.max(x, z)), tvm.max(tvm.min(y, z), x))
- ck.verify(tvm.min(tvm.max(y, x), tvm.max(z, x)), tvm.max(tvm.min(y, z), x))
-
- ck.verify(tvm.min(y + x, z + x), tvm.min(y, z) + x)
- ck.verify(tvm.min(y + x, x + z), tvm.min(y, z) + x)
- ck.verify(tvm.min(x + y, z + x), tvm.min(y, z) + x)
- ck.verify(tvm.min(x + y, x + z), tvm.min(y, z) + x)
-
- ck.verify(tvm.min(x - y, x - z), x - tvm.max(y, z))
- ck.verify(tvm.min(y - x, z - x), tvm.min(y, z) - x)
-
- ck.verify(tvm.min(tvm.min(x, 1), 10), tvm.min(x, 1))
- ck.verify(tvm.min(tvm.min(x, 11), 10), tvm.min(x, 10))
-
- ck.verify(tvm.min(x * 3, 9), tvm.min(x, 3) * 3)
- ck.verify(tvm.min(3 - x, 2), 3 - tvm.max(x, 1))
+ ck.verify(tvm.te.min(tmod(x, 2), tmod(y, 2) + 10), tmod(x, 2))
+ ck.verify(tvm.te.min(flm(x, 2), flm(y, 2) + 10), flm(x, 2))
+
+ ck.verify(tvm.te.min(x + 1, x + 10), x + 1)
+ ck.verify(tvm.te.min(x + 111, x + 10), x + 10)
+ ck.verify(tvm.te.min(x + 1, x), x)
+ ck.verify(tvm.te.min(x, x + 2), x)
+ ck.verify(tvm.te.min(1 - x, 2 - x), 1 - x)
+ ck.verify(tvm.te.min(3 - x, 2 - x), 2 - x)
+
+ ck.verify(tvm.te.min(tvm.te.max(x, y), tvm.te.min(x, y)), tvm.te.min(x, y))
+ ck.verify(tvm.te.min(tvm.te.max(x, y), tvm.te.min(y, x)), tvm.te.min(x, y))
+
+ ck.verify(tvm.te.min(tvm.te.max(x, y), x), x)
+ ck.verify(tvm.te.min(tvm.te.max(y, x), x), x)
+ ck.verify(tvm.te.min(tvm.te.min(x, y), x), tvm.te.min(x, y))
+ ck.verify(tvm.te.min(tvm.te.min(x, y), y), tvm.te.min(x, y))
+
+ ck.verify(tvm.te.min(x, tvm.te.max(x, y)), x)
+ ck.verify(tvm.te.min(x, tvm.te.max(y, x)), x)
+ ck.verify(tvm.te.min(x, tvm.te.min(x, y)), tvm.te.min(x, y))
+ ck.verify(tvm.te.min(y, tvm.te.min(x, y)), tvm.te.min(x, y))
+
+ ck.verify(tvm.te.min(tvm.te.min(tvm.te.min(x, y), z), y),
+ tvm.te.min(tvm.te.min(x, y), z))
+ ck.verify(tvm.te.min(tvm.te.min(tvm.te.min(tvm.te.min(x, y), z), x * 2), y),
+ tvm.te.min(tvm.te.min(tvm.te.min(x, y), z), x * 2))
+ ck.verify(tvm.te.min(tvm.te.min(tvm.te.min(tvm.te.min(tvm.te.min(x, y), z), x * 2), z * 2), y),
+ tvm.te.min(tvm.te.min(tvm.te.min(tvm.te.min(x, y), z), x * 2), z * 2))
+
+ ck.verify(tvm.te.min(tvm.te.max(x, y), tvm.te.max(x, z)), tvm.te.max(tvm.te.min(y, z), x))
+ ck.verify(tvm.te.min(tvm.te.max(x, y), tvm.te.max(z, x)), tvm.te.max(tvm.te.min(y, z), x))
+ ck.verify(tvm.te.min(tvm.te.max(y, x), tvm.te.max(x, z)), tvm.te.max(tvm.te.min(y, z), x))
+ ck.verify(tvm.te.min(tvm.te.max(y, x), tvm.te.max(z, x)), tvm.te.max(tvm.te.min(y, z), x))
+
+ ck.verify(tvm.te.min(y + x, z + x), tvm.te.min(y, z) + x)
+ ck.verify(tvm.te.min(y + x, x + z), tvm.te.min(y, z) + x)
+ ck.verify(tvm.te.min(x + y, z + x), tvm.te.min(y, z) + x)
+ ck.verify(tvm.te.min(x + y, x + z), tvm.te.min(y, z) + x)
+
+ ck.verify(tvm.te.min(x - y, x - z), x - tvm.te.max(y, z))
+ ck.verify(tvm.te.min(y - x, z - x), tvm.te.min(y, z) - x)
+
+ ck.verify(tvm.te.min(tvm.te.min(x, 1), 10), tvm.te.min(x, 1))
+ ck.verify(tvm.te.min(tvm.te.min(x, 11), 10), tvm.te.min(x, 10))
+
+ ck.verify(tvm.te.min(x * 3, 9), tvm.te.min(x, 3) * 3)
+ ck.verify(tvm.te.min(3 - x, 2), 3 - tvm.te.max(x, 1))
# DivMod rules
# trunc div
ck.analyzer.update(x, tvm.arith.ConstIntBound(0, 1000))
- ck.verify(tvm.min(tdiv(x + 3, 4) * 4, x), x)
- ck.verify(tvm.min(tdiv(x + 3, 4) * 4, tvm.max(x, 4)), tvm.max(x, 4))
- ck.verify(tvm.min(x, tdiv(x + 3, 4) * 4), x)
- ck.verify(tvm.min(tvm.max(x, 4), tdiv(x + 3, 4) * 4), tvm.max(x, 4))
+ ck.verify(tvm.te.min(tdiv(x + 3, 4) * 4, x), x)
+ ck.verify(tvm.te.min(tdiv(x + 3, 4) * 4, tvm.te.max(x, 4)), tvm.te.max(x, 4))
+ ck.verify(tvm.te.min(x, tdiv(x + 3, 4) * 4), x)
+ ck.verify(tvm.te.min(tvm.te.max(x, 4), tdiv(x + 3, 4) * 4), tvm.te.max(x, 4))
ck.analyzer.update(x, tvm.arith.ConstIntBound(-1000, 1000), True)
- ck.verify(tvm.min(tdiv(x, 10), tdiv(y, 10)), tdiv(tvm.min(x, y), 10))
- ck.verify(tvm.min(tdiv(x, (-10)), tdiv(y, (-10))),
- tdiv(tvm.max(x, y), (-10)))
+ ck.verify(tvm.te.min(tdiv(x, 10), tdiv(y, 10)), tdiv(tvm.te.min(x, y), 10))
+ ck.verify(tvm.te.min(tdiv(x, (-10)), tdiv(y, (-10))),
+ tdiv(tvm.te.max(x, y), (-10)))
# floor div
ck.analyzer.update(x, tvm.arith.ConstIntBound(-1000, 1000), True)
- ck.verify(tvm.min(fld(x + 3, 4) * 4, x), x)
- ck.verify(tvm.min(fld(x + 3, 4) * 4, tvm.max(x, 4)), tvm.max(x, 4))
- ck.verify(tvm.min(x, fld(x + 3, 4) * 4), x)
- ck.verify(tvm.min(x, fld(x, 4) * 4), fld(x, 4) * 4)
- ck.verify(tvm.min(tvm.max(x, 4), fld(x + 3, 4) * 4), tvm.max(x, 4))
- ck.verify(tvm.min(fld(x, 10), fld(y, 10)), fld(tvm.min(x, y), 10))
- ck.verify(tvm.min(fld(x, (-10)), fld(y, (-10))), fld(tvm.max(x, y), (-10)))
+ ck.verify(tvm.te.min(fld(x + 3, 4) * 4, x), x)
+ ck.verify(tvm.te.min(fld(x + 3, 4) * 4, tvm.te.max(x, 4)), tvm.te.max(x, 4))
+ ck.verify(tvm.te.min(x, fld(x + 3, 4) * 4), x)
+ ck.verify(tvm.te.min(x, fld(x, 4) * 4), fld(x, 4) * 4)
+ ck.verify(tvm.te.min(tvm.te.max(x, 4), fld(x + 3, 4) * 4), tvm.te.max(x, 4))
+ ck.verify(tvm.te.min(fld(x, 10), fld(y, 10)), fld(tvm.te.min(x, y), 10))
+ ck.verify(tvm.te.min(fld(x, (-10)), fld(y, (-10))), fld(tvm.te.max(x, y), (-10)))
def test_max_index_simplify():
ck = RewriteChecker()
- x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z")
- flm = tvm.floormod
- fld = tvm.floordiv
- tdiv = tvm.truncdiv
- tmod = tvm.truncmod
+ x, y, z = te.var("x"), te.var("y"), te.var("z")
+ flm = tvm.te.floormod
+ fld = tvm.te.floordiv
+ tdiv = tvm.tir.truncdiv
+ tmod = tvm.tir.truncmod
# const int bound
- ck.verify(tvm.max(tmod(x, 2), tmod(y, 2) + 10), tmod(y, 2) + 10)
- ck.verify(tvm.max(flm(x, 2), flm(y, 2) + 10), flm(y, 2) + 10)
-
- ck.verify(tvm.max(x + 1, x + 10), x + 10)
- ck.verify(tvm.max(x + 111, x + 10), x + 111)
- ck.verify(tvm.max(x + 1, x), x + 1)
- ck.verify(tvm.max(x, x + 2), x + 2)
- ck.verify(tvm.max(1 - x, 2 - x), 2 - x)
- ck.verify(tvm.max(3 - x, 2 - x), 3 - x)
-
- ck.verify(tvm.max(tvm.min(x, y), tvm.max(x, y)), tvm.max(x, y))
- ck.verify(tvm.max(tvm.min(x, y), tvm.max(y, x)), tvm.max(x, y))
-
- ck.verify(tvm.max(tvm.min(x, y), x), x)
- ck.verify(tvm.max(tvm.min(y, x), x), x)
- ck.verify(tvm.max(tvm.max(x, y), x), tvm.max(x, y))
- ck.verify(tvm.max(tvm.max(x, y), y), tvm.max(x, y))
-
- ck.verify(tvm.max(x, tvm.min(x, y)), x)
- ck.verify(tvm.max(x, tvm.min(y, x)), x)
- ck.verify(tvm.max(x, tvm.max(x, y)), tvm.max(x, y))
- ck.verify(tvm.max(y, tvm.max(x, y)), tvm.max(x, y))
-
- ck.verify(tvm.max(tvm.max(tvm.max(x, y), z), y),
- tvm.max(tvm.max(x, y), z))
- ck.verify(tvm.max(tvm.max(tvm.max(tvm.max(x, y), z), x * 2), y),
- tvm.max(tvm.max(tvm.max(x, y), z), x * 2))
- ck.verify(tvm.max(tvm.max(tvm.max(tvm.max(tvm.max(x, y), z), x * 2), z * 2), y),
- tvm.max(tvm.max(tvm.max(tvm.max(x, y), z), x * 2), z * 2))
-
- ck.verify(tvm.max(tvm.min(x, y), tvm.min(x, z)), tvm.min(tvm.max(y, z), x))
- ck.verify(tvm.max(tvm.min(x, y), tvm.min(z, x)), tvm.min(tvm.max(y, z), x))
- ck.verify(tvm.max(tvm.min(y, x), tvm.min(x, z)), tvm.min(tvm.max(y, z), x))
- ck.verify(tvm.max(tvm.min(y, x), tvm.min(z, x)), tvm.min(tvm.max(y, z), x))
-
- ck.verify(tvm.max(y + x, z + x), tvm.max(y, z) + x)
- ck.verify(tvm.max(y + x, x + z), tvm.max(y, z) + x)
- ck.verify(tvm.max(x + y, z + x), tvm.max(y, z) + x)
- ck.verify(tvm.max(x + y, x + z), tvm.max(y, z) + x)
-
- ck.verify(tvm.max(x - y, x - z), x - tvm.min(y, z))
- ck.verify(tvm.max(y - x, z - x), tvm.max(y, z) - x)
-
- ck.verify(tvm.max(tvm.max(x, 1), 10), tvm.max(x, 10))
- ck.verify(tvm.max(tvm.max(x, 11), 10), tvm.max(x, 11))
-
- ck.verify(tvm.max(x * 3, 9), tvm.max(x, 3) * 3)
- ck.verify(tvm.max(3 - x, 1), 3 - tvm.min(x, 2))
+ ck.verify(tvm.te.max(tmod(x, 2), tmod(y, 2) + 10), tmod(y, 2) + 10)
+ ck.verify(tvm.te.max(flm(x, 2), flm(y, 2) + 10), flm(y, 2) + 10)
+
+ ck.verify(tvm.te.max(x + 1, x + 10), x + 10)
+ ck.verify(tvm.te.max(x + 111, x + 10), x + 111)
+ ck.verify(tvm.te.max(x + 1, x), x + 1)
+ ck.verify(tvm.te.max(x, x + 2), x + 2)
+ ck.verify(tvm.te.max(1 - x, 2 - x), 2 - x)
+ ck.verify(tvm.te.max(3 - x, 2 - x), 3 - x)
+
+ ck.verify(tvm.te.max(tvm.te.min(x, y), tvm.te.max(x, y)), tvm.te.max(x, y))
+ ck.verify(tvm.te.max(tvm.te.min(x, y), tvm.te.max(y, x)), tvm.te.max(x, y))
+
+ ck.verify(tvm.te.max(tvm.te.min(x, y), x), x)
+ ck.verify(tvm.te.max(tvm.te.min(y, x), x), x)
+ ck.verify(tvm.te.max(tvm.te.max(x, y), x), tvm.te.max(x, y))
+ ck.verify(tvm.te.max(tvm.te.max(x, y), y), tvm.te.max(x, y))
+
+ ck.verify(tvm.te.max(x, tvm.te.min(x, y)), x)
+ ck.verify(tvm.te.max(x, tvm.te.min(y, x)), x)
+ ck.verify(tvm.te.max(x, tvm.te.max(x, y)), tvm.te.max(x, y))
+ ck.verify(tvm.te.max(y, tvm.te.max(x, y)), tvm.te.max(x, y))
+
+ ck.verify(tvm.te.max(tvm.te.max(tvm.te.max(x, y), z), y),
+ tvm.te.max(tvm.te.max(x, y), z))
+ ck.verify(tvm.te.max(tvm.te.max(tvm.te.max(tvm.te.max(x, y), z), x * 2), y),
+ tvm.te.max(tvm.te.max(tvm.te.max(x, y), z), x * 2))
+ ck.verify(tvm.te.max(tvm.te.max(tvm.te.max(tvm.te.max(tvm.te.max(x, y), z), x * 2), z * 2), y),
+ tvm.te.max(tvm.te.max(tvm.te.max(tvm.te.max(x, y), z), x * 2), z * 2))
+
+ ck.verify(tvm.te.max(tvm.te.min(x, y), tvm.te.min(x, z)), tvm.te.min(tvm.te.max(y, z), x))
+ ck.verify(tvm.te.max(tvm.te.min(x, y), tvm.te.min(z, x)), tvm.te.min(tvm.te.max(y, z), x))
+ ck.verify(tvm.te.max(tvm.te.min(y, x), tvm.te.min(x, z)), tvm.te.min(tvm.te.max(y, z), x))
+ ck.verify(tvm.te.max(tvm.te.min(y, x), tvm.te.min(z, x)), tvm.te.min(tvm.te.max(y, z), x))
+
+ ck.verify(tvm.te.max(y + x, z + x), tvm.te.max(y, z) + x)
+ ck.verify(tvm.te.max(y + x, x + z), tvm.te.max(y, z) + x)
+ ck.verify(tvm.te.max(x + y, z + x), tvm.te.max(y, z) + x)
+ ck.verify(tvm.te.max(x + y, x + z), tvm.te.max(y, z) + x)
+
+ ck.verify(tvm.te.max(x - y, x - z), x - tvm.te.min(y, z))
+ ck.verify(tvm.te.max(y - x, z - x), tvm.te.max(y, z) - x)
+
+ ck.verify(tvm.te.max(tvm.te.max(x, 1), 10), tvm.te.max(x, 10))
+ ck.verify(tvm.te.max(tvm.te.max(x, 11), 10), tvm.te.max(x, 11))
+
+ ck.verify(tvm.te.max(x * 3, 9), tvm.te.max(x, 3) * 3)
+ ck.verify(tvm.te.max(3 - x, 1), 3 - tvm.te.min(x, 2))
# DivMod rules
# trunc div
- ck.verify(tvm.max(tdiv(x, 10), tdiv(y, 10)), tdiv(tvm.max(x, y), 10))
- ck.verify(tvm.max(tdiv(x, (-10)), tdiv(y, (-10))), tdiv(tvm.min(x, y), (-10)))
- ck.verify(tvm.max(tdiv(x + 3, 4) * 4, x), tdiv(x + 3, 4) * 4)
+ ck.verify(tvm.te.max(tdiv(x, 10), tdiv(y, 10)), tdiv(tvm.te.max(x, y), 10))
+ ck.verify(tvm.te.max(tdiv(x, (-10)), tdiv(y, (-10))), tdiv(tvm.te.min(x, y), (-10)))
+ ck.verify(tvm.te.max(tdiv(x + 3, 4) * 4, x), tdiv(x + 3, 4) * 4)
# floordiv
- ck.verify(tvm.max(fld(x, 10), fld(y, 10)), fld(tvm.max(x, y), 10))
- ck.verify(tvm.max(fld(x, (-10)), fld(y, (-10))), fld(tvm.min(x, y), (-10)))
- ck.verify(tvm.max(fld(x + 3, 4) * 4, x), fld(x + 3, 4) * 4)
- ck.verify(tvm.max(fld(x, 4) * 4, x), x)
- ck.verify(tvm.max(x, fld(x, 4) * 4), x)
+ ck.verify(tvm.te.max(fld(x, 10), fld(y, 10)), fld(tvm.te.max(x, y), 10))
+ ck.verify(tvm.te.max(fld(x, (-10)), fld(y, (-10))), fld(tvm.te.min(x, y), (-10)))
+ ck.verify(tvm.te.max(fld(x + 3, 4) * 4, x), fld(x + 3, 4) * 4)
+ ck.verify(tvm.te.max(fld(x, 4) * 4, x), x)
+ ck.verify(tvm.te.max(x, fld(x, 4) * 4), x)
def test_cmp_simplify():
ck = RewriteChecker()
- x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z")
- flm = tvm.floormod
- fld = tvm.floordiv
- tdiv = tvm.truncdiv
- tmod = tvm.truncmod
+ x, y, z = te.var("x"), te.var("y"), te.var("z")
+ flm = tvm.te.floormod
+ fld = tvm.te.floordiv
+ tdiv = tvm.tir.truncdiv
+ tmod = tvm.tir.truncmod
# const int bound
- ck.verify((tmod(x, 2) + 10).equal(0), tvm.const(0, "bool"))
- ck.verify(tvm.tir.NE(tmod(x, 2) + 10, 0), tvm.const(1, "bool"))
- ck.verify(tmod(x, 2) + 10 > 1, tvm.const(1, "bool"))
- ck.verify(tmod(x, 2) + 10 <= 1, tvm.const(0, "bool"))
- ck.verify(flm(x, 2) + 2 > 1, tvm.const(1, "bool"))
- ck.verify(flm(x, 2) + 10 <= 1, tvm.const(0, "bool"))
+ ck.verify((tmod(x, 2) + 10).equal(0), tvm.tir.const(0, "bool"))
+ ck.verify(tvm.tir.NE(tmod(x, 2) + 10, 0), tvm.tir.const(1, "bool"))
+ ck.verify(tmod(x, 2) + 10 > 1, tvm.tir.const(1, "bool"))
+ ck.verify(tmod(x, 2) + 10 <= 1, tvm.tir.const(0, "bool"))
+ ck.verify(flm(x, 2) + 2 > 1, tvm.tir.const(1, "bool"))
+ ck.verify(flm(x, 2) + 10 <= 1, tvm.tir.const(0, "bool"))
- ck.verify(x * 3 + 10 == 0, tvm.const(0, "bool"))
- ck.verify(x * 3 + 10 != 0, tvm.const(1, "bool"))
+ ck.verify(x * 3 + 10 == 0, tvm.tir.const(0, "bool"))
+ ck.verify(x * 3 + 10 != 0, tvm.tir.const(1, "bool"))
# canonicalization
ck.verify((x - 10).equal(0), x.equal(10))
ck.verify(fld(x + 2, 4) * 4 >= x - y, tvm.tir.LE(flm(x + 2, 4) + (-2), y))
# End DivMod Rules
- ck.verify(tvm.min(x, 11) < 10, x < 10)
- ck.verify(tvm.min(x, 8) < 10, tvm.const(1, "bool"))
- ck.verify(tvm.max(8, x) > 10, tvm.tir.LT(10, x))
- ck.verify(x + 1 < tvm.max(8, x), x < 7)
+ ck.verify(tvm.te.min(x, 11) < 10, x < 10)
+ ck.verify(tvm.te.min(x, 8) < 10, tvm.tir.const(1, "bool"))
+ ck.verify(tvm.te.max(8, x) > 10, tvm.tir.LT(10, x))
+ ck.verify(x + 1 < tvm.te.max(8, x), x < 7)
ck.analyzer.update(x, tvm.arith.ConstIntBound(0, 10), override=True)
ck.analyzer.update(y, tvm.arith.ConstIntBound(-10, 0), override=True)
ck.analyzer.update(z, tvm.arith.ConstIntBound(-5, 5), override=True)
- ck.verify(x < 11, tvm.const(1, "bool"))
- ck.verify(x <= 10, tvm.const(1, "bool"))
- ck.verify(z <= 5, tvm.const(1, "bool"))
- ck.verify(x + y <= 10, tvm.const(1, "bool"))
- ck.verify(x + y >= -10, tvm.const(1, "bool"))
- ck.verify(z - 5 <= y + 10, tvm.const(1, "bool"))
- ck.verify(tvm.all(x > -1, z <= x + 5), tvm.const(1, "bool"))
- ck.verify(x*y <= 0, tvm.const(1, "bool"))
- ck.verify((x + 1)*(y - 1) < 0, tvm.const(1, "bool"))
- ck.verify(y*y >= 0, tvm.const(1, "bool"))
- ck.verify(x*6 <= -3, tvm.const(0, "bool"))
+ ck.verify(x < 11, tvm.tir.const(1, "bool"))
+ ck.verify(x <= 10, tvm.tir.const(1, "bool"))
+ ck.verify(z <= 5, tvm.tir.const(1, "bool"))
+ ck.verify(x + y <= 10, tvm.tir.const(1, "bool"))
+ ck.verify(x + y >= -10, tvm.tir.const(1, "bool"))
+ ck.verify(z - 5 <= y + 10, tvm.tir.const(1, "bool"))
+ ck.verify(tvm.tir.all(x > -1, z <= x + 5), tvm.tir.const(1, "bool"))
+ ck.verify(x*y <= 0, tvm.tir.const(1, "bool"))
+ ck.verify((x + 1)*(y - 1) < 0, tvm.tir.const(1, "bool"))
+ ck.verify(y*y >= 0, tvm.tir.const(1, "bool"))
+ ck.verify(x*6 <= -3, tvm.tir.const(0, "bool"))
ck.verify(tmod(y - 1, 3) == 0, tmod(y + (-1), 3) == 0)
def test_logical_simplify():
ck = RewriteChecker()
- x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z")
+ x, y, z = te.var("x"), te.var("y"), te.var("z")
ck.verify(tvm.tir.And(tvm.tir.EQ(x, y), tvm.tir.NE(x, y)),
- tvm.const(False, "bool"))
+ tvm.tir.const(False, "bool"))
ck.verify(tvm.tir.And(tvm.tir.NE(x, y), tvm.tir.EQ(x, y)),
- tvm.const(False, "bool"))
- ck.verify(tvm.tir.And(x > 1, tvm.tir.Not(x > 1)), tvm.const(False, "bool"))
- ck.verify(tvm.tir.And(x <= y, y < x), tvm.const(False, "bool"))
- ck.verify(tvm.tir.And(y < x, x <= y), tvm.const(False, "bool"))
- ck.verify(tvm.tir.And(x < 1, 0 < x), tvm.const(False, "bool"))
- ck.verify(tvm.tir.And(x < 0, 1 < x), tvm.const(False, "bool"))
- ck.verify(tvm.tir.And(x < 1, 1 <= x), tvm.const(False, "bool"))
- ck.verify(tvm.tir.And(x <= 1, 1 < x), tvm.const(False, "bool"))
- ck.verify(tvm.tir.And(1 <= x, x < 1), tvm.const(False, "bool"))
- ck.verify(tvm.tir.And(1 < x, x <= 1), tvm.const(False, "bool"))
- ck.verify(tvm.tir.And(x <= 1, 2 <= x), tvm.const(False, "bool"))
- ck.verify(tvm.tir.And(2 <= x, x <= 1), tvm.const(False, "bool"))
+ tvm.tir.const(False, "bool"))
+ ck.verify(tvm.tir.And(x > 1, tvm.tir.Not(x > 1)), tvm.tir.const(False, "bool"))
+ ck.verify(tvm.tir.And(x <= y, y < x), tvm.tir.const(False, "bool"))
+ ck.verify(tvm.tir.And(y < x, x <= y), tvm.tir.const(False, "bool"))
+ ck.verify(tvm.tir.And(x < 1, 0 < x), tvm.tir.const(False, "bool"))
+ ck.verify(tvm.tir.And(x < 0, 1 < x), tvm.tir.const(False, "bool"))
+ ck.verify(tvm.tir.And(x < 1, 1 <= x), tvm.tir.const(False, "bool"))
+ ck.verify(tvm.tir.And(x <= 1, 1 < x), tvm.tir.const(False, "bool"))
+ ck.verify(tvm.tir.And(1 <= x, x < 1), tvm.tir.const(False, "bool"))
+ ck.verify(tvm.tir.And(1 < x, x <= 1), tvm.tir.const(False, "bool"))
+ ck.verify(tvm.tir.And(x <= 1, 2 <= x), tvm.tir.const(False, "bool"))
+ ck.verify(tvm.tir.And(2 <= x, x <= 1), tvm.tir.const(False, "bool"))
ck.verify(tvm.tir.And(x == 1, x != 2), x == 1)
ck.verify(tvm.tir.Or(tvm.tir.EQ(x, y), tvm.tir.NE(x, y)),
- tvm.const(True, "bool"))
+ tvm.tir.const(True, "bool"))
ck.verify(tvm.tir.Or(tvm.tir.NE(x, y), tvm.tir.EQ(x, y)),
- tvm.const(True, "bool"))
- ck.verify(tvm.tir.Or(x > y, tvm.tir.Not(x > y)), tvm.const(True, "bool"))
+ tvm.tir.const(True, "bool"))
+ ck.verify(tvm.tir.Or(x > y, tvm.tir.Not(x > y)), tvm.tir.const(True, "bool"))
- ck.verify(tvm.tir.Or(x <= y, y < x), tvm.const(True, "bool"))
- ck.verify(tvm.tir.Or(y < x, y >= x), tvm.const(True, "bool"))
+ ck.verify(tvm.tir.Or(x <= y, y < x), tvm.tir.const(True, "bool"))
+ ck.verify(tvm.tir.Or(y < x, y >= x), tvm.tir.const(True, "bool"))
- ck.verify(tvm.tir.Or(x < 1, 0 < x), tvm.const(True, "bool"))
- ck.verify(tvm.tir.Or(0 < x, x < 1), tvm.const(True, "bool"))
+ ck.verify(tvm.tir.Or(x < 1, 0 < x), tvm.tir.const(True, "bool"))
+ ck.verify(tvm.tir.Or(0 < x, x < 1), tvm.tir.const(True, "bool"))
- ck.verify(tvm.tir.Or(x < 1, 1 <= x), tvm.const(True, "bool"))
- ck.verify(tvm.tir.Or(x <= 1, 1 < x), tvm.const(True, "bool"))
- ck.verify(tvm.tir.Or(1 <= x, x < 1), tvm.const(True, "bool"))
- ck.verify(tvm.tir.Or(1 < x, x <= 1), tvm.const(True, "bool"))
- ck.verify(tvm.tir.Or(x <= 1, 2 <= x), tvm.const(True, "bool"))
- ck.verify(tvm.tir.Or(2 <= x, x <= 1), tvm.const(True, "bool"))
+ ck.verify(tvm.tir.Or(x < 1, 1 <= x), tvm.tir.const(True, "bool"))
+ ck.verify(tvm.tir.Or(x <= 1, 1 < x), tvm.tir.const(True, "bool"))
+ ck.verify(tvm.tir.Or(1 <= x, x < 1), tvm.tir.const(True, "bool"))
+ ck.verify(tvm.tir.Or(1 < x, x <= 1), tvm.tir.const(True, "bool"))
+ ck.verify(tvm.tir.Or(x <= 1, 2 <= x), tvm.tir.const(True, "bool"))
+ ck.verify(tvm.tir.Or(2 <= x, x <= 1), tvm.tir.const(True, "bool"))
ck.verify(tvm.tir.Or(x != 1, x == 2), x != 1)
def test_let_simplify():
ck = RewriteChecker()
- x, y = tvm.var("x"), tvm.var("y")
+ x, y = te.var("x"), te.var("y")
z = tvm.tir.Let(x, 1, x + 1)
ck.verify(z + z, 4)
def test_cast_simplify():
ck = RewriteChecker()
- x = tvm.var("x")
+ x = te.var("x")
dtypes = ["float32", "float16", "int32", "int8", "bool"]
for dtype1 in dtypes:
- ck.verify(tvm.tir.Cast(dtype1, x - x), tvm.const(0, dtype1))
- ck.verify(tvm.tir.Cast(dtype1, x == x), tvm.const(1, dtype1))
+ ck.verify(tvm.tir.Cast(dtype1, x - x), tvm.tir.const(0, dtype1))
+ ck.verify(tvm.tir.Cast(dtype1, x == x), tvm.tir.const(1, dtype1))
for dtype2 in dtypes:
for i in [0, 1, 2, 3]:
- ck.verify(tvm.tir.Cast(dtype1, tvm.const(i, dtype2)), tvm.const(i, dtype1))
+ ck.verify(tvm.tir.Cast(dtype1, tvm.tir.const(i, dtype2)), tvm.tir.const(i, dtype1))
if __name__ == "__main__":
test_floordiv_index_simplify()
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_stmt_simplify():
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
A = ib.pointer("float32", name="A")
C = ib.pointer("float32", name="C")
- n = tvm.size_var("n")
+ n = te.size_var("n")
with ib.for_range(0, n, name="i") as i:
with ib.if_scope(i < 12):
A[i] = C[i]
body = tvm.tir.LetStmt(n, 10, ib.get())
- body = tvm.ir_pass.CanonicalSimplify(body)
+ body = tvm.tir.ir_pass.CanonicalSimplify(body)
assert isinstance(body.body, tvm.tir.Store)
def test_thread_extent_simplify():
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
A = ib.pointer("float32", name="A")
C = ib.pointer("float32", name="C")
- n = tvm.size_var("n")
- tx = tvm.thread_axis("threadIdx.x")
- ty = tvm.thread_axis("threadIdx.y")
+ n = te.size_var("n")
+ tx = te.thread_axis("threadIdx.x")
+ ty = te.thread_axis("threadIdx.y")
ib.scope_attr(tx, "thread_extent", n)
ib.scope_attr(tx, "thread_extent", n)
ib.scope_attr(ty, "thread_extent", 1)
with ib.if_scope(tx + ty < 12):
A[tx] = C[tx + ty]
body = tvm.tir.LetStmt(n, 10, ib.get())
- body = tvm.ir_pass.CanonicalSimplify(body)
+ body = tvm.tir.ir_pass.CanonicalSimplify(body)
assert isinstance(body.body.body.body, tvm.tir.Store)
def test_basic_likely_elimination():
- n = tvm.size_var('n')
- X = tvm.placeholder(shape=(n,), name="x")
- W = tvm.placeholder(shape=(n + 1,), dtype="int32", name="w")
+ n = te.size_var('n')
+ X = te.placeholder(shape=(n,), name="x")
+ W = te.placeholder(shape=(n + 1,), dtype="int32", name="w")
def f(i):
start = W[i]
extent = W[i+1] - W[i]
- rv = tvm.reduce_axis((0, extent))
- return tvm.sum(X[rv + start], axis=rv)
- Y = tvm.compute(X.shape, f, name="y")
- s = tvm.create_schedule([Y.op])
+ rv = te.reduce_axis((0, extent))
+ return te.sum(X[rv + start], axis=rv)
+ Y = te.compute(X.shape, f, name="y")
+ s = te.create_schedule([Y.op])
stmt = tvm.lower(s, [X, W, Y], simple_mode=True)
assert('if' not in str(stmt))
Y[i] = sum(X[:i])
"""
(m, ) = X.shape
- s_state = tvm.placeholder((m + 1, ), dtype="int32", name="state")
- s_init = tvm.compute((1, ), lambda _: tvm.const(0, "int32"))
- s_update = tvm.compute((m + 1, ), lambda l: s_state[l - 1] + X[l - 1])
- return tvm.scan(s_init, s_update, s_state, inputs=[X], name="cumsum")
+ s_state = te.placeholder((m + 1, ), dtype="int32", name="state")
+ s_init = te.compute((1, ), lambda _: tvm.tir.const(0, "int32"))
+ s_update = te.compute((m + 1, ), lambda l: s_state[l - 1] + X[l - 1])
+ return tvm.te.scan(s_init, s_update, s_state, inputs=[X], name="cumsum")
def sparse_lengths_sum(data, indices, lengths):
oshape = list(data.shape)
length_offsets = cumsum(lengths)
def sls(n, d):
- gg = tvm.reduce_axis((0, lengths[n]))
+ gg = te.reduce_axis((0, lengths[n]))
indices_idx = length_offsets[n] + gg
data_idx = indices[indices_idx]
data_val = data[data_idx, d]
- return tvm.sum(data_val, axis=gg)
+ return te.sum(data_val, axis=gg)
- return tvm.compute(oshape, sls)
+ return te.compute(oshape, sls)
- m, n, d, i, l = tvm.size_var('m'), tvm.size_var('n'), tvm.size_var('d'),\
- tvm.size_var('i'), tvm.size_var('l')
- data_ph = tvm.placeholder((m, d * 32), name="data")
- indices_ph = tvm.placeholder((i,), name="indices", dtype="int32")
- lengths_ph = tvm.placeholder((n,), name="lengths", dtype="int32")
+ m, n, d, i, l = te.size_var('m'), te.size_var('n'), te.size_var('d'),\
+ te.size_var('i'), te.size_var('l')
+ data_ph = te.placeholder((m, d * 32), name="data")
+ indices_ph = te.placeholder((i,), name="indices", dtype="int32")
+ lengths_ph = te.placeholder((n,), name="lengths", dtype="int32")
Y = sparse_lengths_sum(data_ph, indices_ph, lengths_ph)
- s = tvm.create_schedule([Y.op])
+ s = te.create_schedule([Y.op])
(n, d) = s[Y].op.axis
(do, di) = s[Y].split(d, factor=32)
(gg,) = s[Y].op.reduce_axis
import numpy as np
import tvm
+from tvm import te
from tvm import autotvm
from tvm.autotvm import MeasureInput, MeasureResult
from tvm.autotvm.measure.measure import Runner
@autotvm.register_customized_task("testing/matmul")
def matmul(N, L, M, dtype):
- A = tvm.placeholder((N, L), name='A', dtype=dtype)
- B = tvm.placeholder((L, M), name='B', dtype=dtype)
+ A = te.placeholder((N, L), name='A', dtype=dtype)
+ B = te.placeholder((L, M), name='B', dtype=dtype)
- k = tvm.reduce_axis((0, L), name='k')
- C = tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k), name='C')
- s = tvm.create_schedule(C.op)
+ k = te.reduce_axis((0, L), name='k')
+ C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name='C')
+ s = te.create_schedule(C.op)
# schedule
y, x = s[C].op.axis
@autotvm.register_customized_task("testing/bad_matmul")
def bad_matmul(N, L, M, dtype):
if 'bad_device' in tvm.target.Target.current().keys:
- A = tvm.placeholder((N, L), name='A', dtype=dtype)
- B = tvm.placeholder((L, M), name='B', dtype=dtype)
+ A = te.placeholder((N, L), name='A', dtype=dtype)
+ B = te.placeholder((L, M), name='B', dtype=dtype)
- k = tvm.reduce_axis((0, L-1), name='k')
- C = tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k), name='C')
- s = tvm.create_schedule(C.op)
+ k = te.reduce_axis((0, L-1), name='k')
+ C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name='C')
+ s = te.create_schedule(C.op)
# schedule
y, x = s[C].op.axis
import numpy as np
import tvm
+from tvm import te
from tvm.autotvm import feature
def test_iter_feature_gemm():
N = 128
- k = tvm.reduce_axis((0, N), 'k')
- A = tvm.placeholder((N, N), name='A')
- B = tvm.placeholder((N, N), name='B')
- C = tvm.compute(
+ k = te.reduce_axis((0, N), 'k')
+ A = te.placeholder((N, N), name='A')
+ B = te.placeholder((N, N), name='B')
+ C = te.compute(
A.shape,
- lambda y, x: tvm.sum(A[y, k] * B[k, x], axis=k),
+ lambda y, x: te.sum(A[y, k] * B[k, x], axis=k),
name='C')
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
feas = feature.get_itervar_feature(s, [A, B, C], take_log=False)
def test_curve_feature_gemm():
N = 128
- k = tvm.reduce_axis((0, N), 'k')
- A = tvm.placeholder((N, N), name='A')
- B = tvm.placeholder((N, N), name='B')
- C = tvm.compute(
+ k = te.reduce_axis((0, N), 'k')
+ A = te.placeholder((N, N), name='A')
+ B = te.placeholder((N, N), name='B')
+ C = te.compute(
A.shape,
- lambda y, x: tvm.sum(A[y, k] * B[k, x], axis=k),
+ lambda y, x: te.sum(A[y, k] * B[k, x], axis=k),
name='C')
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
feas = feature.get_buffer_curve_sample_flatten(s, [A, B, C], sample_n=30)
# sample_n * #buffers * #curves * 2 numbers per curve
n_sample = 100
def get_gemm_feature(target):
- k = tvm.reduce_axis((0, N), 'k')
- A = tvm.placeholder((N, N), name='A')
- B = tvm.placeholder((N, N), name='B')
- C = tvm.compute(A.shape, lambda y, x: tvm.sum(A[y, k] * B[k, x], axis=k),
+ k = te.reduce_axis((0, N), 'k')
+ A = te.placeholder((N, N), name='A')
+ B = te.placeholder((N, N), name='B')
+ C = te.compute(A.shape, lambda y, x: te.sum(A[y, k] * B[k, x], axis=k),
name='C')
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
y, x = s[C].op.axis
axes = list(s[C].tile(y, x, 8, 8)) + [k]
for i in range(len(perm)):
if perm[i] != 4:
pick.append(axes[i])
- s[C].bind(pick[0], tvm.thread_axis("blockIdx.x"))
- s[C].bind(pick[1], tvm.thread_axis("vthread"))
- s[C].bind(pick[2], tvm.thread_axis("threadIdx.y"))
+ s[C].bind(pick[0], te.thread_axis("blockIdx.x"))
+ s[C].bind(pick[1], te.thread_axis("vthread"))
+ s[C].bind(pick[2], te.thread_axis("threadIdx.y"))
with target:
feas = feature.get_itervar_feature(s, [A, B, C])
"""Test flop calculation"""
import tvm
+from tvm import te
import numpy as np
from tvm.autotvm.task.task import compute_flop
for i in range(5):
N, H, W, CO, CI, KH, KW = [np.random.randint(10, 32) for _ in range(7)]
(input_dtype, acc_dtype) = random_dtypes()
- D = tvm.placeholder((N, CI, H, W), dtype=input_dtype)
- K = tvm.placeholder((CO, CI, KH, KW), dtype=input_dtype)
+ D = te.placeholder((N, CI, H, W), dtype=input_dtype)
+ K = te.placeholder((CO, CI, KH, KW), dtype=input_dtype)
KH = min(H, KH)
KW = min(W, KW)
- ci = tvm.reduce_axis((0, CI))
- kh = tvm.reduce_axis((0, KH))
- kw = tvm.reduce_axis((0, KW))
+ ci = te.reduce_axis((0, CI))
+ kh = te.reduce_axis((0, KH))
+ kw = te.reduce_axis((0, KW))
OH = (H - KH) + 1
OW = (W - KW) + 1
- C = tvm.compute((N, CO, OH, OW), lambda n, co, h, w:
- tvm.sum(D[n][ci][h][w].astype(acc_dtype) * K[co][ci][h][w].astype(acc_dtype),
+ C = te.compute((N, CO, OH, OW), lambda n, co, h, w:
+ te.sum(D[n][ci][h][w].astype(acc_dtype) * K[co][ci][h][w].astype(acc_dtype),
axis=[ci, kh, kw]))
- s = tvm.create_schedule([C.op])
+ s = te.create_schedule([C.op])
assert compute_flop(s) == 2 * N * CO * OH * OW * CI * KH * KW
for i in range(5):
N, L, M = [np.random.randint(10, 128) * 4 for _ in range(3)]
(input_dtype, acc_dtype) = random_dtypes()
- A = tvm.placeholder((N, L), dtype=input_dtype)
- B = tvm.placeholder((M, L), dtype=input_dtype)
- k = tvm.reduce_axis((0, L))
+ A = te.placeholder((N, L), dtype=input_dtype)
+ B = te.placeholder((M, L), dtype=input_dtype)
+ k = te.reduce_axis((0, L))
bn = 4
- idxd = tvm.indexdiv
- idxm = tvm.indexmod
+ idxd = tvm.tir.indexdiv
+ idxm = tvm.tir.indexmod
- A_pack = tvm.compute((N // bn, L, bn), lambda i, j, k: A[i * bn + k][j])
- B_pack = tvm.compute((M // bn, L, bn), lambda i, j, k: B[i * bn + k][j])
- C_pack = tvm.compute((N // bn, M // bn, bn, bn), lambda i, j, ii, jj:
- tvm.sum(A_pack[i, k, ii].astype(acc_dtype) * B_pack[j, k, jj].astype(acc_dtype), axis=[k]))
- C = tvm.compute((N, M), lambda i, j: C_pack[idxd(i, bn)][idxd(j, bn)][idxm(i, bn)][idxm(j, bn)])
+ A_pack = te.compute((N // bn, L, bn), lambda i, j, k: A[i * bn + k][j])
+ B_pack = te.compute((M // bn, L, bn), lambda i, j, k: B[i * bn + k][j])
+ C_pack = te.compute((N // bn, M // bn, bn, bn), lambda i, j, ii, jj:
+ te.sum(A_pack[i, k, ii].astype(acc_dtype) * B_pack[j, k, jj].astype(acc_dtype), axis=[k]))
+ C = te.compute((N, M), lambda i, j: C_pack[idxd(i, bn)][idxd(j, bn)][idxm(i, bn)][idxm(j, bn)])
- s = tvm.create_schedule([C.op])
+ s = te.create_schedule([C.op])
assert compute_flop(s) == 2 * N * L * M
def test_outer_dot():
for i in range(5):
N, M = [np.random.randint(10, 128) * 4 for _ in range(2)]
(input_dtype, acc_dtype) = random_dtypes()
- A = tvm.placeholder((N,), dtype=input_dtype)
- B = tvm.placeholder((M,), dtype=input_dtype)
+ A = te.placeholder((N,), dtype=input_dtype)
+ B = te.placeholder((M,), dtype=input_dtype)
- C = tvm.compute((N, M), lambda i, j: A[i].astype(acc_dtype) * B[j].astype(acc_dtype))
+ C = te.compute((N, M), lambda i, j: A[i].astype(acc_dtype) * B[j].astype(acc_dtype))
- s = tvm.create_schedule([C.op])
+ s = te.create_schedule([C.op])
assert compute_flop(s) == N * M
def test_max_pool():
for i in range(5):
N, H, W, CO, CI, KH, KW = [np.random.randint(10, 32) for _ in range(7)]
(input_dtype, _) = random_dtypes()
- D = tvm.placeholder((N, CI, H, W), dtype=input_dtype)
+ D = te.placeholder((N, CI, H, W), dtype=input_dtype)
KH = min(H, KH)
KW = min(W, KW)
- kh = tvm.reduce_axis((0, KH))
- kw = tvm.reduce_axis((0, KW))
+ kh = te.reduce_axis((0, KH))
+ kw = te.reduce_axis((0, KW))
OH = (H - KH) + 1
OW = (W - KW) + 1
- C = tvm.compute(
+ C = te.compute(
(N, CO, OH, OW),
- lambda n, co, h, w: tvm.max(D[n][co][h + kh][w + kw], axis=[kh, kw]))
+ lambda n, co, h, w: tvm.te.max(D[n][co][h + kh][w + kw], axis=[kh, kw]))
- s = tvm.create_schedule([C.op])
+ s = te.create_schedule([C.op])
assert compute_flop(s) == N * CO * OH * OW * KH * KW
for i in range(5):
N, H, W, CO, CI, KH, KW = [np.random.randint(10, 32) for _ in range(7)]
(input_dtype, acc_dtype) = random_dtypes()
- D = tvm.placeholder((N, CI, H, W), dtype=input_dtype)
+ D = te.placeholder((N, CI, H, W), dtype=input_dtype)
KH = min(H, KH)
KW = min(W, KW)
- kh = tvm.reduce_axis((0, KH))
- kw = tvm.reduce_axis((0, KW))
+ kh = te.reduce_axis((0, KH))
+ kw = te.reduce_axis((0, KW))
OH = (H - KH) + 1
OW = (W - KW) + 1
- C = tvm.compute(
+ C = te.compute(
(N, CO, OH, OW),
- lambda n, co, h, w: tvm.sum(
- tvm.div(D[n][co][h + kh][w + kw].astype(acc_dtype), (KW * KH)), axis=[kh, kw]))
+ lambda n, co, h, w: te.sum(
+ te.div(D[n][co][h + kh][w + kw].astype(acc_dtype), (KW * KH)), axis=[kh, kw]))
- s = tvm.create_schedule([C.op])
+ s = te.create_schedule([C.op])
assert compute_flop(s) == 2 * N * CO * OH * OW * KH * KW
"""A simple move performs no floating-point operations, so the estimator should raise an error."""
N = 1024
- A = tvm.placeholder((N,))
- C = tvm.compute((N,), lambda i: A[i])
- s = tvm.create_schedule([C.op])
+ A = te.placeholder((N,))
+ C = te.compute((N,), lambda i: A[i])
+ s = te.create_schedule([C.op])
try:
compute_flop(s)
import numpy as np
import tvm
+from tvm import te
from test_autotvm_common import DummyRunner, bad_matmul, get_sample_task
from tvm import autotvm
from tvm.autotvm.measure.measure import MeasureErrorNo, MeasureResult
import time
import tvm
+from tvm import te
from tvm.contrib import util
from tvm import autotvm
"""Test space definition primitives"""
import tvm
+from tvm import te
from tvm.autotvm.task.space import ConfigSpace, FallbackConfigEntity
def gemm_func(cfg, N):
- A = tvm.placeholder((N, N), name='A')
- B = tvm.placeholder((N, N), name='B')
+ A = te.placeholder((N, N), name='A')
+ B = te.placeholder((N, N), name='B')
- k = tvm.reduce_axis((0, N), name='k')
- C = tvm.compute((N, N), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=[k]), name='C')
+ k = te.reduce_axis((0, N), name='k')
+ C = te.compute((N, N), lambda i, j: te.sum(A[i, k] * B[k, j], axis=[k]), name='C')
- s = tvm.create_schedule([C.op])
+ s = te.create_schedule([C.op])
y, x = s[C].op.axis
import numpy as np
import tvm
+from tvm import te
from tvm import autotvm
from tvm.autotvm import MeasureInput, MeasureResult
from tvm.autotvm.tuner.xgboost_cost_model import XGBoostCostModel
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_lower_rfactor():
- n = tvm.size_var("n")
- m = tvm.size_var("m")
- A = tvm.placeholder((n, m), name='A')
- k = tvm.reduce_axis((0, m), "k")
- B = tvm.compute((n,), lambda i: tvm.sum(A[i, k], axis=k), name="B")
- s = tvm.create_schedule(B.op)
+ n = te.size_var("n")
+ m = te.size_var("m")
+ A = te.placeholder((n, m), name='A')
+ k = te.reduce_axis((0, m), "k")
+ B = te.compute((n,), lambda i: te.sum(A[i, k], axis=k), name="B")
+ s = te.create_schedule(B.op)
ko, ki = s[B].split(B.op.reduce_axis[0], factor=16)
BF = s.rfactor(B, ki)
xo, xi = s[B].split(s[B].op.axis[0], factor=32)
- s[B.op].bind(xo, tvm.thread_axis("blockIdx.x"))
- s[B.op].bind(xi, tvm.thread_axis("threadIdx.y"))
- s[B].bind(s[B].op.reduce_axis[0], tvm.thread_axis("threadIdx.x"))
+ s[B.op].bind(xo, te.thread_axis("blockIdx.x"))
+ s[B.op].bind(xi, te.thread_axis("threadIdx.y"))
+ s[B].bind(s[B].op.reduce_axis[0], te.thread_axis("threadIdx.x"))
s[BF].compute_at(s[B], s[B].op.reduce_axis[0])
fapi = tvm.lower(s, [A, B])
def test_dependent_output_shape():
- n, m, x = tvm.size_var('n'), tvm.size_var('m'), tvm.size_var('x')
- A = tvm.placeholder((n, m))
- B = tvm.compute((m, n//x), lambda i, j: A[i,j] , name='B')
- s = tvm.create_schedule(B.op)
+ n, m, x = te.size_var('n'), te.size_var('m'), te.size_var('x')
+ A = te.placeholder((n, m))
+ B = te.compute((m, n//x), lambda i, j: A[i,j] , name='B')
+ s = te.create_schedule(B.op)
mod = tvm.build(s, [A, B, x])
if __name__ == "__main__":
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import re
import os
import ctypes
target = 'llvm -target=armv7l-none-linux-gnueabihf -mcpu=cortex-a53 -mattr=+neon'
def check_correct_assembly(type, elements, counts):
- n = tvm.convert(elements)
- A = tvm.placeholder(n, dtype=type, name='A')
- B = tvm.compute(A.shape, lambda i: tvm.popcount(A[i]), name='B')
- s = tvm.create_schedule(B.op)
+ n = tvm.runtime.convert(elements)
+ A = te.placeholder(n, dtype=type, name='A')
+ B = te.compute(A.shape, lambda i: tvm.tir.popcount(A[i]), name='B')
+ s = te.create_schedule(B.op)
s[B].vectorize(s[B].op.axis[0])
f = tvm.build(s, [A, B], target)
target = 'llvm -target=armv7l-none-linux-gnueabihf -mcpu=cortex-a53 -mattr=+neon'
def check_correct_assembly(N):
- K = tvm.size_var("K")
- A = tvm.placeholder((K, N), dtype="int8", name='A')
- B = tvm.placeholder((K, N), dtype="int8", name='B')
- k = tvm.reduce_axis((0, K))
- C = tvm.compute((N, ), lambda n: tvm.sum(
+ K = te.size_var("K")
+ A = te.placeholder((K, N), dtype="int8", name='A')
+ B = te.placeholder((K, N), dtype="int8", name='B')
+ k = te.reduce_axis((0, K))
+ C = te.compute((N, ), lambda n: te.sum(
A[k, n].astype("int32") * B[k, n].astype("int32"), axis=[k]), name='C')
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
s[C].vectorize(s[C].op.axis[0])
f = tvm.build(s, [A, B, C], target)
check_correct_assembly(64)
def check_broadcast_correct_assembly(N):
- K = tvm.size_var("K")
- A = tvm.placeholder((K, N), dtype="int8", name='A')
- B = tvm.placeholder((K,), dtype="int8", name='B')
- k = tvm.reduce_axis((0, K))
- C = tvm.compute((N, ), lambda n: tvm.sum(
+ K = te.size_var("K")
+ A = te.placeholder((K, N), dtype="int8", name='A')
+ B = te.placeholder((K,), dtype="int8", name='B')
+ k = te.reduce_axis((0, K))
+ C = te.compute((N, ), lambda n: te.sum(
A[k, n].astype("int32") * B[k].astype("int32"),
axis=[k]), name='C')
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
s[C].vectorize(s[C].op.axis[0])
f = tvm.build(s, [A, B, C], target)
from tvm.relay import testing
from tvm.contrib import graph_runtime
import tvm
+from tvm import te
import ctypes
def test_resnet18():
print("skip because %s is not enabled..." % device)
return
nn = 12
- n = tvm.convert(nn)
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
- s = tvm.create_schedule(B.op)
+ n = tvm.runtime.convert(nn)
+ A = te.placeholder((n,), name='A')
+ B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
+ s = te.create_schedule(B.op)
bx, tx = s[B].split(B.op.axis[0], factor=4)
- s[B].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[B].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[B].bind(bx, te.thread_axis("blockIdx.x"))
+ s[B].bind(tx, te.thread_axis("threadIdx.x"))
from tvm.contrib import util
temp = util.tempdir()
"""codegen related to bool types"""
import tvm
+from tvm import te
import numpy as np
def test_cmp_load_store():
n = 32
- A = tvm.placeholder((n,), name='A')
- B = tvm.placeholder((n,), name='B')
- C = tvm.compute(A.shape, lambda *i: A(*i) > B(*i), name='C')
- D = tvm.compute(C.shape, lambda *i: tvm.all(C(*i),
+ A = te.placeholder((n,), name='A')
+ B = te.placeholder((n,), name='B')
+ C = te.compute(A.shape, lambda *i: A(*i) > B(*i), name='C')
+ D = te.compute(C.shape, lambda *i: tvm.tir.all(C(*i),
A(*i) > 1).astype('float32'), name="D")
def check_llvm():
if not tvm.runtime.enabled("llvm"):
return
- s = tvm.create_schedule(D.op)
+ s = te.create_schedule(D.op)
xo, xi = s[C].split(C.op.axis[0], factor=4)
xo1, xo2 = s[C].split(xo, factor=13)
s[C].parallel(xo2)
ctx = tvm.context(device, 0)
if not ctx.exist:
return
- s = tvm.create_schedule(D.op)
+ s = te.create_schedule(D.op)
for stage in [C, D]:
xo, xi = s[stage].split(stage.op.axis[0], factor=4)
- s[stage].bind(xo, tvm.thread_axis("blockIdx.x"))
- s[stage].bind(xi, tvm.thread_axis("threadIdx.x"))
+ s[stage].bind(xo, te.thread_axis("blockIdx.x"))
+ s[stage].bind(xi, te.thread_axis("threadIdx.x"))
f = tvm.build(s, [A, B, D], device)
a_np = np.random.uniform(size=n).astype(A.dtype)
a = tvm.nd.array(a_np, ctx)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
from tvm.contrib import util
def test_add():
nn = 1024
- n = tvm.convert(nn)
- A = tvm.placeholder((n,), name='A')
- B = tvm.placeholder((n,), name='B')
- C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
- s = tvm.create_schedule(C.op)
+ n = tvm.runtime.convert(nn)
+ A = te.placeholder((n,), name='A')
+ B = te.placeholder((n,), name='B')
+ C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
+ s = te.create_schedule(C.op)
def check_c():
mhost = tvm.build(s, [A, B, C], "c", name="fadd")
def test_add_pipeline():
nn = 1024
- n = tvm.convert(nn)
- A = tvm.placeholder((n,), name='A')
- B = tvm.placeholder((n,), name='B')
- AA = tvm.compute((n,), lambda *i: A(*i), name='A')
- BB = tvm.compute((n,), lambda *i: B(*i), name='B')
- T = tvm.compute(A.shape, lambda *i: AA(*i) + BB(*i), name='T')
- C = tvm.compute(A.shape, lambda *i: T(*i), name='C')
- s = tvm.create_schedule(C.op)
+ n = tvm.runtime.convert(nn)
+ A = te.placeholder((n,), name='A')
+ B = te.placeholder((n,), name='B')
+ AA = te.compute((n,), lambda *i: A(*i), name='A')
+ BB = te.compute((n,), lambda *i: B(*i), name='B')
+ T = te.compute(A.shape, lambda *i: AA(*i) + BB(*i), name='T')
+ C = te.compute(A.shape, lambda *i: T(*i), name='C')
+ s = te.create_schedule(C.op)
xo, xi = s[C].split(C.op.axis[0], factor=4)
xo1, xo2 = s[C].split(xo, factor=13)
s[C].parallel(xo2)
def check_c():
# Specifically allow offset to test codepath when offset is available
- Ab = tvm.decl_buffer(
+ Ab = tvm.tir.decl_buffer(
A.shape, A.dtype,
- elem_offset=tvm.size_var('Aoffset'),
+ elem_offset=te.size_var('Aoffset'),
offset_factor=8,
name='A')
binds = {A : Ab}
# BUILD and invoke the kernel.
f1 = tvm.lower(s, [A,B,C], name="fadd_pipeline")
- fsplits = [x for x in tvm.ir_pass.SplitHostDevice(f1)]
- fsplits[0] = tvm.ir_pass.LowerTVMBuiltin(fsplits[0])
+ fsplits = [x for x in tvm.tir.ir_pass.SplitHostDevice(f1)]
+ fsplits[0] = tvm.tir.ir_pass.LowerTVMBuiltin(fsplits[0])
mhost = tvm.target.codegen.build_module(fsplits[0], "c")
temp = util.tempdir()
path_dso = temp.relpath("temp.so")
tvm.testing.assert_allclose(
c.asnumpy(), a.asnumpy() + b.asnumpy())
- with tvm.build_config(offset_factor=4):
+ with tvm.target.build_config(offset_factor=4):
check_c()
def test_reinterpret():
nn = 1024
- n = tvm.convert(nn)
- A = tvm.placeholder((n,), name='A', dtype="int32")
- B = tvm.compute(A.shape, lambda *i: tvm.call_pure_intrin("float32", "reinterpret", A(*i)), name='B')
- s = tvm.create_schedule(B.op)
+ n = tvm.runtime.convert(nn)
+ A = te.placeholder((n,), name='A', dtype="int32")
+ B = te.compute(A.shape, lambda *i: tvm.tir.call_pure_intrin("float32", "reinterpret", A(*i)), name='B')
+ s = te.create_schedule(B.op)
def check_c():
mhost = tvm.build(s, [A, B], "c", name="reinterpret")
# under the License.
"""Test cross compilation"""
import tvm
+from tvm import te
import os
import struct
from tvm import rpc
def test_llvm_add_pipeline():
nn = 1024
- n = tvm.convert(nn)
- A = tvm.placeholder((n,), name='A')
- B = tvm.placeholder((n,), name='B')
- C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
- s = tvm.create_schedule(C.op)
+ n = tvm.runtime.convert(nn)
+ A = te.placeholder((n,), name='A')
+ B = te.placeholder((n,), name='B')
+ C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
+ s = te.create_schedule(C.op)
xo, xi = s[C].split(C.op.axis[0], factor=4)
s[C].parallel(xo)
s[C].vectorize(xi)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
import topi
import unittest
from tvm.contrib.nvcc import have_fp16, have_int8
from tvm.contrib import nvcc
-tx = tvm.thread_axis("threadIdx.x")
-bx = tvm.thread_axis("blockIdx.x")
+tx = te.thread_axis("threadIdx.x")
+bx = te.thread_axis("blockIdx.x")
def test_cuda_vectorize_add():
num_thread = 8
if dtype == "int8" and not have_int8(tvm.gpu(0).compute_version):
print("skip because gpu does not support int8")
return
- A = tvm.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes))
- B = tvm.compute((n,), lambda i: A[i] + tvm.const(1, A.dtype), name='B')
- s = tvm.create_schedule(B.op)
+ A = te.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes))
+ B = te.compute((n,), lambda i: A[i] + tvm.tir.const(1, A.dtype), name='B')
+ s = te.create_schedule(B.op)
xo, xi = s[B].split(B.op.axis[0], factor=num_thread)
s[B].bind(xo, bx)
s[B].bind(xi, tx)
if dtype == "int8" and not have_int8(tvm.gpu(0).compute_version):
print("skip because gpu does not support int8")
return
- A = tvm.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes))
- B = tvm.placeholder((n,), name='B', dtype="%sx%d" % (dtype, lanes))
- C = tvm.placeholder((n,), name='C', dtype="int32")
- D = tvm.compute((n,),
- lambda i: tvm.call_pure_extern("int32", "__dp4a", A[i], B[i], C[i]), name='D')
- s = tvm.create_schedule(D.op)
+ A = te.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes))
+ B = te.placeholder((n,), name='B', dtype="%sx%d" % (dtype, lanes))
+ C = te.placeholder((n,), name='C', dtype="int32")
+ D = te.compute((n,),
+ lambda i: tvm.tir.call_pure_extern("int32", "__dp4a", A[i], B[i], C[i]), name='D')
+ s = te.create_schedule(D.op)
xo, xi = s[D].split(D.op.axis[0], factor=num_thread)
s[D].bind(xo, bx)
s[D].bind(xi, tx)
print("skip because cuda is not enabled..")
return
ctx = tvm.gpu(0)
- A = tvm.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes))
- B = tvm.compute((n,), lambda i: A[i], name='B')
- s = tvm.create_schedule(B.op)
+ A = te.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes))
+ B = te.compute((n,), lambda i: A[i], name='B')
+ s = te.create_schedule(B.op)
block, thread = s[B].split(B.op.axis[0], factor=num_thread)
s[B].bind(block, bx)
s[B].bind(thread, tx)
lanes = 4
dtype = 'int8'
ctx = tvm.gpu(0)
- A = tvm.compute((n, lanes), lambda i,j: tvm.const(value, dtype=dtype))
- s = tvm.create_schedule(A.op)
+ A = te.compute((n, lanes), lambda i,j: tvm.tir.const(value, dtype=dtype))
+ s = te.create_schedule(A.op)
y, x = s[A].op.axis
s[A].vectorize(x)
s[A].bind(y, bx)
def test_cuda_inf_nan():
target = 'cuda'
def check_inf_nan(ctx, n, value, dtype):
- A = tvm.placeholder((n,), name='A', dtype=dtype)
- inf_value = tvm.const(value, dtype=dtype)
- C = tvm.compute((n,), lambda i: inf_value, name='C')
- s = tvm.create_schedule(C.op)
+ A = te.placeholder((n,), name='A', dtype=dtype)
+ inf_value = tvm.tir.const(value, dtype=dtype)
+ C = te.compute((n,), lambda i: inf_value, name='C')
+ s = te.create_schedule(C.op)
s[C].bind(s[C].op.axis[0], tx)
fun = tvm.build(s, [A, C], target)
a = tvm.nd.empty((n,), A.dtype, ctx)
print("skip because cuda is not enabled..")
return
- idxm = tvm.indexmod
- a = tvm.placeholder((64, ), 'int32')
- b = tvm.placeholder((64, ), 'int32')
- c = tvm.compute((64, ), lambda x: a[x] + b[x - idxm(x, 4) + (3 - idxm(x, 4))])
- sch = tvm.create_schedule(c.op)
+ idxm = tvm.tir.indexmod
+ a = te.placeholder((64, ), 'int32')
+ b = te.placeholder((64, ), 'int32')
+ c = te.compute((64, ), lambda x: a[x] + b[x - idxm(x, 4) + (3 - idxm(x, 4))])
+ sch = te.create_schedule(c.op)
x = c.op.axis[0]
xo, xi = sch[c].split(x, 4)
- thrx = tvm.thread_axis("threadIdx.x")
+ thrx = te.thread_axis("threadIdx.x")
sch[c].bind(xo, thrx)
sch[c].vectorize(xi)
def my_vectorize(stmt):
def vectorizer(op):
if op.for_type == tvm.tir.For.Vectorized:
- four = tvm.const(4, 'int32')
- idx = tvm.tir.Ramp(thrx.var * four, tvm.const(1, 'int32'), 4)
- all_ones = tvm.const(1, 'int32x4')
+ four = tvm.tir.const(4, 'int32')
+ idx = tvm.tir.Ramp(thrx.var * four, tvm.tir.const(1, 'int32'), 4)
+ all_ones = tvm.tir.const(1, 'int32x4')
store = op.body
value = store.value
new_a = tvm.tir.Load('int32x4', value.a.buffer_var, idx, all_ones)
bs, ids = [], []
for i in range(4):
- bs.append(tvm.tir.Load('int32', value.b.buffer_var, thrx.var * four + tvm.const(i, 'int32')))
- ids.append(tvm.const(3 - i, 'int32'))
+ bs.append(tvm.tir.Load('int32', value.b.buffer_var, thrx.var * four + tvm.tir.const(i, 'int32')))
+ ids.append(tvm.tir.const(3 - i, 'int32'))
new_b = tvm.tir.Shuffle(bs, ids)
return tvm.tir.Store(store.buffer_var, new_a + new_b, idx, all_ones)
return None
- return tvm.ir_pass.IRTransform(stmt, None, vectorizer, ['For'])
+ return tvm.tir.ir_pass.IRTransform(stmt, None, vectorizer, ['For'])
- with tvm.build_config(add_lower_pass=[(1, my_vectorize)]):
+ with tvm.target.build_config(add_lower_pass=[(1, my_vectorize)]):
module = tvm.build(sch, [a, b, c], target='cuda')
a_ = np.array(list(range(64)), dtype='int32')
b_ = np.array((list(range(4))[::-1]) * 16, dtype='int32')
print("skip because cuda is not enabled..")
return
- k = tvm.reduce_axis((0, 32), 'k')
- A = tvm.placeholder((96, 32), name='A')
- B = tvm.compute( (96,), lambda m:
- tvm.sum(A[m, k], axis=k),
+ k = te.reduce_axis((0, 32), 'k')
+ A = te.placeholder((96, 32), name='A')
+ B = te.compute( (96,), lambda m:
+ te.sum(A[m, k], axis=k),
name='B')
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
s[B].reorder(B.op.reduce_axis[0], B.op.axis[0])
mo, _ = s[B].split(B.op.axis[0], 32)
- s[B].bind(mo, tvm.thread_axis("blockIdx.x"))
+ s[B].bind(mo, te.thread_axis("blockIdx.x"))
fcuda = tvm.build(s, [A, B], "cuda")
print("skip because cuda is not enabled..")
return
- n = tvm.reduce_axis((0, 129), 'n')
- A = tvm.placeholder((129,), name='A')
- B = tvm.compute( (1, ), lambda b:
- tvm.sum(A[n],
+ n = te.reduce_axis((0, 129), 'n')
+ A = te.placeholder((129,), name='A')
+ B = te.compute( (1, ), lambda b:
+ te.sum(A[n],
axis=n),
name='B'
)
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
_, ni = s[B].split(s[B].op.reduce_axis[0], factor=8)
# otherwise it is found that the code gen is done by nvrtc.
from tvm import autotvm
shape = (2, 3, 4)
- a = tvm.placeholder(shape, dtype='float16', name='a')
- b = tvm.const(0.5, dtype='float16')
- c = tvm.compute(shape, lambda i, j, k: a[i, j, k] > b, name='c')
- s = tvm.create_schedule(c.op)
+ a = te.placeholder(shape, dtype='float16', name='a')
+ b = tvm.tir.const(0.5, dtype='float16')
+ c = te.compute(shape, lambda i, j, k: a[i, j, k] > b, name='c')
+ s = te.create_schedule(c.op)
axes = [axis for axis in c.op.axis]
fused = s[c].fuse(*axes)
bx, tx = s[c].split(fused, factor=64)
- s[c].bind(bx, tvm.thread_axis('blockIdx.x'))
- s[c].bind(tx, tvm.thread_axis('threadIdx.x'))
+ s[c].bind(bx, te.thread_axis('blockIdx.x'))
+ s[c].bind(tx, te.thread_axis('threadIdx.x'))
func = tvm.build(s, [a, c], 'cuda')
ctx = tvm.gpu(0)
print("Skip because gpu does not have fp16 support")
return
- a = tvm.placeholder((m, n), name="a", dtype=dtype)
- b = tvm.placeholder((m, n), name="b", dtype=dtype)
+ a = te.placeholder((m, n), name="a", dtype=dtype)
+ b = te.placeholder((m, n), name="b", dtype=dtype)
c = a + b
d = a * b
e = topi.elemwise_sum([c, d])
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
from tvm.contrib import util
import numpy as np
def test_large_uint_imm():
value = (1 << 63) + 123
- other = tvm.const(3, "uint64")
+ other = tvm.tir.const(3, "uint64")
n = 12
num_thread = 2
- A = tvm.compute((n,), lambda *i: tvm.const(value, "uint64") + other, name='A')
- s = tvm.create_schedule(A.op)
+ A = te.compute((n,), lambda *i: tvm.tir.const(value, "uint64") + other, name='A')
+ s = te.create_schedule(A.op)
xo, xi = s[A].split(A.op.axis[0], factor=num_thread)
- s[A].bind(xi, tvm.thread_axis("threadIdx.x"))
- s[A].bind(xo, tvm.thread_axis("blockIdx.x"))
+ s[A].bind(xi, te.thread_axis("threadIdx.x"))
+ s[A].bind(xo, te.thread_axis("blockIdx.x"))
def check_target(device):
ctx = tvm.context(device, 0)
def test_add_pipeline():
- n = tvm.size_var('n')
- A = tvm.placeholder((n,), name='A')
- B = tvm.placeholder((), name='B')
- C = tvm.compute(A.shape, lambda *i: A(*i) + B(), name='C')
- D = tvm.compute(A.shape, lambda *i: C(*i) + 1, name='D')
- s = tvm.create_schedule(D.op)
+ n = te.size_var('n')
+ A = te.placeholder((n,), name='A')
+ B = te.placeholder((), name='B')
+ C = te.compute(A.shape, lambda *i: A(*i) + B(), name='C')
+ D = te.compute(A.shape, lambda *i: C(*i) + 1, name='D')
+ s = te.create_schedule(D.op)
# GPU schedule have to split by gridIdx and threadIdx
num_thread = 256
xo, xi = s[C].split(C.op.axis[0], factor=num_thread)
- s[C].bind(xi, tvm.thread_axis("threadIdx.x"))
- s[C].bind(xo, tvm.thread_axis("blockIdx.x"))
+ s[C].bind(xi, te.thread_axis("threadIdx.x"))
+ s[C].bind(xo, te.thread_axis("blockIdx.x"))
xo, xi = s[D].split(D.op.axis[0], factor=num_thread)
- s[D].bind(xi, tvm.thread_axis("threadIdx.x"))
- s[D].bind(xo, tvm.thread_axis("blockIdx.x"))
+ s[D].bind(xi, te.thread_axis("threadIdx.x"))
+ s[D].bind(xo, te.thread_axis("blockIdx.x"))
# compile to IR
s = s.normalize()
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
- Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
- Bb = tvm.decl_buffer(B.shape, B.dtype, name='B')
- Db = tvm.decl_buffer(D.shape, D.dtype, name='D')
- stmt = tvm.ir_pass.LoopPartition(stmt, False)
- stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B:Bb, D:Db}, 64)
- stmt = tvm.ir_pass.Simplify(stmt)
- fapi = tvm.ir_pass.MakeAPI(stmt, "myadd", [Ab, Bb, Db], 0, True)
- fsplits = [x for x in tvm.ir_pass.SplitHostDevice(fapi)]
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
+ Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
+ Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B')
+ Db = tvm.tir.decl_buffer(D.shape, D.dtype, name='D')
+ stmt = tvm.tir.ir_pass.LoopPartition(stmt, False)
+ stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B:Bb, D:Db}, 64)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
+ fapi = tvm.tir.ir_pass.MakeAPI(stmt, "myadd", [Ab, Bb, Db], 0, True)
+ fsplits = [x for x in tvm.tir.ir_pass.SplitHostDevice(fapi)]
# lower the floordiv(use stackvm rules so it works for all targets)
- fsplits = [tvm.ir_pass.LowerIntrin(x, "stackvm") for x in fsplits]
- fsplits[0] = tvm.ir_pass.LowerTVMBuiltin(fsplits[0])
+ fsplits = [tvm.tir.ir_pass.LowerIntrin(x, "stackvm") for x in fsplits]
+ fsplits[0] = tvm.tir.ir_pass.LowerTVMBuiltin(fsplits[0])
def check_target(device, host="stackvm"):
ctx = tvm.context(device, 0)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
def test_add_pipeline():
nn = 64
max_threads = 4
- n = tvm.convert(nn)
- A = tvm.placeholder((n,), name='A')
+ n = tvm.runtime.convert(nn)
+ A = te.placeholder((n,), name='A')
def extern_generator(ins, outs):
"""Manually write the IR for the extern function, add pipeline"""
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
with ib.for_range(0, (n+1) // 2) as i:
- ib.emit(outs[0].vstore(i*2, ins[0].vload(i*2, "float32x2") + tvm.const(1, "float32x2")))
+ ib.emit(outs[0].vstore(i*2, ins[0].vload(i*2, "float32x2") + tvm.tir.const(1, "float32x2")))
return ib.get()
def extern_generator_gpu(ins, outs):
"""Manually write the IR for the extern function, add pipeline"""
- ib = tvm.ir_builder.create()
- bx = tvm.thread_axis("blockIdx.x")
- tx = tvm.thread_axis("threadIdx.x")
+ ib = tvm.tir.ir_builder.create()
+ bx = te.thread_axis("blockIdx.x")
+ tx = te.thread_axis("threadIdx.x")
ib.scope_attr(bx, "thread_extent", (nn+max_threads-1) // max_threads)
ib.scope_attr(tx, "thread_extent", max_threads)
idx = bx.var * max_threads + tx.var
with ib.if_scope(ib.likely(idx < n)):
- ib.emit(outs[0].vstore(idx*2, ins[0].vload(idx*2, "float32x2") + tvm.const(1, "float32x2")))
+ ib.emit(outs[0].vstore(idx*2, ins[0].vload(idx*2, "float32x2") + tvm.tir.const(1, "float32x2")))
return ib.get()
- C_cpu = tvm.extern(A.shape, [A], extern_generator, name='C')
- C_gpu = tvm.extern(A.shape, [A], extern_generator_gpu, name='C')
- s_cpu = tvm.create_schedule(C_cpu.op)
- s_gpu = tvm.create_schedule(C_gpu.op)
+ C_cpu = te.extern(A.shape, [A], extern_generator, name='C')
+ C_gpu = te.extern(A.shape, [A], extern_generator_gpu, name='C')
+ s_cpu = te.create_schedule(C_cpu.op)
+ s_gpu = te.create_schedule(C_gpu.op)
print(tvm.lower(s_cpu, [A, C_cpu], simple_mode=True))
print(tvm.lower(s_gpu, [A, C_gpu], simple_mode=True))
def test_pack_buffer_simple():
nn = 1024
- n = tvm.convert(nn)
- A = tvm.placeholder((n,), name='A')
+ n = tvm.runtime.convert(nn)
+ A = te.placeholder((n,), name='A')
def extern_generator(ins, outs):
"""Manually write the IR for the extern function, add pipeline."""
- return tvm.call_packed("my_extern_array_func1", ins[0], outs[0])
+ return tvm.tir.call_packed("my_extern_array_func1", ins[0], outs[0])
- C = tvm.extern(A.shape, [A], extern_generator, name='C')
- s = tvm.create_schedule(C.op)
+ C = te.extern(A.shape, [A], extern_generator, name='C')
+ s = te.create_schedule(C.op)
@tvm.register_func
def my_extern_array_func1(aa, bb):
def test_pack_buffer_intermediate():
nn = 1024
- n = tvm.convert(nn)
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute((n,), lambda i: A[i] + 1, name="B")
+ n = tvm.runtime.convert(nn)
+ A = te.placeholder((n,), name='A')
+ B = te.compute((n,), lambda i: A[i] + 1, name="B")
def extern_generator(ins, outs):
"""Manually write the IR for the extern function, add pipeline."""
- return tvm.call_packed("my_extern_array_func2", ins[0], outs[0])
+ return tvm.tir.call_packed("my_extern_array_func2", ins[0], outs[0])
- C = tvm.extern(B.shape, [B], extern_generator, name='C')
- s = tvm.create_schedule(C.op)
+ C = te.extern(B.shape, [B], extern_generator, name='C')
+ s = te.create_schedule(C.op)
def check_target(target):
if not tvm.runtime.enabled(target):
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import topi
from tvm.contrib import util, clang
import numpy as np
import math
def test_llvm_intrin():
- ib = tvm.ir_builder.create()
- n = tvm.convert(4)
+ ib = tvm.tir.ir_builder.create()
+ n = tvm.runtime.convert(4)
A = ib.pointer("float32", name="A")
args = [
- tvm.call_pure_intrin("handle", "tvm_address_of", A[0]),
+ tvm.tir.call_pure_intrin("handle", "tvm_address_of", A[0]),
0, 3, 1
]
ib.emit(tvm.tir.Evaluate(
tvm.tir.Call(
"int32", "prefetch", args, tvm.tir.Call.Intrinsic, None, 0)))
body = ib.get()
- func = tvm.ir_pass.MakeAPI(body, "prefetch", [A], 0, True)
+ func = tvm.tir.ir_pass.MakeAPI(body, "prefetch", [A], 0, True)
fcode = tvm.build(func, None, "llvm")
}
"""
n = 10
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute((n,), lambda *i:
- tvm.call_pure_extern("float32", "my_add", A(*i), 1.0),
+ A = te.placeholder((n,), name='A')
+ B = te.compute((n,), lambda *i:
+ tvm.tir.call_pure_extern("float32", "my_add", A(*i), 1.0),
name='B')
def check_llvm(use_file):
if not tvm.runtime.enabled("llvm"):
temp = util.tempdir()
ll_path = temp.relpath("temp.ll")
ll_code = clang.create_llvm(cc_code, output=ll_path)
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
if use_file:
s[B].pragma(s[B].op.axis[0], "import_llvm", ll_path)
else:
def test_llvm_lookup_intrin():
- ib = tvm.ir_builder.create()
- m = tvm.size_var("m")
+ ib = tvm.tir.ir_builder.create()
+ m = te.size_var("m")
A = ib.pointer("uint8x8", name="A")
- x = tvm.call_llvm_intrin("uint8x8", "llvm.ctpop.i8", tvm.const(1, 'uint32'), A)
+ x = tvm.tir.call_llvm_intrin("uint8x8", "llvm.ctpop.i8", tvm.tir.const(1, 'uint32'), A)
ib.emit(x)
body = ib.get()
- func = tvm.ir_pass.MakeAPI(body, "ctpop", [A], 1, True)
+ func = tvm.tir.ir_pass.MakeAPI(body, "ctpop", [A], 1, True)
fcode = tvm.build(func, None, "llvm")
def test_llvm_large_uintimm():
value = (1 << 63) + 123
- other = tvm.const(3, "uint64")
- A = tvm.compute((), lambda : tvm.const(value, "uint64") + other, name='A')
- s = tvm.create_schedule(A.op)
+ other = tvm.tir.const(3, "uint64")
+ A = te.compute((), lambda : tvm.tir.const(value, "uint64") + other, name='A')
+ s = te.create_schedule(A.op)
def check_llvm():
if not tvm.runtime.enabled("llvm"):
def test_llvm_add_pipeline():
nn = 1024
- n = tvm.convert(nn)
- A = tvm.placeholder((n,), name='A')
- B = tvm.placeholder((n,), name='B')
- AA = tvm.compute((n,), lambda *i: A(*i), name='A')
- BB = tvm.compute((n,), lambda *i: B(*i), name='B')
- T = tvm.compute(A.shape, lambda *i: AA(*i) + BB(*i), name='T')
- C = tvm.compute(A.shape, lambda *i: T(*i), name='C')
- s = tvm.create_schedule(C.op)
+ n = tvm.runtime.convert(nn)
+ A = te.placeholder((n,), name='A')
+ B = te.placeholder((n,), name='B')
+ AA = te.compute((n,), lambda *i: A(*i), name='A')
+ BB = te.compute((n,), lambda *i: B(*i), name='B')
+ T = te.compute(A.shape, lambda *i: AA(*i) + BB(*i), name='T')
+ C = te.compute(A.shape, lambda *i: T(*i), name='C')
+ s = te.create_schedule(C.op)
xo, xi = s[C].split(C.op.axis[0], factor=4)
xo1, xo2 = s[C].split(xo, factor=13)
s[C].parallel(xo2)
if not tvm.runtime.enabled("llvm"):
return
# Specifically allow offset to test codepath when offset is available
- Ab = tvm.decl_buffer(
+ Ab = tvm.tir.decl_buffer(
A.shape, A.dtype,
- elem_offset=tvm.size_var('Aoffset'),
+ elem_offset=te.size_var('Aoffset'),
offset_factor=8,
name='A')
binds = {A : Ab}
tvm.testing.assert_allclose(
c.asnumpy(), a.asnumpy() + b.asnumpy())
- with tvm.build_config(offset_factor=4):
+ with tvm.target.build_config(offset_factor=4):
check_llvm()
def test_llvm_persist_parallel():
n = 128
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute(A.shape, lambda *i: A(*i) + 1, name='B')
- C = tvm.compute(A.shape, lambda *i: tvm.sqrt(B(*i)) * 2 + 2, name='C')
- s = tvm.create_schedule(C.op)
+ A = te.placeholder((n,), name='A')
+ B = te.compute(A.shape, lambda *i: A(*i) + 1, name='B')
+ C = te.compute(A.shape, lambda *i: te.sqrt(B(*i)) * 2 + 2, name='C')
+ s = te.create_schedule(C.op)
xo, xi = s[C].split(C.op.axis[0], factor=8)
xo1, xo2 = s[C].split(xo, nparts=1)
s[B].compute_at(s[C], xo1)
def check_llvm(nn, base):
if not tvm.runtime.enabled("llvm"):
return
- n = tvm.convert(nn)
- A = tvm.placeholder((n + base), name='A')
- C = tvm.compute((n,), lambda i: A(nn + base- i - 1), name='C')
- s = tvm.create_schedule(C.op)
+ n = tvm.runtime.convert(nn)
+ A = te.placeholder((n + base), name='A')
+ C = te.compute((n,), lambda i: A(nn + base- i - 1), name='C')
+ s = te.create_schedule(C.op)
xo, xi = s[C].split(C.op.axis[0], factor=4)
s[C].parallel(xo)
s[C].vectorize(xi)
def check_llvm(n, lanes):
if not tvm.runtime.enabled("llvm"):
return
- A = tvm.placeholder((n,), name='A', dtype="float32x%d" % lanes)
- B = tvm.compute((n,), lambda i: A[i], name='B')
- C = tvm.compute((n,), lambda i: B[i] + tvm.const(1, A.dtype), name='C')
- s = tvm.create_schedule(C.op)
+ A = te.placeholder((n,), name='A', dtype="float32x%d" % lanes)
+ B = te.compute((n,), lambda i: A[i], name='B')
+ C = te.compute((n,), lambda i: B[i] + tvm.tir.const(1, A.dtype), name='C')
+ s = te.create_schedule(C.op)
xo, xi = s[C].split(C.op.axis[0], nparts=2)
_, xi = s[C].split(xi, factor=2)
s[C].parallel(xo)
def check_llvm(nn, base, stride):
if not tvm.runtime.enabled("llvm"):
return
- n = tvm.convert(nn)
- A = tvm.placeholder((n + base, stride), name='A')
- C = tvm.compute((n, stride), lambda i, j: A(base + i, j) + 1, name='C')
- s = tvm.create_schedule(C.op)
+ n = tvm.runtime.convert(nn)
+ A = te.placeholder((n + base, stride), name='A')
+ C = te.compute((n, stride), lambda i, j: A(base + i, j) + 1, name='C')
+ s = te.create_schedule(C.op)
xo, xi = s[C].split(C.op.axis[0], factor=4)
s[C].parallel(xo)
s[C].vectorize(xi)
c.asnumpy(), a.asnumpy()[base:] + 1)
check_llvm(64, 0, 2)
check_llvm(4, 0, 1)
- with tvm.build_config(restricted_func=False):
+ with tvm.target.build_config(restricted_func=False):
check_llvm(4, 0, 3)
def test_llvm_temp_space():
nn = 1024
- n = tvm.convert(nn)
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute(A.shape, lambda i: A(i) + 1, name='B')
- C = tvm.compute(A.shape, lambda i: B(i) + 1, name='C')
- s = tvm.create_schedule(C.op)
+ n = tvm.runtime.convert(nn)
+ A = te.placeholder((n,), name='A')
+ B = te.compute(A.shape, lambda i: A(i) + 1, name='B')
+ C = te.compute(A.shape, lambda i: B(i) + 1, name='C')
+ s = te.create_schedule(C.op)
def check_llvm():
if not tvm.runtime.enabled("llvm"):
def test_multiple_func():
nn = 1024
- n = tvm.convert(nn)
- A = tvm.placeholder((n,), name='A')
- B = tvm.placeholder((n,), name='B')
- C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
- s = tvm.create_schedule(C.op)
+ n = tvm.runtime.convert(nn)
+ A = te.placeholder((n,), name='A')
+ B = te.placeholder((n,), name='B')
+ C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
+ s = te.create_schedule(C.op)
xo, xi = s[C].split(C.op.axis[0], factor=4)
s[C].parallel(xo)
s[C].vectorize(xi)
def check_llvm(n, offset):
if not tvm.runtime.enabled("llvm"):
return
- A = tvm.placeholder((n, ), name='A')
- C = tvm.compute((n,), lambda i: tvm.if_then_else(i >= offset, A[i], 0.0), name='C')
- s = tvm.create_schedule(C.op)
+ A = te.placeholder((n, ), name='A')
+ C = te.compute((n,), lambda i: tvm.tir.if_then_else(i >= offset, A[i], 0.0), name='C')
+ s = te.create_schedule(C.op)
# build and invoke the kernel.
f = tvm.build(s, [A, C], "llvm")
ctx = tvm.cpu(0)
def check_llvm(n):
if not tvm.runtime.enabled("llvm"):
return
- A = tvm.placeholder((n, ), name='A', dtype="int32")
- C = tvm.compute((n,), lambda i: A[i].equal(1).astype("float"), name='C')
- s = tvm.create_schedule(C.op)
+ A = te.placeholder((n, ), name='A', dtype="int32")
+ C = te.compute((n,), lambda i: A[i].equal(1).astype("float"), name='C')
+ s = te.create_schedule(C.op)
# build and invoke the kernel.
f = tvm.build(s, [A, C], "llvm")
ctx = tvm.cpu(0)
def check_llvm(n):
if not tvm.runtime.enabled("llvm"):
return
- A = tvm.placeholder((n, ), name='A')
- scale = tvm.placeholder((), name='scale')
- k = tvm.reduce_axis((0, n), name="k")
- C = tvm.compute((), lambda : tvm.sum(A[k] * scale(), axis=k), name="C")
- D = tvm.compute((), lambda : C() + 1)
- s = tvm.create_schedule(D.op)
+ A = te.placeholder((n, ), name='A')
+ scale = te.placeholder((), name='scale')
+ k = te.reduce_axis((0, n), name="k")
+ C = te.compute((), lambda : te.sum(A[k] * scale(), axis=k), name="C")
+ D = te.compute((), lambda : C() + 1)
+ s = te.create_schedule(D.op)
# build and invoke the kernel.
f = tvm.build(s, [A, scale, D], "llvm")
ctx = tvm.cpu(0)
def check_llvm(n):
if not tvm.runtime.enabled("llvm"):
return
- with tvm.build_config(instrument_bound_checkers=True):
- A = tvm.placeholder((n, ), name='A')
- scale = tvm.placeholder((), name='scale')
- k = tvm.reduce_axis((0, n), name="k")
- C = tvm.compute((), lambda : tvm.sum(A[k] * scale(), axis=k), name="C")
- D = tvm.compute((), lambda : C() + 1)
- s = tvm.create_schedule(D.op)
+ with tvm.target.build_config(instrument_bound_checkers=True):
+ A = te.placeholder((n, ), name='A')
+ scale = te.placeholder((), name='scale')
+ k = te.reduce_axis((0, n), name="k")
+ C = te.compute((), lambda : te.sum(A[k] * scale(), axis=k), name="C")
+ D = te.compute((), lambda : C() + 1)
+ s = te.create_schedule(D.op)
# build and invoke the kernel.
f = tvm.build(s, [A, scale, D], "llvm")
ctx = tvm.cpu(0)
def test_alignment():
- n = tvm.convert(1024)
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute(A.shape, lambda i: A[i] * 3, name='B')
- s = tvm.create_schedule(B.op)
+ n = tvm.runtime.convert(1024)
+ A = te.placeholder((n,), name='A')
+ B = te.compute(A.shape, lambda i: A[i] * 3, name='B')
+ s = te.create_schedule(B.op)
bx, tx = s[B].split(B.op.axis[0], factor=8)
s[B].vectorize(tx)
f = tvm.build(s, [A, B], "llvm")
def test_llvm_div():
"""Check that the semantics of div and mod is correct"""
def check(start, end, dstart, dend, dtype, floor_div=False):
- div = tvm.floordiv if floor_div else tvm.truncdiv
- mod = tvm.floormod if floor_div else tvm.truncmod
+ div = tvm.te.floordiv if floor_div else tvm.tir.truncdiv
+ mod = tvm.te.floormod if floor_div else tvm.tir.truncmod
# A are dividends, B are divisors. Note that we add 1 to make include end in the range.
- A = tvm.placeholder((end - start + 1,), name="A", dtype=dtype)
- B = tvm.placeholder((dend - dstart + 1,), name="B", dtype=dtype)
+ A = te.placeholder((end - start + 1,), name="A", dtype=dtype)
+ B = te.placeholder((dend - dstart + 1,), name="B", dtype=dtype)
# We clip values with min and max so that simplifiers know the ranges of values
- clipa = lambda x: tvm.min(tvm.const(end, dtype), tvm.max(tvm.const(start, dtype), x))
- clipb = lambda x: tvm.min(tvm.const(dend, dtype), tvm.max(tvm.const(dstart, dtype), x))
+ clipa = lambda x: tvm.te.min(tvm.tir.const(end, dtype), tvm.te.max(tvm.tir.const(start, dtype), x))
+ clipb = lambda x: tvm.te.min(tvm.tir.const(dend, dtype), tvm.te.max(tvm.tir.const(dstart, dtype), x))
# If the range is just a single point, use the constant itself
if start == end:
- clipa = lambda x: tvm.const(start, dtype)
+ clipa = lambda x: tvm.tir.const(start, dtype)
if dstart == dend:
- clipb = lambda x: tvm.const(dstart, dtype)
+ clipb = lambda x: tvm.tir.const(dstart, dtype)
# D are division results and M are modulo results
- [D, M] = tvm.compute((end - start + 1, dend - dstart + 1),
+ [D, M] = te.compute((end - start + 1, dend - dstart + 1),
lambda i, j: (div(clipa(A[i]), clipb(B[j])),
mod(clipa(A[i]), clipb(B[j]))))
- s = tvm.create_schedule([D.op, M.op])
+ s = te.create_schedule([D.op, M.op])
f = tvm.build(s, [A, B, D, M], "llvm")
# Fill input arrays with values
def test_llvm_fp_math():
def check_llvm_reciprocal(n):
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute((n,), lambda i: tvm.div(1.0,(1e+37*A[i])), name='B')
+ A = te.placeholder((n,), name='A')
+ B = te.compute((n,), lambda i: te.div(1.0,(1e+37*A[i])), name='B')
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
f = tvm.build(s, [A, B], "llvm")
a = tvm.nd.array(np.full((n,), 100, 'float32'))
check_llvm_reciprocal(16)
def check_llvm_sigmoid(n):
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute((n,), lambda i: tvm.sigmoid(A[i]), name='B')
+ A = te.placeholder((n,), name='A')
+ B = te.compute((n,), lambda i: te.sigmoid(A[i]), name='B')
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
f = tvm.build(s, [A, B], "llvm")
a = tvm.nd.array(np.full((n,), -1000, 'float32'))
def test_dwarf_debug_information():
nn = 1024
- n = tvm.convert(nn)
- A = tvm.placeholder((n,), name='A')
- B = tvm.placeholder((n,), name='B')
- C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
- s = tvm.create_schedule(C.op)
+ n = tvm.runtime.convert(nn)
+ A = te.placeholder((n,), name='A')
+ B = te.placeholder((n,), name='B')
+ C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
+ s = te.create_schedule(C.op)
xo, xi = s[C].split(C.op.axis[0], factor=4)
s[C].parallel(xo)
s[C].vectorize(xi)
def test_llvm_shuffle():
- a = tvm.placeholder((8, ), 'int32')
- b = tvm.placeholder((8, ), 'int32')
- c = tvm.compute((8, ), lambda x: a[x] + b[7-x])
- sch = tvm.create_schedule(c.op)
+ a = te.placeholder((8, ), 'int32')
+ b = te.placeholder((8, ), 'int32')
+ c = te.compute((8, ), lambda x: a[x] + b[7-x])
+ sch = te.create_schedule(c.op)
def my_vectorize(stmt):
def vectorizer(op):
store = op.body
- idx = tvm.tir.Ramp(tvm.const(0, 'int32'), tvm.const(1, 'int32'), 8)
- all_ones = tvm.const(1, 'int32x8')
+ idx = tvm.tir.Ramp(tvm.tir.const(0, 'int32'), tvm.tir.const(1, 'int32'), 8)
+ all_ones = tvm.tir.const(1, 'int32x8')
value = store.value
- b_idx = tvm.tir.Shuffle([idx], [tvm.const(i, 'int32') for i in range(7, -1, -1)])
+ b_idx = tvm.tir.Shuffle([idx], [tvm.tir.const(i, 'int32') for i in range(7, -1, -1)])
new_a = tvm.tir.Load('int32x8', value.a.buffer_var, idx, all_ones)
new_b = tvm.tir.Load('int32x8', value.b.buffer_var, b_idx, all_ones)
value = new_a + new_b
return tvm.tir.Store(store.buffer_var, new_a + new_b, idx, all_ones)
- return tvm.ir_pass.IRTransform(stmt, None, vectorizer, ['For'])
+ return tvm.tir.ir_pass.IRTransform(stmt, None, vectorizer, ['For'])
- with tvm.build_config(add_lower_pass=[(1, my_vectorize)]):
+ with tvm.target.build_config(add_lower_pass=[(1, my_vectorize)]):
ir = tvm.lower(sch, [a, b, c], simple_mode=True)
module = tvm.build(sch, [a, b, c])
a_ = tvm.nd.array(np.arange(1, 9, dtype='int32'))
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
target = 'opencl'
def test_opencl_ternary_expression():
def check_if_then_else(ctx, n, dtype):
- A = tvm.placeholder((n,), name='A', dtype=dtype)
- true_value = tvm.const(1, dtype=dtype)
- false_value = tvm.const(3, dtype=dtype)
- max_lhs = tvm.const(2, dtype=dtype)
- max_rhs = tvm.if_then_else(A[0] > 0, true_value, false_value)
- C = tvm.compute((n,), lambda i: tvm.max(max_lhs, max_rhs), name='C')
- s = tvm.create_schedule(C.op)
- s[C].bind(s[C].op.axis[0], tvm.thread_axis("threadIdx.x"))
+ A = te.placeholder((n,), name='A', dtype=dtype)
+ true_value = tvm.tir.const(1, dtype=dtype)
+ false_value = tvm.tir.const(3, dtype=dtype)
+ max_lhs = tvm.tir.const(2, dtype=dtype)
+ max_rhs = tvm.tir.if_then_else(A[0] > 0, true_value, false_value)
+ C = te.compute((n,), lambda i: tvm.te.max(max_lhs, max_rhs), name='C')
+ s = te.create_schedule(C.op)
+ s[C].bind(s[C].op.axis[0], te.thread_axis("threadIdx.x"))
fun = tvm.build(s, [A, C], target)
a = tvm.nd.empty((n,), A.dtype, ctx)
fun(a, c)
def check_select(ctx, n, dtype):
- A = tvm.placeholder((n,), name='A', dtype=dtype)
- true_value = tvm.const(1, dtype=dtype)
- false_value = tvm.const(3, dtype=dtype)
- max_lhs = tvm.const(2, dtype=dtype)
+ A = te.placeholder((n,), name='A', dtype=dtype)
+ true_value = tvm.tir.const(1, dtype=dtype)
+ false_value = tvm.tir.const(3, dtype=dtype)
+ max_lhs = tvm.tir.const(2, dtype=dtype)
max_rhs = tvm.tir.Select(A[0] > 0, true_value, false_value)
- C = tvm.compute((n,), lambda i: tvm.max(max_lhs, max_rhs), name='C')
- s = tvm.create_schedule(C.op)
- s[C].bind(s[C].op.axis[0], tvm.thread_axis("threadIdx.x"))
+ C = te.compute((n,), lambda i: tvm.te.max(max_lhs, max_rhs), name='C')
+ s = te.create_schedule(C.op)
+ s[C].bind(s[C].op.axis[0], te.thread_axis("threadIdx.x"))
fun = tvm.build(s, [A, C], target)
a = tvm.nd.empty((n,), A.dtype, ctx)
def test_opencl_inf_nan():
def check_inf_nan(ctx, n, value, dtype):
- A = tvm.placeholder((n,), name='A', dtype=dtype)
- inf_value = tvm.const(value, dtype=dtype)
- C = tvm.compute((n,), lambda i: inf_value, name='C')
- s = tvm.create_schedule(C.op)
- s[C].bind(s[C].op.axis[0], tvm.thread_axis("threadIdx.x"))
+ A = te.placeholder((n,), name='A', dtype=dtype)
+ inf_value = tvm.tir.const(value, dtype=dtype)
+ C = te.compute((n,), lambda i: inf_value, name='C')
+ s = te.create_schedule(C.op)
+ s[C].bind(s[C].op.axis[0], te.thread_axis("threadIdx.x"))
fun = tvm.build(s, [A, C], target)
a = tvm.nd.empty((n,), A.dtype, ctx)
c = tvm.nd.empty((n,), A.dtype, ctx)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
import unittest
-tx = tvm.thread_axis("threadIdx.x")
-ty = tvm.thread_axis("threadIdx.y")
-bx = tvm.thread_axis("blockIdx.x")
-by = tvm.thread_axis("blockIdx.y")
+tx = te.thread_axis("threadIdx.x")
+ty = te.thread_axis("threadIdx.y")
+bx = te.thread_axis("blockIdx.x")
+by = te.thread_axis("blockIdx.y")
@unittest.skipIf(not tvm.rocm(0).exist or not tvm.runtime.enabled("rocm"), "skip because rocm is not enabled..")
def test_rocm_cross_thread_reduction():
# based on the reduction tutorial
- n = tvm.size_var("n")
- m = tvm.size_var("m")
- A = tvm.placeholder((n, m), name='A')
- k = tvm.reduce_axis((0, m), "k")
- B = tvm.compute((n,), lambda i: tvm.sum(A[i, k], axis=k), name="B")
- s = tvm.create_schedule(B.op)
+ n = te.size_var("n")
+ m = te.size_var("m")
+ A = te.placeholder((n, m), name='A')
+ k = te.reduce_axis((0, m), "k")
+ B = te.compute((n,), lambda i: te.sum(A[i, k], axis=k), name="B")
+ s = te.create_schedule(B.op)
ko, ki = s[B].split(B.op.reduce_axis[0], factor=16)
BF = s.rfactor(B, ki)
xo, xi = s[B].split(s[B].op.axis[0], factor=32)
@unittest.skipIf(not tvm.rocm(0).exist or not tvm.runtime.enabled("rocm"), "skip because rocm is not enabled..")
def test_rocm_inf_nan():
def check_inf_nan(ctx, n, value, dtype):
- A = tvm.placeholder((n,), name='A', dtype=dtype)
- inf_value = tvm.const(value, dtype=dtype)
- C = tvm.compute((n,), lambda i: inf_value, name='C')
- s = tvm.create_schedule(C.op)
+ A = te.placeholder((n,), name='A', dtype=dtype)
+ inf_value = tvm.tir.const(value, dtype=dtype)
+ C = te.compute((n,), lambda i: inf_value, name='C')
+ s = te.create_schedule(C.op)
s[C].bind(s[C].op.axis[0], tx)
fun = tvm.build(s, [A, C], "rocm")
a = tvm.nd.empty((n,), A.dtype, ctx)
@unittest.skipIf(not tvm.rocm(0).exist or not tvm.runtime.enabled("rocm"), "skip because rocm is not enabled..")
def test_rocm_reducition_binding():
- k = tvm.reduce_axis((0, 32), 'k')
- A = tvm.placeholder((96, 32), name='A')
- B = tvm.compute( (96,), lambda m:
- tvm.sum(A[m, k], axis=k),
+ k = te.reduce_axis((0, 32), 'k')
+ A = te.placeholder((96, 32), name='A')
+ B = te.compute( (96,), lambda m:
+ te.sum(A[m, k], axis=k),
name='B')
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
s[B].reorder(B.op.reduce_axis[0], B.op.axis[0])
def test_rocm_copy():
def check_rocm(dtype, n):
- A = tvm.placeholder((n,), name='A', dtype=dtype)
+ A = te.placeholder((n,), name='A', dtype=dtype)
ctx = tvm.rocm(0)
a_np = np.random.uniform(size=(n,)).astype(A.dtype)
a = tvm.nd.empty((n,), A.dtype, ctx).copyfrom(a_np)
num_thread = 8
def check_rocm(dtype, n, lanes):
- A = tvm.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes))
- B = tvm.compute((n,), lambda i: A[i]+tvm.const(1, A.dtype), name='B')
- s = tvm.create_schedule(B.op)
+ A = te.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes))
+ B = te.compute((n,), lambda i: A[i]+tvm.tir.const(1, A.dtype), name='B')
+ s = te.create_schedule(B.op)
xo, xi = s[B].split(B.op.axis[0], factor=num_thread)
s[B].bind(xo, bx)
s[B].bind(xi, tx)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import ctypes
import numpy as np
def test_static_callback():
dtype = 'int64'
- n = tvm.size_var('n')
- Ab = tvm.decl_buffer((n, ), dtype)
- i = tvm.size_var('i')
- ib = tvm.ir_builder.create()
+ n = te.size_var('n')
+ Ab = tvm.tir.decl_buffer((n, ), dtype)
+ i = te.size_var('i')
+ ib = tvm.tir.ir_builder.create()
A = ib.buffer_ptr(Ab)
- cp = tvm.thread_axis((0, 1), "cop")
+ cp = te.thread_axis((0, 1), "cop")
finit = tvm.tir.StringImm("TVMBackendRunOnce")
ib.scope_attr(cp, "coproc_uop_scope", finit)
with ib.for_range(0, n, "i", for_type="parallel") as i:
A[i] = A[i] + 1
stmt = ib.get()
- fapi = tvm.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
- fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
+ fapi = tvm.tir.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
+ fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi)
f = tvm.target.codegen.build_module(fapi, "llvm")
a = tvm.nd.array(np.zeros(10, dtype=dtype))
f(a)
def test_static_init():
dtype = 'int64'
- n = tvm.size_var('n')
- Ab = tvm.decl_buffer((n, ), dtype)
- i = tvm.size_var('i')
- ib = tvm.ir_builder.create()
- handle = tvm.call_intrin("handle", "tvm_static_handle")
+ n = te.size_var('n')
+ Ab = tvm.tir.decl_buffer((n, ), dtype)
+ i = te.size_var('i')
+ ib = tvm.tir.ir_builder.create()
+ handle = tvm.tir.call_intrin("handle", "tvm_static_handle")
ib.emit(
- tvm.call_packed("test_static_callback", handle, Ab))
+ tvm.tir.call_packed("test_static_callback", handle, Ab))
@tvm.register_func("test_static_callback")
def test_cb(sh, A):
return sh
stmt = ib.get()
- fapi = tvm.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
- fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
+ fapi = tvm.tir.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
+ fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi)
f = tvm.target.codegen.build_module(fapi, "llvm")
a = tvm.nd.array(np.zeros(10, dtype=dtype))
f(a)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
def run_jit(fapi, check):
print(shape0)
assert shape0 == a.shape[0]
- n = tvm.size_var('n')
- Ab = tvm.decl_buffer((n, ), tvm.float32)
- stmt = tvm.tir.Evaluate(tvm.call_packed("tvm_call_back_get_shape", Ab.shape[0]))
- fapi = tvm.ir_pass.MakeAPI(stmt, "print_shape", [Ab], 0, True)
- fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
- fapi = tvm.ir_pass.LowerIntrin(fapi, "stackvm")
+ n = te.size_var('n')
+ Ab = tvm.tir.decl_buffer((n, ), "float32")
+ stmt = tvm.tir.Evaluate(tvm.tir.call_packed("tvm_call_back_get_shape", Ab.shape[0]))
+ fapi = tvm.tir.ir_pass.MakeAPI(stmt, "print_shape", [Ab], 0, True)
+ fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi)
+ fapi = tvm.tir.ir_pass.LowerIntrin(fapi, "stackvm")
run_jit(fapi, lambda f: f(a))
def test_stack_vm_loop():
dtype = 'int64'
- n = tvm.size_var('n')
- Ab = tvm.decl_buffer((n, ), dtype)
- i = tvm.size_var('i')
+ n = te.size_var('n')
+ Ab = tvm.tir.decl_buffer((n, ), dtype)
+ i = te.size_var('i')
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
A = ib.buffer_ptr(Ab)
with ib.for_range(0, n - 1, "i") as i:
A[i + 1] = A[i] + 1
- ib.emit(tvm.call_packed("tvm_stack_vm_print", i))
+ ib.emit(tvm.tir.call_packed("tvm_stack_vm_print", i))
stmt = ib.get()
- fapi = tvm.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
- fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
+ fapi = tvm.tir.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
+ fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi)
a = tvm.nd.array(np.zeros(10, dtype=dtype))
def check(f):
f(a)
def test_stack_vm_cond():
dtype = 'int64'
- n = tvm.size_var('n')
- Ab = tvm.decl_buffer((n, ), dtype)
+ n = te.size_var('n')
+ Ab = tvm.tir.decl_buffer((n, ), dtype)
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
A = ib.buffer_ptr(Ab)
with ib.for_range(0, n - 1, "i") as i:
with ib.if_scope(tvm.tir.EQ(i, 4)):
A[i + 1] = A[i] + 2
stmt = ib.get()
- fapi = tvm.ir_pass.MakeAPI(stmt, "test", [Ab], 0, True)
- fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
+ fapi = tvm.tir.ir_pass.MakeAPI(stmt, "test", [Ab], 0, True)
+ fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi)
def check(f):
a = tvm.nd.array(np.zeros(10, dtype=dtype))
f(a)
def test_vm_parallel():
dtype = 'int64'
- n = tvm.size_var('n')
- Ab = tvm.decl_buffer((n, ), dtype)
- i = tvm.size_var('i')
- ib = tvm.ir_builder.create()
+ n = te.size_var('n')
+ Ab = tvm.tir.decl_buffer((n, ), dtype)
+ i = te.size_var('i')
+ ib = tvm.tir.ir_builder.create()
A = ib.buffer_ptr(Ab)
with ib.for_range(0, n, "i", for_type="parallel") as i:
A[i] = A[i] + 1
stmt = ib.get()
- fapi = tvm.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
- fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
+ fapi = tvm.tir.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
+ fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi)
def check(f):
a = tvm.nd.array(np.zeros(10, dtype=dtype))
f(a)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import re
import numpy as np
def check_correct_assembly(dtype):
n = (1024,)
- A = tvm.placeholder(n, dtype=dtype, name='A')
- B = tvm.compute(
+ A = te.placeholder(n, dtype=dtype, name='A')
+ B = te.compute(
A.shape,
lambda i: tvm.tir.Select(
- A[i] >= 0, A[i] + tvm.const(1, dtype),
- tvm.const(0, dtype)), name='B')
- s = tvm.create_schedule(B.op)
+ A[i] >= 0, A[i] + tvm.tir.const(1, dtype),
+ tvm.tir.const(0, dtype)), name='B')
+ s = te.create_schedule(B.op)
(bx, tx) = s[B].split(s[B].op.axis[0], factor=128)
(tx, vx) = s[B].split(tx, factor=4)
- s[B].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[B].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[B].bind(bx, te.thread_axis("blockIdx.x"))
+ s[B].bind(tx, te.thread_axis("threadIdx.x"))
s[B].vectorize(vx)
f = tvm.build(s, [A, B], target)
check_correct_assembly('float16')
-tx = tvm.thread_axis("threadIdx.x")
-bx = tvm.thread_axis("blockIdx.x")
+tx = te.thread_axis("threadIdx.x")
+bx = te.thread_axis("blockIdx.x")
def test_vulkan_copy():
if not tvm.vulkan(0).exist or not tvm.runtime.enabled("vulkan"):
print("skip because vulkan is not enabled..")
return
- A = tvm.placeholder((n,), name='A', dtype=dtype)
+ A = te.placeholder((n,), name='A', dtype=dtype)
ctx = tvm.vulkan(0)
a_np = np.random.uniform(size=(n,)).astype(A.dtype)
a = tvm.nd.empty((n,), A.dtype, ctx).copyfrom(a_np)
if not tvm.vulkan(0).exist or not tvm.runtime.enabled("vulkan"):
print("skip because vulkan is not enabled..")
return
- A = tvm.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes))
- B = tvm.compute((n,), lambda i: A[i]+tvm.const(1, A.dtype), name='B')
- s = tvm.create_schedule(B.op)
+ A = te.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes))
+ B = te.compute((n,), lambda i: A[i]+tvm.tir.const(1, A.dtype), name='B')
+ s = te.create_schedule(B.op)
xo, xi = s[B].split(B.op.axis[0], factor=num_thread)
s[B].bind(xo, bx)
s[B].bind(xi, tx)
if not tvm.vulkan(0).exist or not tvm.runtime.enabled("vulkan"):
print("skip because vulkan is not enabled..")
return
- A = tvm.placeholder((n,), name='A', dtype="float32")
- B = tvm.placeholder((n,), name='B', dtype="float32")
+ A = te.placeholder((n,), name='A', dtype="float32")
+ B = te.placeholder((n,), name='B', dtype="float32")
functions = [
- (lambda: tvm.compute((n,), lambda i: 2 * A[i] + 3 * B[i]),
+ (lambda: te.compute((n,), lambda i: 2 * A[i] + 3 * B[i]),
lambda a, b: 2 * a + 3 * b),
- (lambda: tvm.compute((n,), lambda i: A[i]+B[i]),
+ (lambda: te.compute((n,), lambda i: A[i]+B[i]),
lambda a, b: a + b),
- (lambda: tvm.compute((n,), lambda i: A[i]+2 * B[i]),
+ (lambda: te.compute((n,), lambda i: A[i]+2 * B[i]),
lambda a, b: a + 2 * b),
]
def build_f(f_ref):
(C_f, ref) = f_ref
C = C_f()
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
xo, xi = s[C].split(C.op.axis[0], factor=num_thread)
s[C].bind(xo, bx)
s[C].bind(xi, tx)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import re
def fp16_to_fp32(target, width, match=None, not_match=None):
elements = 64
- n = tvm.convert(elements)
- A = tvm.placeholder((n, width), dtype="float16", name='A')
- B = tvm.compute(A.shape, lambda *i: A(*i).astype("float32"), name='B')
- s = tvm.create_schedule(B.op)
+ n = tvm.runtime.convert(elements)
+ A = te.placeholder((n, width), dtype="float16", name='A')
+ B = te.compute(A.shape, lambda *i: A(*i).astype("float32"), name='B')
+ s = te.create_schedule(B.op)
s[B].vectorize(s[B].op.axis[1])
f = tvm.build(s, [A, B], target)
import numpy as np
import tvm
+from tvm import te
from tvm import nd, relay
from tvm.runtime import container as _container
# under the License.
import tvm
+from tvm import te
from ctypes import *
import topi
import tvm.tir.ir_pass as ir_pass
return tvm.build(flist[0], target=tgt)
def test_bfloat_add_and_cast_1():
- X = tvm.placeholder((3, ), name="X")
- Y = tvm.placeholder((3, ), name="Y")
+ X = te.placeholder((3, ), name="X")
+ Y = te.placeholder((3, ), name="Y")
Z = topi.cast(
topi.cast(X, dtype="custom[bfloat]16") +
topi.cast(Y, dtype="custom[bfloat]16"),
dtype="float")
- s = tvm.create_schedule([Z.op])
+ s = te.create_schedule([Z.op])
built_cast = lower_datatypes_and_build(s, [X,Y,Z])
ctx = tvm.context(tgt, 0)
def test_bfloat_add_and_cast_2():
- X = tvm.placeholder((3, ), name="X")
- Y = tvm.placeholder((3, ), name="Y")
+ X = te.placeholder((3, ), name="X")
+ Y = te.placeholder((3, ), name="Y")
Z = topi.cast(
topi.cast(X, dtype="custom[bfloat]16") +
topi.cast(Y, dtype="custom[bfloat]16"),
dtype="float")
- s = tvm.create_schedule([Z.op])
+ s = te.create_schedule([Z.op])
built_cast = lower_datatypes_and_build(s, [X,Y,Z])
ctx = tvm.context(tgt, 0)
def test_bfloat_add_and_cast_FloatImm():
- X = tvm.placeholder((3, ), name="X")
+ X = te.placeholder((3, ), name="X")
Z = topi.cast(
topi.add(
topi.cast(X, dtype="custom[bfloat]16"),
tvm.tir.FloatImm("custom[bfloat]16", 1.5)),
dtype="float")
- s = tvm.create_schedule([Z.op])
+ s = te.create_schedule([Z.op])
built_cast = lower_datatypes_and_build(s, [X,Z])
ctx = tvm.context(tgt, 0)
import copy
import numpy as np
import tvm
+from tvm import te
import tvm.relay.testing
from tvm import autotvm
records.append((ms_input, ms_output))
ltf_records = []
- ltf_arg = [tvm.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"]
+ ltf_arg = [te.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"]
ltf_task = autotvm.task.create('layout_transform', ltf_arg, target)
ms_input = MeasureInput(target=target, task=ltf_task, config=None)
ms_output = MeasureResult(costs=(1.91224744e-05,), error_no=0, all_cost=-1, timestamp=-1)
ltf_records.append((ms_input, ms_output))
ltf_keys = []
- ltf_arg = [tvm.placeholder((1, 4, 8, 8, 4), dtype=dtype), "NCHW4c", "NCHW8c"]
+ ltf_arg = [te.placeholder((1, 4, 8, 8, 4), dtype=dtype), "NCHW4c", "NCHW8c"]
ltf_wkl = autotvm.task.args_to_workload(ltf_arg, 'layout_transform')
ltf_keys.append(ltf_wkl)
- ltf_arg = [tvm.placeholder((1, 1, 8, 8, 32), dtype=dtype), "NCHW32c", "NCHW4c"]
+ ltf_arg = [te.placeholder((1, 1, 8, 8, 32), dtype=dtype), "NCHW32c", "NCHW4c"]
ltf_wkl = autotvm.task.args_to_workload(ltf_arg, 'layout_transform')
ltf_keys.append(ltf_wkl)
- ltf_arg = [tvm.placeholder((1, 4, 8, 8, 8), dtype=dtype), "NCHW8c", "NCHW32c"]
+ ltf_arg = [te.placeholder((1, 4, 8, 8, 8), dtype=dtype), "NCHW8c", "NCHW32c"]
ltf_wkl = autotvm.task.args_to_workload(ltf_arg, 'layout_transform')
ltf_keys.append(ltf_wkl)
records.append((ms_input, ms_output))
ltf_records = []
- ltf_arg = [tvm.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"]
+ ltf_arg = [te.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"]
ltf_task = autotvm.task.create('layout_transform', ltf_arg, target)
ms_input = MeasureInput(target=target, task=ltf_task, config=None)
ms_output = MeasureResult(costs=(1.91224744e-05,), error_no=0, all_cost=-1, timestamp=-1)
records.append((ms_input, ms_output))
ltf_records = []
- ltf_arg = [tvm.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"]
+ ltf_arg = [te.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"]
ltf_task = autotvm.task.create('layout_transform', ltf_arg, target)
ms_input = MeasureInput(target=target, task=ltf_task, config=None)
ms_output = MeasureResult(costs=(1.91224744e-05,), error_no=0, all_cost=-1, timestamp=-1)
records.append((ms_input, ms_output))
ltf_records = []
- ltf_arg = [tvm.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"]
+ ltf_arg = [te.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"]
ltf_task = autotvm.task.create('layout_transform', ltf_arg, target)
ms_input = MeasureInput(target=target, task=ltf_task, config=None)
ms_output = MeasureResult(costs=(1.91224744e-05,), error_no=0, all_cost=-1, timestamp=-1)
# https://github.com/apache/incubator-tvm/issues/3240
# TODO: restore the file name after this issue is resolved.
import tvm
+from tvm import te
from tvm import autotvm, relay
from tvm.relay.testing import resnet
# specific language governing permissions and limitations
# under the License.
import tvm, inspect, sys, traceback, numpy, pytest, types, os
+
+from tvm import te
from tvm.contrib import util
from tvm.hybrid import script
from tvm.hybrid.runtime import HYBRID_GLOBALS
@pytest.mark.skip
def run_and_check(func, args, var_dict={}, target='llvm', sch=None, outs=None):
def tvm_val_2_py_val(val):
- val = tvm.ir_pass.Substitute(val, var_dict)
- val = tvm.ir_pass.Simplify(val)
+ val = tvm.tir.ir_pass.Substitute(val, var_dict)
+ val = tvm.tir.ir_pass.Simplify(val)
assert isinstance(val, (tvm.tir.IntImm,))
return val.value
op = None
if sch is None:
- outs = func(*tuple(tvm.convert(i) if isinstance(i, list) else i for i in args))
+ outs = func(*tuple(tvm.runtime.convert(i) if isinstance(i, list) else i for i in args))
op = outs[0].op if isinstance(outs, list) else outs.op
- sch = tvm.create_schedule(op)
+ sch = te.create_schedule(op)
else:
assert outs is not None
assert isinstance(outs, list)
emu_args = []
nd_args = []
for i in args:
- if isinstance(i, tvm.tensor.Tensor):
+ if isinstance(i, te.tensor.Tensor):
shape = [tvm_val_2_py_val(j) for j in i.shape]
emu_args.append(numpy.random.randn(*shape).astype(i.dtype))
nd_args.append(tvm.nd.array(emu_args[-1], ctx))
assert isinstance(i, list)
emu_args.append(numpy.array(i))
- compile_args = [i for i in args if isinstance(i, (tvm.tensor.Tensor, tvm.tir.Var))] + \
+ compile_args = [i for i in args if isinstance(i, (te.tensor.Tensor, tvm.tir.Var))] + \
(outs if isinstance(outs, list) else [outs])
module = tvm.build(sch,
compile_args,
for nd, np in zip(out_tensors, ref_data):
tvm.testing.assert_allclose(nd.asnumpy(), np, rtol=1e-5, atol=1e-5)
- module_args = [i for i in args if isinstance(i, (tvm.tensor.Tensor, tvm.tir.Var))]
- module_outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
+ module_args = [i for i in args if isinstance(i, (te.tensor.Tensor, tvm.tir.Var))]
+ module_outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
h_module = tvm.hybrid.build(sch, module_args, module_outs)
return h_module, module_args, module_outs
#Test global function
#Test bridge between frontend and backend
def test_outer_product():
- n = tvm.size_var('n')
- m = tvm.size_var('m')
- a = tvm.placeholder((n, ), name='a')
- b = tvm.placeholder((m, ), name='b')
+ n = te.size_var('n')
+ m = te.size_var('m')
+ a = te.placeholder((n, ), name='a')
+ b = te.placeholder((m, ), name='b')
try:
c = outer_product(n, m, a, b)
b[i] = sigma
return b
- n = tvm.size_var('n')
- a = tvm.placeholder((n, ), 'float32', name='a')
+ n = te.size_var('n')
+ a = te.placeholder((n, ), 'float32', name='a')
try:
b = fanout(n, a)
ir = b.op.body
assert isinstance(ir, tvm.tir.For)
assert ir.loop_var.name == 'i'
assert ir.min.value == 0
- assert tvm.ir_pass.Equal(ir.extent, n - 3)
+ assert tvm.tir.ir_pass.Equal(ir.extent, n - 3)
#Check loopbody
ibody = ir.body
assert isinstance(ibody, tvm.tir.AttrStmt)
assert value.a.args[0].value == 0
assert value.b.name == 'a'
assert len(value.b.args) == 1
- assert tvm.ir_pass.Equal(value.b.args[0], ir.loop_var + jloop.loop_var)
+ assert tvm.tir.ir_pass.Equal(value.b.args[0], ir.loop_var + jloop.loop_var)
divide= rbody[2]
assert isinstance(divide, tvm.tir.Provide)
assert len(divide.args) == 1
f[k] = c[k]
return d, e, f
- a = tvm.placeholder((16, ), name='a', dtype='int32')
- b = tvm.placeholder((16, ), name='b', dtype='int32')
- c = tvm.placeholder((16, ), name='c', dtype='int32')
+ a = te.placeholder((16, ), name='a', dtype='int32')
+ b = te.placeholder((16, ), name='b', dtype='int32')
+ c = te.placeholder((16, ), name='c', dtype='int32')
try:
d, e, f = looptype(a, b, c)
ir = d.op.body
b[i] = -1 if i % 2 == 0 else 1
return b, c
- a = tvm.placeholder((10, ), dtype='int32', name='a')
+ a = te.placeholder((10, ), dtype='int32', name='a')
func, ins, outs = run_and_check(if_then_else, [a])
run_and_check(func, ins, outs=outs)
c[tx] = a[tx] + b[tx]
return c
- a = tvm.placeholder((1000, ), dtype='float32', name='a')
- b = tvm.placeholder((1000, ), dtype='float32', name='b')
+ a = te.placeholder((1000, ), dtype='float32', name='a')
+ b = te.placeholder((1000, ), dtype='float32', name='b')
func, ins, outs = run_and_check(vec_add, [a, b], target='cuda')
run_and_check(func, ins, outs=outs, target='cuda')
return c
c = raw(a, b)
- sch = tvm.create_schedule(c.op)
- x = tvm.thread_axis('threadIdx.x')
+ sch = te.create_schedule(c.op)
+ x = te.thread_axis('threadIdx.x')
sch[c].bind(c.op.axis[0], x)
func, ins, outs = run_and_check(raw, [a, b], sch=sch, outs=[c], target='cuda')
run_and_check(func, ins, outs=outs, target='cuda')
return c
- a = tvm.placeholder((8, 4), 'float32')
+ a = te.placeholder((8, 4), 'float32')
c = foo(a)
- s = tvm.create_schedule(c.op)
+ s = te.create_schedule(c.op)
ir = tvm.lower(s, [a, c], simple_mode=True)
assert not isinstance(ir, tvm.tir.AttrStmt)
func, ins, outs = run_and_check(foo, [a], target='cuda')
b[i * m + j] = a[i * m + j] + a[i * m + j]
return b
- a = tvm.placeholder((10000, ), 'float32')
+ a = te.placeholder((10000, ), 'float32')
with tvm.target.create('cuda'):
func, ins, outs = run_and_check(max_threads, [a], target='cuda')
run_and_check(func, ins, outs=outs, target='cuda')
b[7] = max(a[5], a[6])
return b
- a8 = tvm.placeholder((8, ), dtype='float32', name='a')
+ a8 = te.placeholder((8, ), dtype='float32', name='a')
b8 = intrin_real(a8)
- sch = tvm.create_schedule(b8.op)
+ sch = te.create_schedule(b8.op)
func = tvm.build(sch, [a8, b8])
assert func
a = numpy.arange(2, 10).astype('float32')
b[0] = popcount(a[0])
return b
- a1 = tvm.placeholder((1, ), dtype='int32')
+ a1 = te.placeholder((1, ), dtype='int32')
b1 = intrin_int(a1)
- sch = tvm.create_schedule(b1.op)
+ sch = te.create_schedule(b1.op)
func = tvm.build(sch, [a1, b1])
assert func
a = numpy.array([114514]).astype('int32')
b[i-2, j-2] = s / 9.0
return b
- a = tvm.placeholder((32, 32), 'float32', 'a')
+ a = te.placeholder((32, 32), 'float32', 'a')
func, ins, outs = run_and_check(blur, [a])
run_and_check(func, ins, outs=outs)
c[i, j] = a[i] * b[j]
return c
- a = tvm.placeholder((10, ), dtype='float32', name='a')
- b = tvm.placeholder((10, ), dtype='float32', name='b')
+ a = te.placeholder((10, ), dtype='float32', name='a')
+ b = te.placeholder((10, ), dtype='float32', name='b')
func, ins, outs = run_and_check(triangle, [a, b])
run_and_check(func, ins, outs=outs)
b[i, j] = (ha[0, j] + ha[1, j] + ha[2, j]) / 9.0
return b
- a = tvm.placeholder((32, 32), 'float32', 'a')
+ a = te.placeholder((32, 32), 'float32', 'a')
b = blur2d(a)
- sch = tvm.create_schedule(b.op)
+ sch = te.create_schedule(b.op)
func, ins, outs = run_and_check(blur2d, [a])
run_and_check(func, ins, outs=outs)
c[i] = shared[i] + local[i]
return c
- a = tvm.placeholder((256, ), dtype='float32', name='a')
- b = tvm.placeholder((256, ), dtype='float32', name='b')
+ a = te.placeholder((256, ), dtype='float32', name='a')
+ b = te.placeholder((256, ), dtype='float32', name='b')
c = share_vec_add(a, b)
func, ins, outs = run_and_check(share_vec_add, [a, b], target='cuda')
run_and_check(func, ins, outs=outs, target='cuda')
b[i] = a[i] * i
return b
- a = tvm.placeholder((20, ), 'float32')
- b = tvm.placeholder((20, ), 'float32')
- c = tvm.compute((20, ), lambda x: a[x] + b[x])
+ a = te.placeholder((20, ), 'float32')
+ b = te.placeholder((20, ), 'float32')
+ c = te.compute((20, ), lambda x: a[x] + b[x])
d = upstream(c)
- sch = tvm.create_schedule([c.op, d.op])
+ sch = te.create_schedule([c.op, d.op])
ir = tvm.lower(sch, [a, b, d], simple_mode=True)
func = tvm.build(sch, [a, b, d])
assert(func)
return b
- a = tvm.placeholder((20, ), 'float32')
+ a = te.placeholder((20, ), 'float32')
b = downstream(a)
- c = tvm.compute((20, ), lambda x: b[x] + 1.0)
+ c = te.compute((20, ), lambda x: b[x] + 1.0)
- sch = tvm.create_schedule(c.op)
+ sch = te.create_schedule(c.op)
module = tvm.build(sch, [a, c])
assert module
c[i] = a[i] + b
return c
- a = tvm.placeholder((11, ), dtype='int32', name='a')
- b = tvm.const(11, 'int32')
+ a = te.placeholder((11, ), dtype='int32', name='a')
+ b = tvm.tir.const(11, 'int32')
c = add_something(a, b)
- sch = tvm.create_schedule(c.op)
+ sch = te.create_schedule(c.op)
module = tvm.build(sch, [a, c], 'llvm')
assert(module)
c[i, j] = a[i * 4 + j] * b[i, j]
return c
- a = tvm.placeholder((16, ), 'int32')
+ a = te.placeholder((16, ), 'int32')
b, c = kernel_a(a)
d = kernel_b(c, b)
- sch = tvm.create_schedule(d.op)
+ sch = te.create_schedule(d.op)
module = tvm.build(sch, [a, d])
assert module
d[i, j] = c[i, j] + i * j
return d
- a = tvm.placeholder((10, ), name='a')
- b = tvm.placeholder((10, ), name='b')
+ a = te.placeholder((10, ), name='a')
+ b = te.placeholder((10, ), name='b')
func, ins, outs = run_and_check(foo, [a, b])
run_and_check(func, ins, outs=outs)
else:
b[i] = 0.0
return b
- a = tvm.placeholder((10, ), name='a')
+ a = te.placeholder((10, ), name='a')
func, ins, outs = run_and_check(foo, [a])
run_and_check(func, ins, outs=outs)
return c, d
- a = tvm.placeholder((2, 5), name='a', dtype='float32')
+ a = te.placeholder((2, 5), name='a', dtype='float32')
b = [[1, 2, 3, 4, 5], [5, 4, 3, 2, 1]]
func, ins, outs = run_and_check(foo, [a, b])
run_and_check(func, ins, outs=outs)
else:
c[i - len_b] = a[i - len_b] + b[i - len_b]
return c
- a = tvm.placeholder((5, ), name='a', dtype='int32')
+ a = te.placeholder((5, ), name='a', dtype='int32')
b = [1, 2, 3, 4, 5]
- c = goo(a, tvm.convert(b))
- sch = tvm.create_schedule(c.op)
+ c = goo(a, tvm.runtime.convert(b))
+ sch = te.create_schedule(c.op)
func, ins, outs = run_and_check(goo, [a, b])
run_and_check(func, ins, outs=outs)
d += a[i] + b[j]
c[i] = d
return c
- a = tvm.placeholder((5, ), name='a', dtype='int32')
+ a = te.placeholder((5, ), name='a', dtype='int32')
b = [1, 2, 3, 4, 5]
func, ins, outs = run_and_check(hoo, [a, b])
run_and_check(func, ins, outs=outs)
for j in range(64):
c[i, j] = a[i] * b[j]
return c
- a = tvm.placeholder((64,), name='a', dtype='float32')
- b = tvm.placeholder((64,), name='b', dtype='float32')
+ a = te.placeholder((64,), name='a', dtype='float32')
+ b = te.placeholder((64,), name='b', dtype='float32')
c = outer_product(a, b)
# Test perfect loop split
# Test loop reorder
# Test loop annotation
- sch = tvm.create_schedule(c.op)
+ sch = te.create_schedule(c.op)
i, j = c.op.axis
io, ii = sch[c].split(i, 4)
sch[c].parallel(ii)
run_and_check(func, ins, outs=outs)
# Test fuse
- sch = tvm.create_schedule(c.op)
+ sch = te.create_schedule(c.op)
sch[c].fuse(c.op.axis[0], c.op.axis[1])
ir = tvm.lower(sch, [a, b, c], simple_mode=True)
assert isinstance(ir, tvm.tir.ProducerConsumer)
run_and_check(func, ins, outs=outs)
# Test imperfect loop split
- sch = tvm.create_schedule(c.op)
+ sch = te.create_schedule(c.op)
sch[c].split(c.op.axis[0], 3)
ir = tvm.lower(sch, [a, b, c], simple_mode=True)
func, ins, outs = run_and_check(outer_product, [a, b], sch=sch, outs=[c])
c[i] = a[i] + constant_list[1][const_value]
return c
- a = tvm.placeholder((n, ), dtype='int32', name='a')
+ a = te.placeholder((n, ), dtype='int32', name='a')
func, ins, outs = run_and_check(add_something, [a])
run_and_check(func, ins, outs=outs)
n = 5
inputs = []
for i in range(n):
- inputs.append(tvm.placeholder((10,), name='t%s' % i, dtype='float32'))
+ inputs.append(te.placeholder((10,), name='t%s' % i, dtype='float32'))
- out = sum_array(tvm.convert(inputs))
+ out = sum_array(tvm.runtime.convert(inputs))
assert len(out.op.inputs) == n
- sch = tvm.create_schedule(out.op)
+ sch = te.create_schedule(out.op)
mod = tvm.build(sch, inputs + [out], target='llvm')
assert mod
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
def test_for():
- ib = tvm.ir_builder.create()
- n = tvm.size_var("n")
+ ib = tvm.tir.ir_builder.create()
+ n = te.size_var("n")
A = ib.allocate("float32", n, name="A", scope="global")
with ib.for_range(0, n, name="i") as i:
A[i] = A[i] + 1
assert isinstance(body[1], tvm.tir.For)
def test_if():
- ib = tvm.ir_builder.create()
- n = tvm.size_var("n")
+ ib = tvm.tir.ir_builder.create()
+ n = te.size_var("n")
A = ib.pointer("float32", name="A")
- tmod = tvm.truncmod
+ tmod = tvm.tir.truncmod
with ib.for_range(0, n, name="i") as i:
with ib.if_scope(tmod(i, 2) == 0):
A[i] = A[i] + 1
assert body.else_case.index.value == 0
def test_prefetch():
- A = tvm.placeholder((10, 20), name="A")
- ib = tvm.ir_builder.create()
- n = tvm.size_var("n")
+ A = te.placeholder((10, 20), name="A")
+ ib = tvm.tir.ir_builder.create()
+ n = te.size_var("n")
with ib.for_range(0, n, name="i") as i:
ib.emit(
def test_cpu():
n = 1024
dtype = "float32"
- A = tvm.placeholder((n,), name='A')
- B = tvm.placeholder((n,), name='B')
+ A = te.placeholder((n,), name='A')
+ B = te.placeholder((n,), name='B')
def test_device_ir(A, B, C):
n = A.shape[0]
max_threads = 8
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
Aptr = ib.buffer_ptr(A)
Bptr = ib.buffer_ptr(B)
Cptr = ib.buffer_ptr(C)
Cptr[i] = Aptr[i] + Bptr[i]
body = ib.get()
return body
- C = tvm.extern(A.shape, [A, B], lambda ins, outs: test_device_ir(ins[0], ins[1], outs[0]),
+ C = te.extern(A.shape, [A, B], lambda ins, outs: test_device_ir(ins[0], ins[1], outs[0]),
name="vector_add", dtype=dtype)
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
def check_target(target):
if not tvm.runtime.enabled(target):
return
check_target("llvm")
def test_gpu():
- n = tvm.size_var('n')
+ n = te.size_var('n')
dtype = "float32"
- A = tvm.placeholder((n,), name='A')
- B = tvm.placeholder((n,), name='B')
- idxd = tvm.indexdiv
+ A = te.placeholder((n,), name='A')
+ B = te.placeholder((n,), name='B')
+ idxd = tvm.tir.indexdiv
def test_device_ir(A, B, C):
n = A.shape[0]
max_threads = 32
- ib = tvm.ir_builder.create()
- bx = tvm.thread_axis("blockIdx.x")
- tx = tvm.thread_axis("threadIdx.x")
+ ib = tvm.tir.ir_builder.create()
+ bx = te.thread_axis("blockIdx.x")
+ tx = te.thread_axis("threadIdx.x")
ib.scope_attr(bx, "thread_extent", idxd(n+max_threads-1, max_threads))
ib.scope_attr(tx, "thread_extent", max_threads)
idx = bx.var * max_threads + tx.var
Cptr[idx] = Aptr[idx] + Bptr[idx]
body = ib.get()
return body
- C = tvm.extern(A.shape, [A, B], lambda ins, outs: test_device_ir(ins[0], ins[1], outs[0]),
+ C = te.extern(A.shape, [A, B], lambda ins, outs: test_device_ir(ins[0], ins[1], outs[0]),
name="vector_add", dtype=dtype)
- s = tvm.create_schedule(C.op)
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
+ s = te.create_schedule(C.op)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
def check_target(target):
n = 1024
if not tvm.runtime.enabled(target):
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
def test_const():
- x = tvm.const(1, "int32")
+ x = tvm.tir.const(1, "int32")
print(x.dtype)
- assert x.dtype == tvm.int32
+ assert x.dtype == "int32"
assert isinstance(x, tvm.tir.IntImm)
for data in [True, np.bool(1), np.uint8(1), np.uint16(1), np.uint32(1), np.uint64(1),
np.int8(1), np.int16(1), np.int32(1), np.int64(1),
np.float16(1), np.float32(1), np.float64(1)]:
- assert tvm.const(data).dtype == str(np.array(data).dtype)
- assert tvm.const(1).dtype == 'int32'
- assert tvm.const(1.0).dtype == 'float32'
+ assert tvm.tir.const(data).dtype == str(np.array(data).dtype)
+ assert tvm.tir.const(1).dtype == 'int32'
+ assert tvm.tir.const(1.0).dtype == 'float32'
for data in [True, np.bool(1), np.uint8(1), np.uint16(1), np.uint32(1), np.uint64(1),
np.int8(1), np.int16(1), np.int32(1), np.int64(1),
np.float16(1), np.float32(1), np.float64(1)]:
- assert tvm.convert(data).dtype == str(np.array(data).dtype)
- assert tvm.convert(1).dtype == 'int32'
- assert tvm.convert(1.0).dtype == 'float32'
+ assert tvm.runtime.convert(data).dtype == str(np.array(data).dtype)
+ assert tvm.runtime.convert(1).dtype == 'int32'
+ assert tvm.runtime.convert(1.0).dtype == 'float32'
def test_make():
- x = tvm.const(1, "int32")
- y = tvm.var("x")
+ x = tvm.tir.const(1, "int32")
+ y = te.var("x")
z = x + y
- assert isinstance(tvm.max(x, y), tvm.tir.Max)
- assert isinstance(tvm.min(x, y), tvm.tir.Min)
+ assert isinstance(tvm.te.max(x, y), tvm.tir.Max)
+ assert isinstance(tvm.te.min(x, y), tvm.tir.Min)
def test_ir():
- x = tvm.const(1, "int32")
+ x = tvm.tir.const(1, "int32")
y = tvm.tir.IntImm('int32', 1)
z = x + y
stmt = tvm.tir.Evaluate(z)
def test_ir2():
- x = tvm.var("n")
- a = tvm.var("array", tvm.handle)
+ x = te.var("n")
+ a = te.var("array", "handle")
st = tvm.tir.Store(a, x + 1, 1)
assert isinstance(st, tvm.tir.Store)
assert(st.buffer_var == a)
def test_let():
- x = tvm.var('x')
- y = tvm.var('y')
+ x = te.var('x')
+ y = te.var('y')
stmt = tvm.tir.LetStmt(
x, 10, tvm.tir.Evaluate(x + 1));
def test_cast():
- x = tvm.var('x', dtype="float32")
+ x = te.var('x', dtype="float32")
y = x.astype("int32")
z = x.astype("float32x4")
assert isinstance(y, tvm.tir.Cast)
def test_attr():
- x = tvm.var('x')
- y = tvm.var('y')
+ x = te.var('x')
+ y = te.var('y')
stmt = tvm.tir.AttrStmt(
y, "stride", 10, tvm.tir.Evaluate(x + 1));
assert stmt.node == y
- a = tvm.convert(1)
+ a = tvm.runtime.convert(1)
assert a.value == 1
try:
a.no_field
def test_basic():
- a = tvm.var('a')
- b = tvm.var('b')
+ a = te.var('a')
+ b = te.var('b')
c = a + b
assert str(c) == '(%s + %s)' % (a.name, b.name)
def test_stmt():
x = tvm.tir.Evaluate(0)
- tvm.tir.For(tvm.var('i'), 0, 1,
+ tvm.tir.For(te.var('i'), 0, 1,
tvm.tir.For.Serial, 0,
x)
def test_dir():
- x = tvm.var('x')
+ x = te.var('x')
dir(x)
def test_dtype():
- x = tvm.var('x')
+ x = te.var('x')
assert x.dtype == 'int32'
- y = tvm.var('y')
+ y = te.var('y')
assert (x > y).dtype == 'bool'
def test_any():
- x = tvm.var('x')
- y = tvm.var('y')
- z = tvm.var('z')
+ x = te.var('x')
+ y = te.var('y')
+ z = te.var('z')
try:
t = x or x
assert False
except ValueError:
pass
try:
- tvm.any()
+ tvm.tir.any()
assert False
except ValueError:
pass
- assert str(tvm.any(x < y)) == '(%s < %s)' % (x.name, y.name)
- assert str(tvm.any(x < y, x > z)) == '((%s < %s) || (%s > %s))' % (
+ assert str(tvm.tir.any(x < y)) == '(%s < %s)' % (x.name, y.name)
+ assert str(tvm.tir.any(x < y, x > z)) == '((%s < %s) || (%s > %s))' % (
x.name, y.name, x.name, z.name)
- assert str(tvm.any(x < y, y > z + 1, x < z * 2)) == \
+ assert str(tvm.tir.any(x < y, y > z + 1, x < z * 2)) == \
'(((%s < %s) || (%s > (%s + 1))) || (%s < (%s*2)))' % (
x.name, y.name, y.name, z.name, x.name, z.name)
def test_all():
- x = tvm.var('x')
- y = tvm.var('y')
- z = tvm.var('z')
+ x = te.var('x')
+ y = te.var('y')
+ z = te.var('z')
try:
t = x and x
assert False
except ValueError:
pass
try:
- tvm.all()
+ tvm.tir.all()
assert False
except ValueError:
pass
- assert str(tvm.all(x < y)) == '(%s < %s)' % (x.name, y.name)
- assert str(tvm.all(x < y, x > z)) == '((%s < %s) && (%s > %s))' % (
+ assert str(tvm.tir.all(x < y)) == '(%s < %s)' % (x.name, y.name)
+ assert str(tvm.tir.all(x < y, x > z)) == '((%s < %s) && (%s > %s))' % (
x.name, y.name, x.name, z.name)
- assert str(tvm.all(x < y, y > z + 1, x < z * 2)) == \
+ assert str(tvm.tir.all(x < y, y > z + 1, x < z * 2)) == \
'(((%s < %s) && (%s > (%s + 1))) && (%s < (%s*2)))' % (
x.name, y.name, y.name, z.name, x.name, z.name)
def test_bitwise():
- x = tvm.var('x')
- y = tvm.var('y')
+ x = te.var('x')
+ y = te.var('y')
assert str(x << y) == 'shift_left(x, y)'
assert str(x >> y) == 'shift_right(x, y)'
assert str(x & y) == 'bitwise_and(x, y)'
assert str(10 << x) == 'shift_left(10, x)'
assert str(10 % x) == 'floormod(10, x)'
assert str(~x) == 'bitwise_not(x)'
- assert(tvm.const(1, "int8x2") >> 1).dtype == "int8x2"
- assert(x >> tvm.const(1, "int32x2")).dtype == "int32x2"
- assert(tvm.var("z", "int8x2") << tvm.const(1, "int8x2")).dtype == "int8x2"
+ assert(tvm.tir.const(1, "int8x2") >> 1).dtype == "int8x2"
+ assert(x >> tvm.tir.const(1, "int32x2")).dtype == "int32x2"
+ assert(te.var("z", "int8x2") << tvm.tir.const(1, "int8x2")).dtype == "int8x2"
def test_float_bitwise():
- t = tvm.const(1.5,dtype='float32')
+ t = tvm.tir.const(1.5,dtype='float32')
for test in [lambda lhs, rhs : lhs << rhs,
lambda lhs, rhs : lhs >> rhs,
lambda lhs, rhs : lhs | rhs,
pass
def test_isnan():
- x = tvm.var('x', 'float32')
- assert str(tvm.isnan(x)) == 'isnan(x)'
- assert str(tvm.isnan(x).dtype) == 'bool'
- y = tvm.var('y', 'float16')
- assert str(tvm.isnan(y)) == 'isnan(float32(y))'
- z = tvm.var('z', 'int32')
- assert str(tvm.isnan(z)) == '(bool)0'
- k = tvm.var('k', 'int8x2')
- assert str(tvm.isnan(k).dtype) == 'uint1x2'
+ x = te.var('x', 'float32')
+ assert str(tvm.tir.isnan(x)) == 'isnan(x)'
+ assert str(tvm.tir.isnan(x).dtype) == 'bool'
+ y = te.var('y', 'float16')
+ assert str(tvm.tir.isnan(y)) == 'isnan(float32(y))'
+ z = te.var('z', 'int32')
+ assert str(tvm.tir.isnan(z)) == '(bool)0'
+ k = te.var('k', 'int8x2')
+ assert str(tvm.tir.isnan(k).dtype) == 'uint1x2'
def test_equality():
- a = tvm.var('a')
- b = tvm.var('b')
+ a = te.var('a')
+ b = te.var('b')
c = (a == b)
assert not c
d = (c != c)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
from tvm.tir import Buffer
import numpy as np
def test_buffer():
- m = tvm.size_var('m')
- n = tvm.size_var('n')
- l = tvm.size_var('l')
- Ab = tvm.decl_buffer((m, n), tvm.float32)
- Bb = tvm.decl_buffer((n, l), tvm.float32)
+ m = te.size_var('m')
+ n = te.size_var('n')
+ l = te.size_var('l')
+ Ab = tvm.tir.decl_buffer((m, n), "float32")
+ Bb = tvm.tir.decl_buffer((n, l), "float32")
assert isinstance(Ab, tvm.tir.Buffer)
- assert Ab.dtype == tvm.float32
+ assert Ab.dtype == "float32"
assert tuple(Ab.shape) == (m, n)
def test_buffer_access_ptr():
- m = tvm.size_var('m')
- n = tvm.size_var('n')
- Ab = tvm.decl_buffer((m, n), tvm.float32, strides=[n + 1 , 1])
+ m = te.size_var('m')
+ n = te.size_var('n')
+ Ab = tvm.tir.decl_buffer((m, n), "float32", strides=[n + 1 , 1])
aptr = Ab.access_ptr("rw")
- assert tvm.ir_pass.Equal(aptr.args[3], Ab.strides[0] * m)
+ assert tvm.tir.ir_pass.Equal(aptr.args[3], Ab.strides[0] * m)
assert aptr.args[0].dtype == Ab.dtype
assert aptr.args[4].value == Buffer.READ | Buffer.WRITE
aptr = Ab.access_ptr("w")
def test_buffer_access_ptr_offset():
- m = tvm.size_var('m')
- n = tvm.size_var('n')
- Ab = tvm.decl_buffer((m, n), tvm.float32)
+ m = te.size_var('m')
+ n = te.size_var('n')
+ Ab = tvm.tir.decl_buffer((m, n), "float32")
aptr = Ab.access_ptr("rw", offset=100)
- offset = tvm.ir_pass.Simplify(aptr.args[2])
- assert tvm.ir_pass.Equal(offset, 100)
+ offset = tvm.tir.ir_pass.Simplify(aptr.args[2])
+ assert tvm.tir.ir_pass.Equal(offset, 100)
assert aptr.args[4].value == Buffer.READ | Buffer.WRITE
- v = tvm.size_var('int32')
+ v = te.size_var('int32')
aptr = Ab.access_ptr("rw", offset=100 + 100 + v)
- offset = tvm.ir_pass.Simplify(aptr.args[2])
- assert tvm.ir_pass.Equal(offset, 200 + v)
+ offset = tvm.tir.ir_pass.Simplify(aptr.args[2])
+ assert tvm.tir.ir_pass.Equal(offset, 200 + v)
assert aptr.args[4].value == Buffer.READ | Buffer.WRITE
- aptr = Ab.access_ptr("rw", offset=tvm.call_extern('int32', "test_call", 100 + 100 + v))
- offset = tvm.ir_pass.Simplify(aptr.args[2])
- assert tvm.ir_pass.Equal(offset, tvm.call_extern('int32', "test_call", 200 + v))
+ aptr = Ab.access_ptr("rw", offset=tvm.tir.call_extern('int32', "test_call", 100 + 100 + v))
+ offset = tvm.tir.ir_pass.Simplify(aptr.args[2])
+ assert tvm.tir.ir_pass.Equal(offset, tvm.tir.call_extern('int32', "test_call", 200 + v))
assert aptr.args[4].value == Buffer.READ | Buffer.WRITE
def test_buffer_access_ptr_extent():
- m = tvm.size_var('m')
- n = tvm.size_var('n')
- Ab = tvm.decl_buffer((m, n), tvm.float32)
+ m = te.size_var('m')
+ n = te.size_var('n')
+ Ab = tvm.tir.decl_buffer((m, n), "float32")
aptr = Ab.access_ptr("rw")
- assert tvm.ir_pass.Equal(aptr.args[3], m * n)
+ assert tvm.tir.ir_pass.Equal(aptr.args[3], m * n)
aptr = Ab.access_ptr("rw", offset=100)
- assert tvm.ir_pass.Equal(aptr.args[3], m * n - 100)
- Ab = tvm.decl_buffer((m, n), tvm.float32, strides=[n + 1 , 1])
+ assert tvm.tir.ir_pass.Equal(aptr.args[3], m * n - 100)
+ Ab = tvm.tir.decl_buffer((m, n), "float32", strides=[n + 1 , 1])
aptr = Ab.access_ptr("rw", offset=100)
- assert tvm.ir_pass.Equal(aptr.args[3], Ab.strides[0] * m - 100)
+ assert tvm.tir.ir_pass.Equal(aptr.args[3], Ab.strides[0] * m - 100)
def test_buffer_vload():
- m = tvm.size_var('m')
- n = tvm.size_var('n')
- Ab = tvm.decl_buffer((m, n), tvm.float32, elem_offset=100)
+ m = te.size_var('m')
+ n = te.size_var('n')
+ Ab = tvm.tir.decl_buffer((m, n), "float32", elem_offset=100)
load = Ab.vload([2, 3])
- offset = tvm.ir_pass.Simplify(load.index)
- assert tvm.ir_pass.Equal(offset, n * 2 + 103)
+ offset = tvm.tir.ir_pass.Simplify(load.index)
+ assert tvm.tir.ir_pass.Equal(offset, n * 2 + 103)
def test_buffer_index_merge_mult_mod():
- m = tvm.size_var('m')
- n = tvm.size_var('n')
- s = tvm.size_var('s')
- k0 = tvm.size_var('k0')
- k1 = tvm.size_var('k1')
- A = tvm.decl_buffer((m, n), tvm.float32)
- A_stride = tvm.decl_buffer((m, n), tvm.float32, strides=(s, 1))
+ m = te.size_var('m')
+ n = te.size_var('n')
+ s = te.size_var('s')
+ k0 = te.size_var('k0')
+ k1 = te.size_var('k1')
+ A = tvm.tir.decl_buffer((m, n), "float32")
+ A_stride = tvm.tir.decl_buffer((m, n), "float32", strides=(s, 1))
def assert_simplified_equal(index_simplified, index_direct):
- assert tvm.ir_pass.Equal(index_simplified, index_direct),\
+ assert tvm.tir.ir_pass.Equal(index_simplified, index_direct),\
"index_simplified=%s, index_direct=%s" %(index_simplified, index_direct)
- idxd = tvm.indexdiv
- idxm = tvm.indexmod
+ idxd = tvm.tir.indexdiv
+ idxm = tvm.tir.indexmod
# Test Case1
index_simplified = A_stride.vload(
(idxd(idxm(k0, k1), s), idxm(idxm(k0, k1), s) + idxd(k0, k1) * k1))
def test_buffer_broadcast():
- m0, m1, m2 = tvm.size_var("m0"), tvm.size_var("m1"), tvm.size_var("m2")
- n0, n1, n2 = tvm.size_var("n0"), tvm.size_var("n1"), tvm.size_var("n2")
- o0, o1, o2 = tvm.size_var("o0"), tvm.size_var("o1"), tvm.size_var("o2")
+ m0, m1, m2 = te.size_var("m0"), te.size_var("m1"), te.size_var("m2")
+ n0, n1, n2 = te.size_var("n0"), te.size_var("n1"), te.size_var("n2")
+ o0, o1, o2 = te.size_var("o0"), te.size_var("o1"), te.size_var("o2")
- A = tvm.placeholder((m0, m1, m2), name='A')
- B = tvm.placeholder((n0, n1, n2), name='B')
+ A = te.placeholder((m0, m1, m2), name='A')
+ B = te.placeholder((n0, n1, n2), name='B')
- C = tvm.compute((o0, o1, o2), lambda i, j, k: A[i, j, k] + B[i, j, k], name='C')
+ C = te.compute((o0, o1, o2), lambda i, j, k: A[i, j, k] + B[i, j, k], name='C')
- Ab = tvm.decl_buffer(A.shape, A.dtype, name="Ab", buffer_type="auto_broadcast")
- Bb = tvm.decl_buffer(B.shape, B.dtype, name="Bb", buffer_type="auto_broadcast")
- s = tvm.create_schedule(C.op)
+ Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name="Ab", buffer_type="auto_broadcast")
+ Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name="Bb", buffer_type="auto_broadcast")
+ s = te.create_schedule(C.op)
def check():
if not tvm.runtime.enabled("llvm"):
def test_buffer_broadcast_expr():
- n0, m0, x = tvm.size_var('n0'), tvm.size_var('m0'), tvm.size_var('x')
- n1, m1 = tvm.size_var('n1'), tvm.size_var('m1')
- o0, o1 = tvm.size_var('o0'), tvm.size_var('o1')
-
- A = tvm.placeholder((m0, n0), name='A')
- B = tvm.placeholder((m1, n1), name='B')
- C = tvm.compute((o0, o1//x), lambda i, j: A[i, j] + B[i, j], name='C')
-
- Ab = tvm.decl_buffer(A.shape, A.dtype, name="Ab", buffer_type="auto_broadcast")
- Bb = tvm.decl_buffer(B.shape, B.dtype, name="Bb", buffer_type="auto_broadcast")
- Cc = tvm.decl_buffer(C.shape, C.dtype, name="Cc", buffer_type="auto_broadcast")
- s = tvm.create_schedule(C.op)
+ n0, m0, x = te.size_var('n0'), te.size_var('m0'), te.size_var('x')
+ n1, m1 = te.size_var('n1'), te.size_var('m1')
+ o0, o1 = te.size_var('o0'), te.size_var('o1')
+
+ A = te.placeholder((m0, n0), name='A')
+ B = te.placeholder((m1, n1), name='B')
+ C = te.compute((o0, o1//x), lambda i, j: A[i, j] + B[i, j], name='C')
+
+ Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name="Ab", buffer_type="auto_broadcast")
+ Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name="Bb", buffer_type="auto_broadcast")
+ Cc = tvm.tir.decl_buffer(C.shape, C.dtype, name="Cc", buffer_type="auto_broadcast")
+ s = te.create_schedule(C.op)
def check_stride():
if not tvm.runtime.enabled("llvm"):
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_expr_constructor():
x = tvm.tir.Var("xx", "float32")
assert x.dtype == "float32"
assert x.value.value == 1
- a = tvm.const(1.0, dtype="float32")
- b = tvm.var("x", dtype="float32")
+ a = tvm.tir.const(1.0, dtype="float32")
+ b = te.var("x", dtype="float32")
for cls in [tvm.tir.Add,
tvm.tir.Sub,
assert x.b.same_as(b)
- a = tvm.convert(tvm.var("x") > 1)
- b = tvm.convert(tvm.var("x") == 1)
+ a = tvm.runtime.convert(te.var("x") > 1)
+ b = tvm.runtime.convert(te.var("x") == 1)
for cls in [tvm.tir.And,
tvm.tir.Or]:
assert x.false_value == b
assert x.condition == a
- buffer_var = tvm.var("x", dtype="handle")
+ buffer_var = te.var("x", dtype="handle")
x = tvm.tir.Load("float32", buffer_var, 1, a)
assert isinstance(x, tvm.tir.Load)
assert x.dtype == "float32"
assert x.func == None
assert x.value_index == 0
- v = tvm.var("aa")
+ v = te.var("aa")
x = tvm.tir.Let(v, 1, v)
assert x.var == v
assert x.value.value == 1
def test_stmt_constructor():
- v = tvm.var("aa")
- buffer_var = tvm.var("buf", dtype="handle")
+ v = te.var("aa")
+ buffer_var = te.var("buf", dtype="handle")
nop = tvm.tir.Evaluate(1)
x = tvm.tir.LetStmt(v, 1, tvm.tir.Evaluate(1))
assert isinstance(x, tvm.tir.LetStmt)
assert isinstance(x, tvm.tir.AttrStmt)
assert x.value.value == 1
- x = tvm.tir.AssertStmt(tvm.const(1, "uint1"),
- tvm.convert("hellow"),
+ x = tvm.tir.AssertStmt(tvm.tir.const(1, "uint1"),
+ tvm.runtime.convert("hellow"),
nop)
assert isinstance(x, tvm.tir.AssertStmt)
assert x.body == nop
assert isinstance(x, tvm.tir.ProducerConsumer)
assert x.body == nop
- x = tvm.tir.For(tvm.var("x"), 0, 10, 0, 0, nop)
+ x = tvm.tir.For(te.var("x"), 0, 10, 0, 0, nop)
assert isinstance(x, tvm.tir.For)
assert x.min.value == 0
assert x.extent.value == 10
assert x.body == nop
- x = tvm.tir.Store(buffer_var, 1, 10, tvm.const(1, "uint1"))
+ x = tvm.tir.Store(buffer_var, 1, 10, tvm.tir.const(1, "uint1"))
assert isinstance(x, tvm.tir.Store)
assert x.buffer_var == buffer_var
assert x.index.value == 10
assert x.value.value == 1
- tensor = tvm.placeholder((), dtype="float32")
+ tensor = te.placeholder((), dtype="float32")
x = tvm.tir.Provide(tensor.op, 0, 10, [])
assert isinstance(x, tvm.tir.Provide)
assert x.value_index == 0
assert x.value.value == 10
x = tvm.tir.Allocate(buffer_var, "float32", [10],
- tvm.const(1, "uint1"), nop)
+ tvm.tir.const(1, "uint1"), nop)
assert isinstance(x, tvm.tir.Allocate)
assert x.dtype == "float32"
assert x.buffer_var == buffer_var
assert isinstance(x, tvm.tir.Free)
assert x.buffer_var == buffer_var
- x = tvm.tir.Realize(None, 0, "float", [], tvm.const(1, "uint1"), nop)
+ x = tvm.tir.Realize(None, 0, "float", [], tvm.tir.const(1, "uint1"), nop)
assert isinstance(x, tvm.tir.Realize)
assert x.body == nop
- x = tvm.tir.IfThenElse(tvm.const(1, "uint1"),
+ x = tvm.tir.IfThenElse(tvm.tir.const(1, "uint1"),
tvm.tir.Evaluate(11),
nop)
assert isinstance(x, tvm.tir.IfThenElse)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
def test_array():
- a = tvm.convert([1,2,3])
+ a = tvm.runtime.convert([1,2,3])
assert len(a) == 3
assert a[-1].value == 3
a_slice = a[-3:-1]
assert (a_slice[0].value, a_slice[1].value) == (1, 2)
def test_array_save_load_json():
- a = tvm.convert([1,2,3])
+ a = tvm.runtime.convert([1,2,3])
json_str = tvm.ir.save_json(a)
a_loaded = tvm.ir.load_json(json_str)
assert(a_loaded[1].value == 2)
def test_map():
- a = tvm.var('a')
- b = tvm.var('b')
- amap = tvm.convert({a: 2,
+ a = te.var('a')
+ b = te.var('b')
+ amap = tvm.runtime.convert({a: 2,
b: 3})
assert a in amap
assert len(amap) == 2
def test_str_map():
- amap = tvm.convert({'a': 2, 'b': 3})
+ amap = tvm.runtime.convert({'a': 2, 'b': 3})
assert 'a' in amap
assert len(amap) == 2
dd = dict(amap.items())
def test_map_save_load_json():
- a = tvm.var('a')
- b = tvm.var('b')
- amap = tvm.convert({a: 2,
+ a = te.var('a')
+ b = te.var('b')
+ amap = tvm.runtime.convert({a: 2,
b: 3})
json_str = tvm.ir.save_json(amap)
amap = tvm.ir.load_json(json_str)
def test_in_container():
- arr = tvm.convert(['a', 'b', 'c'])
+ arr = tvm.runtime.convert(['a', 'b', 'c'])
assert 'a' in arr
assert tvm.tir.StringImm('a') in arr
assert 'd' not in arr
def test_ndarray_container():
x = tvm.nd.array([1,2,3])
- arr = tvm.convert([x, x])
+ arr = tvm.runtime.convert([x, x])
assert arr[0].same_as(x)
assert arr[1].same_as(x)
assert isinstance(arr[0], tvm.nd.NDArray)
"""Test layout and bijective-layout node"""
import tvm
+from tvm import te
from topi.util import get_const_tuple
def test_layout():
def test_bilayout_convertible():
# not convertible
- assert tvm.bijective_layout("NCHW", "ABCD") is None
- assert tvm.bijective_layout("__undef__", "NCHW") is None
- assert tvm.bijective_layout("NCHW", "__undef__") is None
- assert tvm.bijective_layout("__undef__", "__undef__") is None
- assert tvm.bijective_layout("", "NCHW") is None
- assert tvm.bijective_layout("NCHW", "") is None
- assert tvm.bijective_layout("", "") is None
+ assert tvm.tir.bijective_layout("NCHW", "ABCD") is None
+ assert tvm.tir.bijective_layout("__undef__", "NCHW") is None
+ assert tvm.tir.bijective_layout("NCHW", "__undef__") is None
+ assert tvm.tir.bijective_layout("__undef__", "__undef__") is None
+ assert tvm.tir.bijective_layout("", "NCHW") is None
+ assert tvm.tir.bijective_layout("NCHW", "") is None
+ assert tvm.tir.bijective_layout("", "") is None
# convertible
- assert tvm.bijective_layout("NCHW", "NCHW16c") is not None
+ assert tvm.tir.bijective_layout("NCHW", "NCHW16c") is not None
def test_bilayout_shape():
- bilayout = tvm.bijective_layout("NCHW", "NCHW16c")
+ bilayout = tvm.tir.bijective_layout("NCHW", "NCHW16c")
assert isinstance(bilayout, tvm.tir.BijectiveLayout)
dst_shape = bilayout.forward_shape((1, 32, 7, 7))
assert get_const_tuple(src_shape) == (1, 32, 7, 7)
def test_bilayout_index():
- bilayout = tvm.bijective_layout("NCHW", "NCHW16c")
+ bilayout = tvm.tir.bijective_layout("NCHW", "NCHW16c")
dst_index = bilayout.forward_index([0, 18, 6, 6])
assert get_const_tuple(dst_index) == (0, 1, 6, 6, 2)
# under the License.
"""Test group effect"""
import tvm
+from tvm import te
def test_scan_group():
- m = tvm.size_var("m")
- n = tvm.size_var("n")
- x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x")
- s_state = tvm.placeholder((m, n))
- s_init = tvm.compute((1, n), lambda _, i: x[0, i])
+ m = te.size_var("m")
+ n = te.size_var("n")
+ x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x")
+ s_state = te.placeholder((m, n))
+ s_init = te.compute((1, n), lambda _, i: x[0, i])
- s_update1 = tvm.compute((m, n), lambda t, i: s_state[t-1, i] + x[t, i])
- s_update2 = tvm.compute((m, n), lambda t, i: s_update1[t, i] + 1)
- s_update3 = tvm.compute((m, n), lambda t, i: s_update2[t, i] + 1)
- res = tvm.scan(s_init, s_update3, s_state, inputs=x)
+ s_update1 = te.compute((m, n), lambda t, i: s_state[t-1, i] + x[t, i])
+ s_update2 = te.compute((m, n), lambda t, i: s_update1[t, i] + 1)
+ s_update3 = te.compute((m, n), lambda t, i: s_update2[t, i] + 1)
+ res = tvm.te.scan(s_init, s_update3, s_state, inputs=x)
- s = tvm.create_schedule(res.op)
+ s = te.create_schedule(res.op)
assert s[s_update1].group is not None
assert s[s_update2].group == s[s_update1].group
# Assign within group, is valid
pass
def test_compute_group():
- m = tvm.size_var("m")
- n = tvm.size_var("n")
- x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x")
- x1 = tvm.compute(x.shape, lambda *i: x(*i) + 1, name="x1")
- x2 = tvm.compute(x.shape, lambda *i: x1(*i) + 2, name="x2")
- s = tvm.create_schedule(x2.op)
+ m = te.size_var("m")
+ n = te.size_var("n")
+ x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x")
+ x1 = te.compute(x.shape, lambda *i: x(*i) + 1, name="x1")
+ x2 = te.compute(x.shape, lambda *i: x1(*i) + 2, name="x2")
+ s = te.create_schedule(x2.op)
g = s.create_group(outputs=x1, inputs=x, include_inputs=True)
assert s[x1].group == g
assert s[x].group == g
assert g.num_child_stages == 2
def test_nest_group():
- m = tvm.size_var("m")
- n = tvm.size_var("n")
- x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x")
- x1 = tvm.compute(x.shape, lambda *i: x(*i) + 1, name="x1")
- x2 = tvm.compute(x.shape, lambda *i: x1(*i) + 2, name="x2")
- s = tvm.create_schedule(x2.op)
+ m = te.size_var("m")
+ n = te.size_var("n")
+ x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x")
+ x1 = te.compute(x.shape, lambda *i: x(*i) + 1, name="x1")
+ x2 = te.compute(x.shape, lambda *i: x1(*i) + 2, name="x2")
+ s = te.create_schedule(x2.op)
g1 = s.create_group(outputs=x1, inputs=x)
g2 = s.create_group(outputs=x1, inputs=x, include_inputs=True)
assert set(s.groups) == set([g1, g2])
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def check_throws(f):
try:
def test_const_fold():
def check(f, *args):
- x = f(*[tvm.const(x, "int32") for x in args])
+ x = f(*[tvm.tir.const(x, "int32") for x in args])
y = f(*args)
if not isinstance(x, (tvm.tir.IntImm,)) or x.value != int(y):
raise ValueError("check error: %s vs %s " % (x, y))
- tmod = tvm.truncmod
+ tmod = tvm.tir.truncmod
check(lambda x, y: x + y, 3, 4)
check(lambda x, y: x * y, 3, 12)
check(lambda x, y: x * y - 10, 3, 12)
def test_const_fold2():
- x = tvm.var("x")
- tmod = tvm.truncmod
- tdiv = tvm.truncdiv
+ x = te.var("x")
+ tmod = tvm.tir.truncmod
+ tdiv = tvm.tir.truncdiv
assert (x + 0).same_as(x)
assert (0 + x).same_as(x)
assert (x - 0).same_as(x)
def test_const_fold3():
# Test that using ints with logic operations is forbidden
- x = tvm.var("x")
+ x = te.var("x")
for val in [0, 1]:
- for func in [tvm.all, tvm.any]:
- check_throws(lambda: func(tvm.const(val, 'uint1'), x))
- check_throws(lambda: func(x, tvm.const(val, 'uint1')))
+ for func in [tvm.tir.all, tvm.tir.any]:
+ check_throws(lambda: func(tvm.tir.const(val, 'uint1'), x))
+ check_throws(lambda: func(x, tvm.tir.const(val, 'uint1')))
# Test const folding when both arguments are const
- for tvm_func, py_func in [(tvm.all, lambda a, b: a and b), (tvm.any, lambda a, b: a or b)]:
+ for tvm_func, py_func in [(tvm.tir.all, lambda a, b: a and b), (tvm.tir.any, lambda a, b: a or b)]:
for v1 in [0, 1]:
for v2 in [0, 1]:
- assert tvm.ir_pass.Equal(tvm_func(tvm.const(v1, 'uint1'), tvm.const(v2, 'uint1')),
- tvm.const(py_func(v1, v2), 'uint1'))
+ assert tvm.tir.ir_pass.Equal(tvm_func(tvm.tir.const(v1, 'uint1'), tvm.tir.const(v2, 'uint1')),
+ tvm.tir.const(py_func(v1, v2), 'uint1'))
- x = tvm.var("x", 'uint1')
- true = tvm.const(1, 'uint1')
- false = tvm.const(0, 'uint1')
+ x = te.var("x", 'uint1')
+ true = tvm.tir.const(1, 'uint1')
+ false = tvm.tir.const(0, 'uint1')
- assert tvm.all(x, true).same_as(x)
- assert tvm.all(true, x).same_as(x)
- assert tvm.any(x, false).same_as(x)
- assert tvm.any(false, x).same_as(x)
+ assert tvm.tir.all(x, true).same_as(x)
+ assert tvm.tir.all(true, x).same_as(x)
+ assert tvm.tir.any(x, false).same_as(x)
+ assert tvm.tir.any(false, x).same_as(x)
- assert tvm.all(x, false).same_as(false)
- assert tvm.all(false, x).same_as(false)
- assert tvm.any(x, true).same_as(true)
- assert tvm.any(true, x).same_as(true)
+ assert tvm.tir.all(x, false).same_as(false)
+ assert tvm.tir.all(false, x).same_as(false)
+ assert tvm.tir.any(x, true).same_as(true)
+ assert tvm.tir.any(true, x).same_as(true)
def test_const_fold4():
- x1 = tvm.const(4, "int32")
+ x1 = tvm.tir.const(4, "int32")
x2 = x1 + 5
- tdiv = tvm.truncdiv
+ tdiv = tvm.tir.truncdiv
assert isinstance(x2, tvm.tir.IntImm) and x2.value == 9
x3 = tdiv(x2, 3)
assert isinstance(x3, tvm.tir.IntImm) and x3.value == 3
x4 = x3 + 0.55
assert isinstance(x4, tvm.tir.FloatImm) and abs(x4.value - 3.55) < 1e-6
- x5 = tvm.ceil(x4)
+ x5 = te.ceil(x4)
assert isinstance(x5, tvm.tir.FloatImm) and x5.value == 4
x6 = x5.astype('int')
assert isinstance(x6, tvm.tir.IntImm) and x6.value == 4, "x6={}".format(x6)
- y = (tvm.round((tvm.const(6.5, 'float32') - 1) / 1.5) + 2).astype('int')
+ y = (te.round((tvm.tir.const(6.5, 'float32') - 1) / 1.5) + 2).astype('int')
assert isinstance(y, tvm.tir.IntImm) and y.value == 6
[('int32', 'int64'), 'int64'],
[('uint32', 'int32'), 'int32']]
for (lhs_dtype, rhs_dtype), out_dtype in rules:
- lhs = tvm.var('lhs', dtype=lhs_dtype)
- rhs = tvm.var('rhs', dtype=rhs_dtype)
+ lhs = te.var('lhs', dtype=lhs_dtype)
+ rhs = te.var('rhs', dtype=rhs_dtype)
out = f(lhs, rhs)
if not is_conditional:
assert out.dtype == out_dtype
def verify_callop_float_only(f):
for lhs_dtype in ['int32', 'float32', 'float64']:
for rhs_dtype in ['int32', 'float32', 'float64']:
- lhs = tvm.var('lhs', dtype=lhs_dtype)
- rhs = tvm.var('rhs', dtype=rhs_dtype)
+ lhs = te.var('lhs', dtype=lhs_dtype)
+ rhs = te.var('rhs', dtype=rhs_dtype)
if 'float' not in lhs_dtype and 'float' not in rhs_dtype:
check_throws(lambda: f(lhs, rhs))
elif 'float' in lhs_dtype and 'float' in rhs_dtype and lhs_dtype != rhs_dtype:
verify_general_dtype_support(lambda a, b: a * b)
verify_general_dtype_support(lambda a, b: a >= b, is_conditional=True)
verify_general_dtype_support(lambda a, b: a <= b, is_conditional=True)
- verify_callop_float_only(lambda a, b: tvm.power(a, b))
+ verify_callop_float_only(lambda a, b: te.power(a, b))
def test_if_then_else():
- cases = [[(tvm.var('cond', dtype='bool'), 'bool', 'int32'), 'int32'],
+ cases = [[(te.var('cond', dtype='bool'), 'bool', 'int32'), 'int32'],
[(True, 'int32', 'float32'), 'float32'],
[(False, 'int32', 'int64'), 'int64'],
- [(tvm.var('cond', dtype='bool'), 'uint32', 'int32'), 'int32'],
- [(tvm.var('cond', dtype='int32'), 'uint32', 'int32'), 'int32']]
+ [(te.var('cond', dtype='bool'), 'uint32', 'int32'), 'int32'],
+ [(te.var('cond', dtype='int32'), 'uint32', 'int32'), 'int32']]
for (cond, lhs_dtype, rhs_dtype), out_dtype in cases:
- lhs = tvm.var('lhs', dtype=lhs_dtype)
- rhs = tvm.var('rhs', dtype=rhs_dtype)
+ lhs = te.var('lhs', dtype=lhs_dtype)
+ rhs = te.var('rhs', dtype=rhs_dtype)
if cond is True or cond is False:
- out = tvm.if_then_else(cond, lhs, rhs)
- out2 = tvm.if_then_else(not cond, rhs, lhs)
- out3 = tvm.if_then_else(not cond, lhs, rhs)
- assert tvm.ir_pass.Equal(out, out2) == 1
+ out = tvm.tir.if_then_else(cond, lhs, rhs)
+ out2 = tvm.tir.if_then_else(not cond, rhs, lhs)
+ out3 = tvm.tir.if_then_else(not cond, lhs, rhs)
+ assert tvm.tir.ir_pass.Equal(out, out2) == 1
if cond:
- assert tvm.ir_pass.Equal(out, lhs.astype(out_dtype)) == 1
- assert tvm.ir_pass.Equal(out3, rhs.astype(out_dtype)) == 1
+ assert tvm.tir.ir_pass.Equal(out, lhs.astype(out_dtype)) == 1
+ assert tvm.tir.ir_pass.Equal(out3, rhs.astype(out_dtype)) == 1
else:
- assert tvm.ir_pass.Equal(out, rhs.astype(out_dtype)) == 1
- assert tvm.ir_pass.Equal(out3, lhs.astype(out_dtype)) == 1
+ assert tvm.tir.ir_pass.Equal(out, rhs.astype(out_dtype)) == 1
+ assert tvm.tir.ir_pass.Equal(out3, lhs.astype(out_dtype)) == 1
elif cond.dtype == 'bool':
- out = tvm.if_then_else(cond, lhs, rhs)
+ out = tvm.tir.if_then_else(cond, lhs, rhs)
assert out.dtype == out_dtype
assert out.args[1].dtype == out_dtype
assert out.args[2].dtype == out_dtype
elif cond.dtype != 'bool':
- check_throws(lambda: tvm.if_then_else(cond, lhs, rhs))
+ check_throws(lambda: tvm.tir.if_then_else(cond, lhs, rhs))
else:
raise ValueError('Unknown combinations')
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_const_saveload_json():
# save load json
- x = tvm.const(1, "int32")
- y = tvm.const(10, "int32")
+ x = tvm.tir.const(1, "int32")
+ y = tvm.tir.const(10, "int32")
z = x + y
z = z + z
json_str = tvm.ir.save_json(z)
def test_make_smap():
# save load json
- x = tvm.const(1, "int32")
- y = tvm.const(10, "int32")
+ x = tvm.tir.const(1, "int32")
+ y = tvm.tir.const(10, "int32")
z = tvm.tir.Add(x, y)
- smap = tvm.convert({"z": z, "x": x})
- json_str = tvm.ir.save_json(tvm.convert([smap]))
+ smap = tvm.runtime.convert({"z": z, "x": x})
+ json_str = tvm.ir.save_json(tvm.runtime.convert([smap]))
arr = tvm.ir.load_json(json_str)
assert len(arr) == 1
assert arr[0]["z"].a == arr[0]["x"]
x = tvm.ir.make_node("IntImm", dtype="int32", value=10)
assert isinstance(x, tvm.tir.IntImm)
assert x.value == 10
- A = tvm.placeholder((10, ), name='A')
+ A = te.placeholder((10, ), name='A')
AA = tvm.ir.make_node("Tensor",
shape=A.shape,
dtype=A.dtype,
def test_make_sum():
- A = tvm.placeholder((2, 10), name='A')
- k = tvm.reduce_axis((0,10), "k")
- B = tvm.compute((2,), lambda i: tvm.sum(A[i, k], axis=k), name="B")
+ A = te.placeholder((2, 10), name='A')
+ k = te.reduce_axis((0,10), "k")
+ B = te.compute((2,), lambda i: te.sum(A[i, k], axis=k), name="B")
json_str = tvm.ir.save_json(B)
BB = tvm.ir.load_json(json_str)
assert B.op.body[0].combiner is not None
# under the License.
import pytest
import tvm
+from tvm import te
import pickle as pkl
def test_schedule_create():
- m = tvm.size_var('m')
- n = tvm.size_var('n')
- l = tvm.size_var('l')
- A = tvm.placeholder((m, l), name='A')
- B = tvm.placeholder((n, l), name='B')
- AA = tvm.compute((m, l), lambda i, j: A[i, j])
- T = tvm.compute((m, n, l), lambda i, j, k: AA(i, k) * B(j, k))
- s = tvm.create_schedule(T.op)
+ m = te.size_var('m')
+ n = te.size_var('n')
+ l = te.size_var('l')
+ A = te.placeholder((m, l), name='A')
+ B = te.placeholder((n, l), name='B')
+ AA = te.compute((m, l), lambda i, j: A[i, j])
+ T = te.compute((m, n, l), lambda i, j, k: AA(i, k) * B(j, k))
+ s = te.create_schedule(T.op)
s[AA].set_scope("shared")
xo, xi = s[T].split(T.op.axis[0], factor=10)
xi1, xi2 = s[T].split(xi, factor=2)
# save load json
json_str = tvm.ir.save_json(s)
s_loaded = tvm.ir.load_json(json_str)
- assert isinstance(s_loaded, tvm.schedule.Schedule)
+ assert isinstance(s_loaded, tvm.te.schedule.Schedule)
assert(str(s_loaded.outputs[0].body) == str(s.outputs[0].body))
# pickle unpickle
dump = pkl.dumps(s)
s_loaded = pkl.loads(dump)
- assert isinstance(s_loaded, tvm.schedule.Schedule)
+ assert isinstance(s_loaded, tvm.te.schedule.Schedule)
assert(str(s_loaded.outputs[0].body) == str(s.outputs[0].body))
def test_reorder():
- m = tvm.size_var('m')
- A = tvm.placeholder((m,), name='A')
- T = tvm.compute(m, lambda i: A[i+1])
+ m = te.size_var('m')
+ A = te.placeholder((m,), name='A')
+ T = te.compute(m, lambda i: A[i+1])
- s = tvm.create_schedule(T.op)
+ s = te.create_schedule(T.op)
xo, xi = s[T].split(T.op.axis[0], factor=10)
xi1, xi2 = s[T].split(xi, factor=2)
order = (xi2, xi1, xo)
pass
def test_split():
- m = tvm.size_var('m')
- A = tvm.placeholder((m,), name='A')
- T = tvm.compute((m,), lambda i: A[i])
+ m = te.size_var('m')
+ A = te.placeholder((m,), name='A')
+ T = te.compute((m,), lambda i: A[i])
- s = tvm.create_schedule(T.op)
+ s = te.create_schedule(T.op)
xo, xi = s[T].split(T.op.axis[0], factor=10)
assert tuple(s[T].leaf_iter_vars) == (xo, xi)
def test_tile():
- m = tvm.size_var('m')
- n = tvm.size_var('n')
- A = tvm.placeholder((m, n), name='A')
- T = tvm.compute((m, n), lambda i, j: A[i, j])
+ m = te.size_var('m')
+ n = te.size_var('n')
+ A = te.placeholder((m, n), name='A')
+ T = te.compute((m, n), lambda i, j: A[i, j])
- s = tvm.create_schedule(T.op)
+ s = te.create_schedule(T.op)
xo, yo, xi, yi = s[T].tile(T.op.axis[0], T.op.axis[1], x_factor=10, y_factor=5)
assert tuple(s[T].leaf_iter_vars) == (xo, yo, xi, yi)
def test_fuse():
- m = tvm.size_var('m')
- n = tvm.size_var('n')
- A = tvm.placeholder((m, n), name='A')
- T = tvm.compute((m, n), lambda i, j: A[i, j])
+ m = te.size_var('m')
+ n = te.size_var('n')
+ A = te.placeholder((m, n), name='A')
+ T = te.compute((m, n), lambda i, j: A[i, j])
- s = tvm.create_schedule(T.op)
+ s = te.create_schedule(T.op)
xo, yo, xi, yi = s[T].tile(T.op.axis[0], T.op.axis[1], x_factor=10, y_factor=5)
fused = s[T].fuse(xo, yo)
- assert any(isinstance(x, tvm.schedule.Fuse) for x in s[T].relations)
+ assert any(isinstance(x, tvm.te.schedule.Fuse) for x in s[T].relations)
assert tuple(s[T].leaf_iter_vars) == (fused, xi, yi)
def test_singleton():
print("test singleton")
- A = tvm.placeholder((), name='A')
- T = tvm.compute((), lambda : A() + 1)
- s = tvm.create_schedule(T.op)
+ A = te.placeholder((), name='A')
+ T = te.compute((), lambda : A() + 1)
+ s = te.create_schedule(T.op)
print("test singleton fin1")
fused = s[T].fuse()
- assert any(isinstance(x, tvm.schedule.Singleton) for x in s[T].relations)
+ assert any(isinstance(x, tvm.te.schedule.Singleton) for x in s[T].relations)
assert tuple(s[T].leaf_iter_vars) == (fused,)
dump = pkl.dumps(s)
print("test singleton fin3")
s_loaded = pkl.loads(dump)
print("test singleton fin2")
- assert isinstance(s_loaded, tvm.schedule.Schedule)
+ assert isinstance(s_loaded, tvm.te.schedule.Schedule)
print("test singleton fin")
def test_vectorize():
- m = tvm.size_var('m')
- n = tvm.size_var('n')
- A = tvm.placeholder((m, n), name='A')
- T = tvm.compute((m, n), lambda i, j: A[i, j])
+ m = te.size_var('m')
+ n = te.size_var('n')
+ A = te.placeholder((m, n), name='A')
+ T = te.compute((m, n), lambda i, j: A[i, j])
- s = tvm.create_schedule(T.op)
+ s = te.create_schedule(T.op)
xo, yo, xi, yi = s[T].tile(T.op.axis[0], T.op.axis[1], x_factor=10, y_factor=5)
s[T].vectorize(yi)
s[T].unroll(xi)
- UNROLL = tvm.schedule.IterVar.Unrolled
- VECTORIZE = tvm.schedule.IterVar.Vectorized
+ UNROLL = tvm.te.schedule.IterVar.Unrolled
+ VECTORIZE = tvm.te.schedule.IterVar.Vectorized
assert s[T].iter_var_attrs[xi].iter_type == UNROLL
assert s[T].iter_var_attrs[yi].iter_type == VECTORIZE
@pytest.mark.xfail
def test_vectorize_commreduce():
- V = tvm.placeholder((128,), name='V')
- ax = tvm.reduce_axis((0, 128), name='ax')
- O = tvm.compute((1,), lambda _: tvm.sum(V[ax], axis=[ax]))
- s = tvm.create_schedule(O.op)
+ V = te.placeholder((128,), name='V')
+ ax = te.reduce_axis((0, 128), name='ax')
+ O = te.compute((1,), lambda _: te.sum(V[ax], axis=[ax]))
+ s = te.create_schedule(O.op)
s[O].vectorize(ax) # should throw here
def test_pragma():
m = 100
- A = tvm.placeholder((m,), name='A')
- T = tvm.compute((m,), lambda i: A[i])
+ A = te.placeholder((m,), name='A')
+ T = te.compute((m,), lambda i: A[i])
- s = tvm.create_schedule(T.op)
+ s = te.create_schedule(T.op)
xo, xi = s[T].split(T.op.axis[0], factor=10)
s[T].pragma(xo, "pragma1")
s[T].pragma(xi, "vectorize")
- VECTORIZE = tvm.schedule.IterVar.Vectorized
+ VECTORIZE = tvm.te.schedule.IterVar.Vectorized
assert s[T].iter_var_attrs[xo].pragma_keys[0].value == "pragma1"
assert s[T].iter_var_attrs[xi].iter_type == VECTORIZE
def test_rfactor():
- n = tvm.size_var('n')
- k1 = tvm.reduce_axis((0, n), name="k1")
- k2 = tvm.reduce_axis((0, n), name="k2")
- A = tvm.placeholder((n, n, n), name='A')
- B = tvm.compute((n, ), lambda i: tvm.sum(A[i, k1, k2], axis=[k1, k2]))
+ n = te.size_var('n')
+ k1 = te.reduce_axis((0, n), name="k1")
+ k2 = te.reduce_axis((0, n), name="k2")
+ A = te.placeholder((n, n, n), name='A')
+ B = te.compute((n, ), lambda i: te.sum(A[i, k1, k2], axis=[k1, k2]))
# normal schedule
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
BF = s.rfactor(B, k1)
assert(tuple(BF.shape) == (n, n))
assert(set(BF.op.body[0].axis) == set([k2]))
assert(s[B].op.body[0].axis[0].dom.extent == n)
assert(len(s[B].all_iter_vars) == 2)
# schedule with split
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
ko, ki = s[B].split(k1, factor=4)
xo, xi = s[B].split(B.op.axis[0], factor=8)
BF = s.rfactor(B, ki)
assert(BF.op.body[0].axis[1].var == ko.var)
assert(s[B].op.body[0].axis[0].dom.extent.value == 4)
# schedule with factor_axis
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
ko, ki = s[B].split(k1, factor=4)
xo, xi = s[B].split(B.op.axis[0], factor=8)
BF = s.rfactor(B, ki, 1)
def test_tensor_intrin():
n = 16
- x = tvm.placeholder((n,), name='x')
- y = tvm.placeholder((n,), name='y')
- z = tvm.compute(x.shape, lambda i: x[i] + y[i], name='z')
+ x = te.placeholder((n,), name='x')
+ y = te.placeholder((n,), name='y')
+ z = te.compute(x.shape, lambda i: x[i] + y[i], name='z')
def intrin_func(ins, outs):
- assert(isinstance(ins[0], tvm.schedule.Buffer))
+ assert(isinstance(ins[0], tvm.te.schedule.Buffer))
assert(ins[0].shape[0].value == n)
- return tvm.call_packed("vadd", ins[0].data, outs[0].data, ins[0].shape[0])
- intrin = tvm.decl_tensor_intrin(z.op, intrin_func)
+ return tvm.tir.call_packed("vadd", ins[0].data, outs[0].data, ins[0].shape[0])
+ intrin = te.decl_tensor_intrin(z.op, intrin_func)
assert intrin.op == z.op
assert intrin.reduce_init is None
assert tuple(intrin.inputs) == tuple(z.op.input_tensors)
assert(intrin.buffers[0].shape[0].value == n)
m = 32
- x = tvm.placeholder((m,), name='x')
- y = tvm.placeholder((m,), name='y')
- z = tvm.compute(x.shape, lambda i: x[i] + y[i], name='z')
- s = tvm.create_schedule(z.op)
+ x = te.placeholder((m,), name='x')
+ y = te.placeholder((m,), name='y')
+ z = te.compute(x.shape, lambda i: x[i] + y[i], name='z')
+ s = te.create_schedule(z.op)
xo, xi = s[z].split(z.op.axis[0], factor=n)
s[z].tensorize(xi, intrin)
assert(s[z].iter_var_attrs[xi].tensor_intrin == intrin)
- assert(s[z].iter_var_attrs[xi].iter_type == tvm.schedule.IterVar.Tensorized)
+ assert(s[z].iter_var_attrs[xi].iter_type == tvm.te.schedule.IterVar.Tensorized)
def test_tensor_intrin_scalar_params():
- n = tvm.size_var("n")
- x = tvm.placeholder((n,), name='x')
- v = tvm.size_var("v")
- w = tvm.size_var("w")
- z = tvm.compute((n,), lambda i: x[i]*v + w, name='z')
+ n = te.size_var("n")
+ x = te.placeholder((n,), name='x')
+ v = te.size_var("v")
+ w = te.size_var("w")
+ z = te.compute((n,), lambda i: x[i]*v + w, name='z')
def intrin_func(ins, outs, sp):
- assert(isinstance(ins[0], tvm.schedule.Buffer))
+ assert(isinstance(ins[0], tvm.te.schedule.Buffer))
assert(ins[0].shape[0] == n)
assert(sp[0] == v)
assert(sp[1] == w)
- return tvm.call_packed("hw_func", ins[0].data, outs[0].data, sp[0], sp[1])
+ return tvm.tir.call_packed("hw_func", ins[0].data, outs[0].data, sp[0], sp[1])
- with tvm.build_config(offset_factor=1):
- intrin = tvm.decl_tensor_intrin(z.op, intrin_func, scalar_params=[v, w])
+ with tvm.target.build_config(offset_factor=1):
+ intrin = te.decl_tensor_intrin(z.op, intrin_func, scalar_params=[v, w])
assert intrin.op == z.op
assert intrin.reduce_init is None
assert tuple(intrin.inputs) == tuple(z.op.input_tensors)
assert(intrin.buffers[0].shape[0] == n)
assert tuple(intrin.scalar_params) == tuple((v, w))
- A = tvm.placeholder((10,10), name='A')
+ A = te.placeholder((10,10), name='A')
# Pass scalar inputs to the TensorIntrin, interleaved with tensor inputs
- C = tvm.compute((10,10), lambda i, j: intrin(i*i, A[i, j], i+j), name="C")
- s = tvm.create_schedule(C.op)
+ C = te.compute((10,10), lambda i, j: intrin(i*i, A[i, j], i+j), name="C")
+ s = te.create_schedule(C.op)
stmt = tvm.lower(s, [A, C], simple_mode=True)
assert isinstance(stmt.body.body.body, tvm.tir.Evaluate)
assert len(stmt.body.body.body.value.args) == 5
# under the License.
import json
import tvm
+from tvm import te
+from tvm import te
-@tvm.tag_scope(tag="conv")
+@tvm.te.tag_scope(tag="conv")
def compute_conv(data, weight):
N, IC, H, W = data.shape
OC, IC, KH, KW = weight.shape
OH = H - KH + 1
OW = W - KW + 1
- ic = tvm.reduce_axis((0, IC), name='ic')
- dh = tvm.reduce_axis((0, KH), name='dh')
- dw = tvm.reduce_axis((0, KW), name='dw')
+ ic = te.reduce_axis((0, IC), name='ic')
+ dh = te.reduce_axis((0, KH), name='dh')
+ dw = te.reduce_axis((0, KW), name='dw')
- return tvm.compute((N, OC, OH, OW), lambda i, oc, h, w: \
- tvm.sum(data[i, ic, h+dh, w+dw] * weight[oc, ic, dh, dw],
+ return te.compute((N, OC, OH, OW), lambda i, oc, h, w: \
+ te.sum(data[i, ic, h+dh, w+dw] * weight[oc, ic, dh, dw],
axis=[ic, dh, dw]))
def test_with():
- n = tvm.size_var('n')
- m = tvm.size_var('m')
- l = tvm.size_var('l')
+ n = te.size_var('n')
+ m = te.size_var('m')
+ l = te.size_var('l')
- A = tvm.placeholder((n, l), name='A')
- B = tvm.placeholder((m, l), name='B')
- with tvm.tag_scope(tag="gemm"):
- k = tvm.reduce_axis((0, l), name='k')
- C = tvm.compute((n, m), lambda i, j: tvm.sum(A[i, k] * B[j, k], axis=k),
+ A = te.placeholder((n, l), name='A')
+ B = te.placeholder((m, l), name='B')
+ with tvm.te.tag_scope(tag="gemm"):
+ k = te.reduce_axis((0, l), name='k')
+ C = te.compute((n, m), lambda i, j: te.sum(A[i, k] * B[j, k], axis=k),
attrs={"hello" : 1, "arr": [10, 12]})
assert C.op.tag == 'gemm'
def test_decorator():
- n = tvm.size_var('n')
- c = tvm.size_var('c')
- h = tvm.size_var('h')
- w = tvm.size_var('w')
- kh = tvm.size_var('kh')
- kw = tvm.size_var('kw')
+ n = te.size_var('n')
+ c = te.size_var('c')
+ h = te.size_var('h')
+ w = te.size_var('w')
+ kh = te.size_var('kh')
+ kw = te.size_var('kw')
- A = tvm.placeholder((n, c, h, w), name='A')
- B = tvm.placeholder((c, c, kh, kw), name='B')
+ A = te.placeholder((n, c, h, w), name='A')
+ B = te.placeholder((c, c, kh, kw), name='B')
C = compute_conv(A, B)
assert C.op.tag == 'conv'
assert len(C.op.attrs) == 0
def test_nested():
- n = tvm.size_var('n')
- c = tvm.size_var('c')
- h = tvm.size_var('h')
- w = tvm.size_var('w')
- kh = tvm.size_var('kh')
- kw = tvm.size_var('kw')
+ n = te.size_var('n')
+ c = te.size_var('c')
+ h = te.size_var('h')
+ w = te.size_var('w')
+ kh = te.size_var('kh')
+ kw = te.size_var('kw')
- A = tvm.placeholder((n, c, h, w), name='A')
- B = tvm.placeholder((c, c, kh, kw), name='B')
+ A = te.placeholder((n, c, h, w), name='A')
+ B = te.placeholder((c, c, kh, kw), name='B')
try:
- with tvm.tag_scope(tag='conv'):
+ with te.tag_scope(tag='conv'):
C = compute_conv(A, B)
assert False
except ValueError:
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
@tvm.target.generic_func
def mygeneric(data):
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
from topi.nn.pooling import pool
def test_tensor():
- m = tvm.size_var('m')
- n = tvm.size_var('n')
- l = tvm.size_var('l')
- A = tvm.placeholder((m, l), name='A')
- B = tvm.placeholder((n, l), name='B')
- T = tvm.compute((m, n, l), lambda i, j, k: A[i, k] * B[j, k])
+ m = te.size_var('m')
+ n = te.size_var('n')
+ l = te.size_var('l')
+ A = te.placeholder((m, l), name='A')
+ B = te.placeholder((n, l), name='B')
+ T = te.compute((m, n, l), lambda i, j, k: A[i, k] * B[j, k])
print(T)
print(T.op.body)
assert(tuple(T.shape) == (m, n, l))
- assert(isinstance(A.op, tvm.tensor.PlaceholderOp))
+ assert(isinstance(A.op, tvm.te.PlaceholderOp))
assert(A == A)
assert(T.op.output(0) == T)
assert(T.op.output(0).__hash__() == T.__hash__())
def test_rank_zero():
- m = tvm.size_var('m')
- A = tvm.placeholder((m,), name='A')
- scale = tvm.placeholder((), name='s')
- k = tvm.reduce_axis((0, m), name="k")
- T = tvm.compute((), lambda : tvm.sum(A[k] * scale(), axis=k))
+ m = te.size_var('m')
+ A = te.placeholder((m,), name='A')
+ scale = te.placeholder((), name='s')
+ k = te.reduce_axis((0, m), name="k")
+ T = te.compute((), lambda : te.sum(A[k] * scale(), axis=k))
print(T)
print(T.op.body)
assert(tuple(T.shape) == ())
def test_conv1d():
- n = tvm.size_var('n')
- A = tvm.placeholder((n+2), name='A')
+ n = te.size_var('n')
+ A = te.placeholder((n+2), name='A')
def computeB(ii):
i = ii + 1
return A[i-1] + A[i] + A[i+1]
- B = tvm.compute(n, computeB)
+ B = te.compute(n, computeB)
def test_tensor_slice():
- n = tvm.size_var('n')
- A = tvm.compute((n, n), lambda i, j: 1)
- B = tvm.compute((n,), lambda i: A[0][i] + A[0][i])
+ n = te.size_var('n')
+ A = te.compute((n, n), lambda i, j: 1)
+ B = te.compute((n,), lambda i: A[0][i] + A[0][i])
def test_tensor_reduce_multi_axis():
- m = tvm.size_var('m')
- n = tvm.size_var('n')
- A = tvm.placeholder((m, n), name='A')
- k1 = tvm.reduce_axis((0, n), "k")
- k2 = tvm.reduce_axis((0, m), "k")
- C = tvm.compute((1,), lambda _: tvm.sum(A[k1, k2], axis=(k1, k2)))
- C = tvm.compute((1,), lambda _: tvm.sum(A[k1, k2], axis=[k1, k2]))
+ m = te.size_var('m')
+ n = te.size_var('n')
+ A = te.placeholder((m, n), name='A')
+ k1 = te.reduce_axis((0, n), "k")
+ k2 = te.reduce_axis((0, m), "k")
+ C = te.compute((1,), lambda _: te.sum(A[k1, k2], axis=(k1, k2)))
+ C = te.compute((1,), lambda _: te.sum(A[k1, k2], axis=[k1, k2]))
def test_tensor_comm_reducer():
- m = tvm.size_var('m')
- n = tvm.size_var('n')
- A = tvm.placeholder((m, n), name='A')
- k = tvm.reduce_axis((0, n), "k")
- mysum = tvm.comm_reducer(lambda x, y: x+y, lambda t: tvm.const(0, dtype=t))
- C = tvm.compute((m,), lambda i: mysum(A[i, k], axis=k))
+ m = te.size_var('m')
+ n = te.size_var('n')
+ A = te.placeholder((m, n), name='A')
+ k = te.reduce_axis((0, n), "k")
+ mysum = te.comm_reducer(lambda x, y: x+y, lambda t: tvm.tir.const(0, dtype=t))
+ C = te.compute((m,), lambda i: mysum(A[i, k], axis=k))
def test_tensor_comm_reducer_overload():
- m = tvm.size_var('m')
- n = tvm.size_var('n')
- mysum = tvm.comm_reducer(lambda x, y: x+y, lambda t: tvm.const(0, dtype=t))
+ m = te.size_var('m')
+ n = te.size_var('n')
+ mysum = te.comm_reducer(lambda x, y: x+y, lambda t: tvm.tir.const(0, dtype=t))
sum_res = mysum(m, n)
def test_tensor_reduce():
- m = tvm.size_var('m')
- n = tvm.size_var('n')
- l = tvm.size_var('l')
- A = tvm.placeholder((m, l), name='A')
- B = tvm.placeholder((n, l), name='B')
- T = tvm.compute((m, n, l), lambda i, j, k: A[i, k] * B[j, k])
- rv = tvm.reduce_axis((0, A.shape[1]), "k")
- C = tvm.compute((m, n), lambda i, j: tvm.sum(T(i, j, rv+1), axis=rv))
+ m = te.size_var('m')
+ n = te.size_var('n')
+ l = te.size_var('l')
+ A = te.placeholder((m, l), name='A')
+ B = te.placeholder((n, l), name='B')
+ T = te.compute((m, n, l), lambda i, j, k: A[i, k] * B[j, k])
+ rv = te.reduce_axis((0, A.shape[1]), "k")
+ C = te.compute((m, n), lambda i, j: te.sum(T(i, j, rv+1), axis=rv))
# json load save
C_json = tvm.ir.save_json(C)
C_loaded = tvm.ir.load_json(C_json)
- assert(isinstance(C_loaded, tvm.tensor.Tensor))
+ assert(isinstance(C_loaded, te.tensor.Tensor))
assert(str(C_loaded) == str(C))
def test_tensor_compute1():
dtype = 'float32'
def intrin_vadd(n):
- x = tvm.placeholder((n,))
- y = tvm.placeholder((n,))
- z = tvm.compute(x.shape, lambda i: x[i] + y[i])
+ x = te.placeholder((n,))
+ y = te.placeholder((n,))
+ z = te.compute(x.shape, lambda i: x[i] + y[i])
def intrin_func(ins, outs):
- ib = tvm.ir_builder.create()
- ib.emit(tvm.call_extern(outs[0].dtype, 'vadd', ins[0].access_ptr("r"), ins[1].access_ptr('r'), outs[0].access_ptr('wr')))
+ ib = tvm.tir.ir_builder.create()
+ ib.emit(tvm.tir.call_extern(outs[0].dtype, 'vadd', ins[0].access_ptr("r"), ins[1].access_ptr('r'), outs[0].access_ptr('wr')))
return ib.get()
- with tvm.build_config(offset_factor=n):
- return tvm.decl_tensor_intrin(z.op, intrin_func)
+ with tvm.target.build_config(offset_factor=n):
+ return te.decl_tensor_intrin(z.op, intrin_func)
vadd = intrin_vadd(factor)
- A = tvm.placeholder((m//factor, factor), name="A", dtype=dtype)
- B = tvm.placeholder((m//factor, factor), name="B", dtype=dtype)
- C = tvm.compute((m//factor, factor),
+ A = te.placeholder((m//factor, factor), name="A", dtype=dtype)
+ B = te.placeholder((m//factor, factor), name="B", dtype=dtype)
+ C = te.compute((m//factor, factor),
lambda i: vadd(A[i, 0:factor], B[i, 0:factor]))
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
stmt = tvm.lower(s, [A, B, C], simple_mode=True)
assert isinstance(stmt.body.body, tvm.tir.Evaluate)
dtype = 'float32'
def intrin_gemm(m, n, l):
- k = tvm.reduce_axis((0, l))
- x = tvm.placeholder((m, l))
- y = tvm.placeholder((n, l))
+ k = te.reduce_axis((0, l))
+ x = te.placeholder((m, l))
+ y = te.placeholder((n, l))
# in theory, no relation
- z = tvm.compute((m, n), lambda i, j: tvm.sum(x[i][k] * y[j][k], axis=k))
+ z = te.compute((m, n), lambda i, j: te.sum(x[i][k] * y[j][k], axis=k))
def intrin_func(ins, outs):
x_ptr = ins[0].access_ptr("r")
y_ptr = ins[1].access_ptr("r")
z_ptr = outs[0].access_ptr("w")
- body = tvm.call_packed(
+ body = tvm.tir.call_packed(
"gemv", x_ptr, y_ptr, z_ptr, m, n, l)
- reset = tvm.call_packed(
+ reset = tvm.tir.call_packed(
"fill_zero", z_ptr, m, n)
- update = tvm.call_packed(
+ update = tvm.tir.call_packed(
"gemv_add", x_ptr, y_ptr, z_ptr, m, n, l)
return body, reset, update
- with tvm.build_config(offset_factor=n):
- return tvm.decl_tensor_intrin(z.op, intrin_func)
+ with tvm.target.build_config(offset_factor=n):
+ return te.decl_tensor_intrin(z.op, intrin_func)
vgemm = intrin_gemm(factor1, factor2, factor)
- A = tvm.placeholder((M//factor1, L//factor, factor1, factor), name="A", dtype=dtype)
- B = tvm.placeholder((N//factor2, L//factor, factor2, factor), name="B", dtype=dtype)
- k = tvm.reduce_axis((0, L//factor), name='k')
- C = tvm.compute((M//factor1, N//factor2, factor1, factor2),
+ A = te.placeholder((M//factor1, L//factor, factor1, factor), name="A", dtype=dtype)
+ B = te.placeholder((N//factor2, L//factor, factor2, factor), name="B", dtype=dtype)
+ k = te.reduce_axis((0, L//factor), name='k')
+ C = te.compute((M//factor1, N//factor2, factor1, factor2),
lambda i, j: vgemm(A[i, k, 0:factor1, 0:factor], B[j, k, 0:factor2, 0:factor], reduce_axis=k))
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
stmt = tvm.lower(s, [A, B, C], simple_mode=True)
assert isinstance(stmt.body.body.body[0], tvm.tir.Evaluate)
assert isinstance(stmt.body.body.body[1].body, tvm.tir.Evaluate)
def test_tensor_scan():
- m = tvm.size_var("m")
- n = tvm.size_var("n")
- x = tvm.placeholder((m, n))
- s = tvm.placeholder((m, n))
- res = tvm.scan(tvm.compute((1, n), lambda _, i: x[0, i]),
- tvm.compute((m, n), lambda t, i: s[t-1, i] + x[t, i]),
+ m = te.size_var("m")
+ n = te.size_var("n")
+ x = te.placeholder((m, n))
+ s = te.placeholder((m, n))
+ res = tvm.te.scan(te.compute((1, n), lambda _, i: x[0, i]),
+ te.compute((m, n), lambda t, i: s[t-1, i] + x[t, i]),
s)
assert tuple(res.shape) == (m, n)
def test_scan_multi_out():
- m = tvm.size_var("m")
- n = tvm.size_var("n")
- x1 = tvm.placeholder((m, n))
- s1 = tvm.placeholder((m, n))
- x2 = tvm.placeholder((m, n))
- s2 = tvm.placeholder((m, n))
- s1_init = tvm.compute((1, n), lambda _, i: x1[0, i])
- s2_init = tvm.compute((1, n), lambda _, i: x2[0, i])
- s1_update = tvm.compute((m, n), lambda t, i: s1[t-1, i] + s2[t-1, i] + x1[t, i])
- s2_update = tvm.compute((m, n), lambda t, i: x2[t, i] + s2[t-1,i])
-
- r0, r1 = tvm.scan([s1_init, s2_init],
+ m = te.size_var("m")
+ n = te.size_var("n")
+ x1 = te.placeholder((m, n))
+ s1 = te.placeholder((m, n))
+ x2 = te.placeholder((m, n))
+ s2 = te.placeholder((m, n))
+ s1_init = te.compute((1, n), lambda _, i: x1[0, i])
+ s2_init = te.compute((1, n), lambda _, i: x2[0, i])
+ s1_update = te.compute((m, n), lambda t, i: s1[t-1, i] + s2[t-1, i] + x1[t, i])
+ s2_update = te.compute((m, n), lambda t, i: x2[t, i] + s2[t-1,i])
+
+ r0, r1 = tvm.te.scan([s1_init, s2_init],
[s1_update, s2_update],
[s1, s2])
assert(r0.value_index == 0)
assert(r1.value_index == 1)
json_str = tvm.ir.save_json(r0.op)
zz = tvm.ir.load_json(json_str)
- assert isinstance(zz, tvm.tensor.ScanOp)
+ assert isinstance(zz, tvm.te.ScanOp)
def test_extern():
- m = tvm.size_var('m')
- A = tvm.placeholder((m,), name='A')
+ m = te.size_var('m')
+ A = te.placeholder((m,), name='A')
def extern_func(ins, outs):
- assert(isinstance(ins[0], tvm.schedule.Buffer))
- return tvm.call_packed("myadd", ins[0].data, outs[0].data, m)
- B = tvm.extern((m,), [A], extern_func)
+ assert(isinstance(ins[0], tvm.te.schedule.Buffer))
+ return tvm.tir.call_packed("myadd", ins[0].data, outs[0].data, m)
+ B = te.extern((m,), [A], extern_func)
assert(tuple(B.shape) == (m,))
def test_extern_multi_out():
- m = tvm.size_var('m')
- A = tvm.placeholder((m,), name='A')
- B = tvm.compute((m,), lambda i: A[i] * 10)
+ m = te.size_var('m')
+ A = te.placeholder((m,), name='A')
+ B = te.compute((m,), lambda i: A[i] * 10)
def extern_func(ins, outs):
- assert(isinstance(ins[0], tvm.schedule.Buffer))
- return tvm.call_packed(
+ assert(isinstance(ins[0], tvm.te.schedule.Buffer))
+ return tvm.tir.call_packed(
"myadd", ins[0].data, outs[0].data, outs[1].data, m)
- res = tvm.extern([A.shape, A.shape], [A, B], extern_func)
+ res = te.extern([A.shape, A.shape], [A, B], extern_func)
assert(len(res) == 2)
assert(res[1].value_index == 1)
def test_tuple_inputs():
- m = tvm.size_var('m')
- n = tvm.size_var('n')
- A0 = tvm.placeholder((m, n), name='A0')
- A1 = tvm.placeholder((m, n), name='A1')
- T0, T1 = tvm.compute((m, n), lambda i, j: (A0[i, j] * 2, A1[i, j] * 3), name='T')
- s = tvm.create_schedule(T0.op)
+ m = te.size_var('m')
+ n = te.size_var('n')
+ A0 = te.placeholder((m, n), name='A0')
+ A1 = te.placeholder((m, n), name='A1')
+ T0, T1 = te.compute((m, n), lambda i, j: (A0[i, j] * 2, A1[i, j] * 3), name='T')
+ s = te.create_schedule(T0.op)
for i in range(len(T0.shape)):
assert(T0.shape[i] == T1.shape[i])
assert(T1.value_index == 1)
def test_tuple_with_different_deps():
- m = tvm.size_var('m')
- n = tvm.size_var('n')
- A0 = tvm.placeholder((m, n), name='A1')
- A1 = tvm.placeholder((m, n), name='A2')
- B0, B1 = tvm.compute((m, n), lambda i, j: (A0[i, j] * 2, A1[i, j] * 3), name='B')
- C = tvm.compute((m, n), lambda i, j: B0[i, j] + 4, name='C')
-
- s = tvm.create_schedule(C.op)
+ m = te.size_var('m')
+ n = te.size_var('n')
+ A0 = te.placeholder((m, n), name='A1')
+ A1 = te.placeholder((m, n), name='A2')
+ B0, B1 = te.compute((m, n), lambda i, j: (A0[i, j] * 2, A1[i, j] * 3), name='B')
+ C = te.compute((m, n), lambda i, j: B0[i, j] + 4, name='C')
+
+ s = te.create_schedule(C.op)
xo, xi = s[C].split(C.op.axis[0], factor=10)
s[B0.op].compute_at(s[C], xo)
sch = s.normalize()
- bounds = tvm.schedule.InferBound(sch)
- stmt = tvm.schedule.ScheduleOps(sch, bounds)
+ bounds = tvm.te.schedule.InferBound(sch)
+ stmt = tvm.te.schedule.ScheduleOps(sch, bounds)
def get_B1_realize(x):
if isinstance(x, tvm.tir.Realize) and \
x.func == B1.op and x.value_index == 1:
ret.append(x)
ret = []
- tvm.ir_pass.PostOrderVisit(stmt, get_B1_realize)
+ tvm.tir.ir_pass.PostOrderVisit(stmt, get_B1_realize)
assert stmt.node == C.op and len(ret) == 1
def test_tensor_inputs():
- x = tvm.placeholder((1,), name='x')
- y = tvm.compute(x.shape, lambda i: x[i] + x[i])
+ x = te.placeholder((1,), name='x')
+ y = te.compute(x.shape, lambda i: x[i] + x[i])
assert tuple(y.op.input_tensors) == (x,)
def test_tensor_pool():
def intrin_pool():
- A = tvm.placeholder((64, 16, 16), name='A')
- kh = tvm.reduce_axis((0, 3), name='kh')
- kw = tvm.reduce_axis((0, 3), name='kw')
- P = tvm.compute((64, 14, 14),
- lambda c, oh, ow: tvm.max(A[c, oh + kh, ow + kw],
+ A = te.placeholder((64, 16, 16), name='A')
+ kh = te.reduce_axis((0, 3), name='kh')
+ kw = te.reduce_axis((0, 3), name='kw')
+ P = te.compute((64, 14, 14),
+ lambda c, oh, ow: tvm.te.max(A[c, oh + kh, ow + kw],
axis=[kh, kw]),
name='p')
def intrin_func(ins, outs):
dinp = ins[0]
dout = outs[0]
- return tvm.call_packed("op", dinp, dout)
+ return tvm.tir.call_packed("op", dinp, dout)
- with tvm.build_config(offset_factor=1):
- return tvm.decl_tensor_intrin(P.op, intrin_func)
+ with tvm.target.build_config(offset_factor=1):
+ return te.decl_tensor_intrin(P.op, intrin_func)
- A = tvm.placeholder((1, 64, 16, 16), name='A')
+ A = te.placeholder((1, 64, 16, 16), name='A')
P = pool(data=A, kernel=(3, 3), stride=(1, 1), padding=(0, 0, 0, 0),
pool_type='max')
- s = tvm.create_schedule(P.op)
+ s = te.create_schedule(P.op)
_, oh, _, _ = P.op.axis
intrin = intrin_pool()
s[P].tensorize(oh, intrin)
# under the License.
import numpy as np
import tvm
+from tvm import te
import topi
import topi.testing
from topi.util import get_const_tuple
def test_operator_type_and_tags():
k = 1
- n = tvm.var('n')
- A = tvm.placeholder((), name='A')
- B = tvm.placeholder((10, 5), name='B')
+ n = te.var('n')
+ A = te.placeholder((), name='A')
+ B = te.placeholder((10, 5), name='B')
B1 = B[0]
B2 = B[0,0]
assert isinstance(k + n, tvm.tir.PrimExpr)
assert isinstance(n + n, tvm.tir.PrimExpr)
- assert isinstance(k + A, tvm.tensor.Tensor)
- assert isinstance(A + k, tvm.tensor.Tensor)
- assert isinstance(n + A, tvm.tensor.Tensor)
- assert isinstance(A + n, tvm.tensor.Tensor)
- assert isinstance(A + A, tvm.tensor.Tensor)
-
- assert isinstance(k + B, tvm.tensor.Tensor)
- assert isinstance(B + k, tvm.tensor.Tensor)
- assert isinstance(n + B, tvm.tensor.Tensor)
- assert isinstance(B + n, tvm.tensor.Tensor)
- assert isinstance(A + B, tvm.tensor.Tensor)
- assert isinstance(B + A, tvm.tensor.Tensor)
- assert isinstance(B + B, tvm.tensor.Tensor)
+ assert isinstance(k + A, te.tensor.Tensor)
+ assert isinstance(A + k, te.tensor.Tensor)
+ assert isinstance(n + A, te.tensor.Tensor)
+ assert isinstance(A + n, te.tensor.Tensor)
+ assert isinstance(A + A, te.tensor.Tensor)
+
+ assert isinstance(k + B, te.tensor.Tensor)
+ assert isinstance(B + k, te.tensor.Tensor)
+ assert isinstance(n + B, te.tensor.Tensor)
+ assert isinstance(B + n, te.tensor.Tensor)
+ assert isinstance(A + B, te.tensor.Tensor)
+ assert isinstance(B + A, te.tensor.Tensor)
+ assert isinstance(B + B, te.tensor.Tensor)
assert (k + B).op.tag == topi.tag.ELEMWISE
assert (B + k).op.tag == topi.tag.ELEMWISE
assert isinstance(n + B2, tvm.tir.PrimExpr)
assert isinstance(B2 + n, tvm.tir.PrimExpr)
assert isinstance(B2 + B2, tvm.tir.PrimExpr)
- assert isinstance(B2 + A, tvm.tensor.Tensor)
- assert isinstance(A + B2, tvm.tensor.Tensor)
- assert isinstance(B2 + B, tvm.tensor.Tensor)
- assert isinstance(B + B2, tvm.tensor.Tensor)
+ assert isinstance(B2 + A, te.tensor.Tensor)
+ assert isinstance(A + B2, te.tensor.Tensor)
+ assert isinstance(B2 + B, te.tensor.Tensor)
+ assert isinstance(B + B2, te.tensor.Tensor)
def test_combination():
k = 3
n = 5
m = 10
- x = tvm.var('x')
- A = tvm.placeholder((n, m), name='A')
- B = tvm.placeholder((n, m), name='B')
- C = tvm.placeholder((n, m), name='C')
+ x = te.var('x')
+ A = te.placeholder((n, m), name='A')
+ B = te.placeholder((n, m), name='B')
+ C = te.placeholder((n, m), name='C')
D = k + A - B * C + x
- s = tvm.create_schedule(D.op)
+ s = te.create_schedule(D.op)
foo = tvm.build(s, [x, A, B, C, D], "llvm")
ctx = tvm.cpu(0)
x = 2
def verify_tensor_scalar_bop(shape, typ="add"):
"""Verify non-constant Tensor and scalar binary operations."""
- sh = [tvm.size_var('n%d' % i) for i in range(0, len(shape))]
- k = tvm.var('k')
- A = tvm.placeholder(sh, name='A')
+ sh = [te.size_var('n%d' % i) for i in range(0, len(shape))]
+ k = te.var('k')
+ A = te.placeholder(sh, name='A')
if typ == "add":
B = A + k
elif typ == "sub":
def verify_broadcast_bop(lhs_shape, rhs_shape, typ="add"):
- A = tvm.placeholder(shape=lhs_shape, name="A")
- B = tvm.placeholder(shape=rhs_shape, name="B")
+ A = te.placeholder(shape=lhs_shape, name="A")
+ B = te.placeholder(shape=rhs_shape, name="B")
if typ == "add":
C = A + B
elif typ == "sub":
k = 10.0
dilation = (1, 1)
with tvm.target.create(device):
- A = tvm.placeholder((batch, in_channel, in_size, in_size), name='A')
- W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W')
+ A = te.placeholder((batch, in_channel, in_size, in_size), name='A')
+ W = te.placeholder((num_filter, in_channel, kernel, kernel), name='W')
B = conv2d_nchw(A, W, stride, padding, dilation, A.dtype)
if typ == "add":
C = B + k
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_verify_compute():
- n = tvm.size_var("n")
- m = tvm.size_var("m")
- A = tvm.placeholder((n, m), name='A')
- k = tvm.reduce_axis((0, m), "k")
- k_ = tvm.reduce_axis((0, m-1), "k_")
- f1 = lambda i: tvm.sum(A[i, k], axis=k)
+ n = te.size_var("n")
+ m = te.size_var("m")
+ A = te.placeholder((n, m), name='A')
+ k = te.reduce_axis((0, m), "k")
+ k_ = te.reduce_axis((0, m-1), "k_")
+ f1 = lambda i: te.sum(A[i, k], axis=k)
f2 = lambda i: A[i,0] + 1
- f3 = lambda i: tvm.sum(A[i, k], axis=k) + 1
- f4 = lambda i: A[i,0] * (tvm.sum(A[i, k], axis=k) + 1)
- f5 = lambda i: (tvm.sum(A[i, k], axis=k), A[i,0] + 1)
- f6 = lambda i: (tvm.sum(A[i, k], axis=k), tvm.sum(A[i, k_], axis=k_))
+ f3 = lambda i: te.sum(A[i, k], axis=k) + 1
+ f4 = lambda i: A[i,0] * (te.sum(A[i, k], axis=k) + 1)
+ f5 = lambda i: (te.sum(A[i, k], axis=k), A[i,0] + 1)
+ f6 = lambda i: (te.sum(A[i, k], axis=k), te.sum(A[i, k_], axis=k_))
#
# Valid compute
try:
- B = tvm.compute((n,), f1, name="B")
+ B = te.compute((n,), f1, name="B")
except tvm._ffi.base.TVMError as ex:
assert False
#
# Valid compute
try:
- B = tvm.compute((n,), f2, name="B")
+ B = te.compute((n,), f2, name="B")
except tvm._ffi.base.TVMError as ex:
assert False
#
# Invalid compute with non top level reduction
try:
- B = tvm.compute((n,), f3, name="B")
+ B = te.compute((n,), f3, name="B")
assert False
except tvm._ffi.base.TVMError as ex:
pass
#
# Invalid compute with non top level reduction
try:
- B = tvm.compute((n,), f4, name="B")
+ B = te.compute((n,), f4, name="B")
assert False
except tvm._ffi.base.TVMError as ex:
pass
#
# Invalid compute with reduction and non-reduction batch ops
try:
- B0, B1 = tvm.compute((n,), f5, name="B")
+ B0, B1 = te.compute((n,), f5, name="B")
assert False
except tvm._ffi.base.TVMError as ex:
pass
#
# Invalid compute with unequal batch reduction ops
try:
- B0, B1 = tvm.compute((n,), f6, name="B")
+ B0, B1 = te.compute((n,), f6, name="B")
assert False
except tvm._ffi.base.TVMError as ex:
pass
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_attrs_equal():
x = tvm.ir.make_node("attrs.TestAttrs", name="xx", padding=(3, 4))
y = tvm.ir.make_node("attrs.TestAttrs", name="xx", padding=(3, 4))
z = tvm.ir.make_node("attrs.TestAttrs", name="xx", padding=(3,4,1))
- assert tvm.ir_pass.AttrsEqual(x, y)
- assert not tvm.ir_pass.AttrsEqual(x, z)
+ assert tvm.tir.ir_pass.AttrsEqual(x, y)
+ assert not tvm.tir.ir_pass.AttrsEqual(x, z)
dattr = tvm.ir.make_node("DictAttrs", x=1, y=10, name="xyz", padding=(0,0))
- assert not tvm.ir_pass.AttrsEqual(dattr, x)
+ assert not tvm.tir.ir_pass.AttrsEqual(dattr, x)
dattr2 = tvm.ir.make_node("DictAttrs", x=1, y=10, name="xyz", padding=(0,0))
- assert tvm.ir_pass.AttrsEqual(dattr, dattr2)
+ assert tvm.tir.ir_pass.AttrsEqual(dattr, dattr2)
- assert tvm.ir_pass.AttrsEqual({"x": x}, {"x": y})
+ assert tvm.tir.ir_pass.AttrsEqual({"x": x}, {"x": y})
# array related checks
- assert tvm.ir_pass.AttrsEqual({"x": [x, x]}, {"x": [y, x]})
- assert not tvm.ir_pass.AttrsEqual({"x": [x, 1]}, {"x": [y, 2]})
+ assert tvm.tir.ir_pass.AttrsEqual({"x": [x, x]}, {"x": [y, x]})
+ assert not tvm.tir.ir_pass.AttrsEqual({"x": [x, 1]}, {"x": [y, 2]})
- n = tvm.var("n")
- assert tvm.ir_pass.AttrsEqual({"x": n+1}, {"x": n+1})
+ n = te.var("n")
+ assert tvm.tir.ir_pass.AttrsEqual({"x": n+1}, {"x": n+1})
def test_attrs_hash():
- fhash = tvm.ir_pass.AttrsHash
+ fhash = tvm.tir.ir_pass.AttrsHash
x = tvm.ir.make_node("attrs.TestAttrs", name="xx", padding=(3, 4))
y = tvm.ir.make_node("attrs.TestAttrs", name="xx", padding=(3, 4))
assert fhash({"x": x}) == fhash({"x": y})
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_simplify():
- tdiv = tvm.truncdiv
- tmod = tvm.truncmod
- x = tvm.var('x')
- e1 = tvm.ir_pass.Simplify(x + 2 + 1)
- assert(tvm.ir_pass.Equal(e1, x + 3))
- e2 = tvm.ir_pass.Simplify(x * 3 + 5 * x)
- assert(tvm.ir_pass.Equal(e2, x * 8))
- e3 = tvm.ir_pass.Simplify(x - tdiv(x, 3) * 3)
- assert(tvm.ir_pass.Equal(e3, tmod(x, 3)))
+ tdiv = tvm.tir.truncdiv
+ tmod = tvm.tir.truncmod
+ x = te.var('x')
+ e1 = tvm.tir.ir_pass.Simplify(x + 2 + 1)
+ assert(tvm.tir.ir_pass.Equal(e1, x + 3))
+ e2 = tvm.tir.ir_pass.Simplify(x * 3 + 5 * x)
+ assert(tvm.tir.ir_pass.Equal(e2, x * 8))
+ e3 = tvm.tir.ir_pass.Simplify(x - tdiv(x, 3) * 3)
+ assert(tvm.tir.ir_pass.Equal(e3, tmod(x, 3)))
def test_verify_ssa():
- x = tvm.var('x')
- y = tvm.var()
+ x = te.var('x')
+ y = te.var()
z = tvm.tir.Evaluate(x + y)
- assert(tvm.ir_pass.VerifySSA(z))
+ assert(tvm.tir.ir_pass.VerifySSA(z))
def test_convert_ssa():
- x = tvm.var('x')
- y = tvm.var()
+ x = te.var('x')
+ y = te.var()
let1 = tvm.tir.Let(x, 1, x + 1)
let2 = tvm.tir.Let(x, 1, x + y)
z = tvm.tir.Evaluate(let1 + let2)
- assert(not tvm.ir_pass.VerifySSA(z))
- z_ssa = tvm.ir_pass.ConvertSSA(z)
- assert(tvm.ir_pass.VerifySSA(z_ssa))
+ assert(not tvm.tir.ir_pass.VerifySSA(z))
+ z_ssa = tvm.tir.ir_pass.ConvertSSA(z)
+ assert(tvm.tir.ir_pass.VerifySSA(z_ssa))
def test_expr_use_var():
- x = tvm.var('x')
- assert(tvm.ir_pass.ExprUseVar(x+1, x))
- assert(not tvm.ir_pass.ExprUseVar(1+10, x))
+ x = te.var('x')
+ assert(tvm.tir.ir_pass.ExprUseVar(x+1, x))
+ assert(not tvm.tir.ir_pass.ExprUseVar(1+10, x))
if __name__ == "__main__":
# under the License.
import pytest
import tvm
+from tvm import te
import numpy as np
def collect_visit(stmt, f):
ret = []
- tvm.ir_pass.PostOrderVisit(stmt, lambda x: ret.append(f(x)))
+ tvm.tir.ir_pass.PostOrderVisit(stmt, lambda x: ret.append(f(x)))
return ret
def lower(sch, args):
binds = {}
arg_list = []
for x in args:
- if isinstance(x, tvm.tensor.Tensor):
- buf = tvm.decl_buffer(x.shape, dtype=x.dtype, name=x.name)
+ if isinstance(x, te.tensor.Tensor):
+ buf = tvm.tir.decl_buffer(x.shape, dtype=x.dtype, name=x.name)
assert x not in binds
binds[x] = buf
arg_list.append(buf)
else:
raise ValueError("args must be Tensor, Buffer or Var")
sch = sch.normalize()
- bounds = tvm.schedule.InferBound(sch)
- stmt = tvm.schedule.ScheduleOps(sch, bounds)
- stmt = tvm.ir_pass.LoopPartition(stmt, True)
- stmt = tvm.ir_pass.RemoveNoOp(stmt)
- stmt = tvm.ir_pass.StorageFlatten(stmt, binds, 64, True)
- stmt = tvm.ir_pass.CanonicalSimplify(stmt)
- stmt = tvm.ir_pass.VectorizeLoop(stmt)
- stmt = tvm.ir_pass.Simplify(stmt)
+ bounds = tvm.te.schedule.InferBound(sch)
+ stmt = tvm.te.schedule.ScheduleOps(sch, bounds)
+ stmt = tvm.tir.ir_pass.LoopPartition(stmt, True)
+ stmt = tvm.tir.ir_pass.RemoveNoOp(stmt)
+ stmt = tvm.tir.ir_pass.StorageFlatten(stmt, binds, 64, True)
+ stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt)
+ stmt = tvm.tir.ir_pass.VectorizeLoop(stmt)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
return stmt
@pytest.mark.xfail
def test_out_of_bounds_llvm(index_a, index_b):
- n = tvm.size_var("n")
- A = tvm.placeholder ((n,), name='A')
- B = tvm.placeholder ((n,), name='B')
- C = tvm.compute(A.shape, lambda i: A[i + index_a] + B[i + index_b], name='C')
- s = tvm.create_schedule (C.op)
+ n = te.size_var("n")
+ A = te.placeholder ((n,), name='A')
+ B = te.placeholder ((n,), name='B')
+ C = te.compute(A.shape, lambda i: A[i + index_a] + B[i + index_b], name='C')
+ s = te.create_schedule (C.op)
tgt = "llvm"
tgt_host = "llvm"
stmt = tvm.lower (s, [A, B, C], simple_mode=True)
fadd (a, b, c)
def test_in_bounds_llvm():
- n = tvm.size_var("n")
- A = tvm.placeholder ((n,), name='A')
- B = tvm.placeholder ((n,), name='B')
- C = tvm.compute(A.shape, lambda i: A[i] + B[i], name='C')
- s = tvm.create_schedule (C.op)
+ n = te.size_var("n")
+ A = te.placeholder ((n,), name='A')
+ B = te.placeholder ((n,), name='B')
+ C = te.compute(A.shape, lambda i: A[i] + B[i], name='C')
+ s = te.create_schedule (C.op)
tgt = "llvm"
tgt_host = "llvm"
stmt = tvm.lower (s, [A, B, C], simple_mode=True)
@pytest.mark.xfail
def test_out_of_bounds_vectorize_llvm(nn, index_a, index_b):
- n = tvm.convert(nn)
- a = tvm.placeholder((n), name='a')
- b = tvm.placeholder((n), name='b')
- c = tvm.compute((n,), lambda i: a[i + index_a] + b[i + index_b], name='c')
- s = tvm.create_schedule(c.op)
+ n = tvm.runtime.convert(nn)
+ a = te.placeholder((n), name='a')
+ b = te.placeholder((n), name='b')
+ c = te.compute((n,), lambda i: a[i + index_a] + b[i + index_b], name='c')
+ s = te.create_schedule(c.op)
xo, xi = s[c].split(c.op.axis[0], factor=8)
s[c].parallel(xo)
s[c].vectorize(xi)
def test_in_bounds_vectorize_llvm():
n = 512
lanes = 2
- A = tvm.placeholder((n,), name='A', dtype="float32x%d" % lanes)
- B = tvm.compute((n,), lambda i: A[i], name='B')
- C = tvm.compute((n,), lambda i: B[i] + tvm.const(1, A.dtype), name='C')
- s = tvm.create_schedule(C.op)
+ A = te.placeholder((n,), name='A', dtype="float32x%d" % lanes)
+ B = te.compute((n,), lambda i: A[i], name='B')
+ C = te.compute((n,), lambda i: B[i] + tvm.tir.const(1, A.dtype), name='C')
+ s = te.create_schedule(C.op)
xo, xi = s[C].split(C.op.axis[0], nparts=2)
_, xi = s[C].split(xi, factor=2)
s[C].parallel(xo)
tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + 1)
def test_in_bounds_loop_partition_basic_llvm():
- n = tvm.size_var('n')
- A = tvm.placeholder((n, ), name='A')
- B = tvm.placeholder((n, ), name='B')
+ n = te.size_var('n')
+ A = te.placeholder((n, ), name='A')
+ B = te.placeholder((n, ), name='B')
- T = tvm.compute((n, ), lambda i: A[i]+B[i])
- s = tvm.create_schedule(T.op)
+ T = te.compute((n, ), lambda i: A[i]+B[i])
+ s = te.create_schedule(T.op)
xo, xi = s[T].split(T.op.axis[0], factor=4)
lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False)
print (lowered_func.body)
@pytest.mark.xfail
def test_out_of_bounds_loop_partition_basic_llvm(index_a, index_b):
- n = tvm.size_var('n')
- A = tvm.placeholder((n, ), name='A')
- B = tvm.placeholder((n, ), name='B')
+ n = te.size_var('n')
+ A = te.placeholder((n, ), name='A')
+ B = te.placeholder((n, ), name='B')
- T = tvm.compute((n, ), lambda i: A[i + index_a]+B[i + index_b])
- s = tvm.create_schedule(T.op)
+ T = te.compute((n, ), lambda i: A[i + index_a]+B[i + index_b])
+ s = te.create_schedule(T.op)
xo, xi = s[T].split(T.op.axis[0], factor=4)
lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False)
print (lowered_func.body)
branch_collector.append(x)
n = 21
- A = tvm.placeholder((n, ), name='A')
- B = tvm.placeholder((n, ), name='B')
+ A = te.placeholder((n, ), name='A')
+ B = te.placeholder((n, ), name='B')
- T = tvm.compute((n, ), lambda i: A[i]+B[i])
- s = tvm.create_schedule(T.op)
+ T = te.compute((n, ), lambda i: A[i]+B[i])
+ s = te.create_schedule(T.op)
xo, xi = s[T].split(T.op.axis[0], factor=4)
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
stmt = lower (s, [A, B, T])
# num_attributes = num_buffers * num_splits = 2 * 3
# before instrumentation
assert_bound_instrumentation(stmt, check_attr_stmt, 2 * 3)
assert_bound_instrumentation(stmt, check_branch_stmt, 0)
- stmt = tvm.ir_pass.InstrumentBoundCheckers(stmt)
+ stmt = tvm.tir.ir_pass.InstrumentBoundCheckers(stmt)
# after instrumentation
assert_bound_instrumentation(stmt, check_attr_stmt, 2 * 3)
assert_bound_instrumentation(stmt, check_branch_stmt, 2)
print (branch_collector[1].condition)
def test_in_bounds_const_loop_partition_llvm():
- with tvm.build_config(instrument_bound_checkers=True, partition_const_loop=True):
+ with tvm.target.build_config(instrument_bound_checkers=True, partition_const_loop=True):
n = 21
- A = tvm.placeholder((n, ), name='A')
- B = tvm.placeholder((n, ), name='B')
+ A = te.placeholder((n, ), name='A')
+ B = te.placeholder((n, ), name='B')
- T = tvm.compute((n, ), lambda i: A[i]+B[i])
- s = tvm.create_schedule(T.op)
+ T = te.compute((n, ), lambda i: A[i]+B[i])
+ s = te.create_schedule(T.op)
xo, xi = s[T].split(T.op.axis[0], factor=4)
lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False)
print (lowered_func.body)
@pytest.mark.xfail
def test_out_of_bounds_const_loop_partition_llvm(index_a, index_b):
- with tvm.build_config(instrument_bound_checkers=True, partition_const_loop=True):
+ with tvm.target.build_config(instrument_bound_checkers=True, partition_const_loop=True):
n = 21
- A = tvm.placeholder((n, ), name='A')
- B = tvm.placeholder((n, ), name='B')
+ A = te.placeholder((n, ), name='A')
+ B = te.placeholder((n, ), name='B')
- T = tvm.compute((n, ), lambda i: A[i + index_a]+B[i + index_b])
- s = tvm.create_schedule(T.op)
+ T = te.compute((n, ), lambda i: A[i + index_a]+B[i + index_b])
+ s = te.create_schedule(T.op)
xo, xi = s[T].split(T.op.axis[0], factor=4)
lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False)
print (lowered_func.body)
batch_size = 1
in_height = in_width = 64
out_height = out_width = in_height - kernel_height + 1
- data = tvm.placeholder((batch_size, in_channel, in_height, in_width), name='data')
- kernel = tvm.placeholder((kernel_height, kernel_width, in_channel,
+ data = te.placeholder((batch_size, in_channel, in_height, in_width), name='data')
+ kernel = te.placeholder((kernel_height, kernel_width, in_channel,
out_channel), name='kernel')
- ic = tvm.reduce_axis((0, in_channel), name='ic')
- kh = tvm.reduce_axis((0, kernel_height), name='kh')
- kw = tvm.reduce_axis((0, kernel_width), name='kw')
- conv = tvm.compute((batch_size, out_channel, out_height, out_width),
- lambda n, oc, oh, ow: tvm.sum(data[n, ic, oh*HSTR + kh, ow*WSTR + kw] *
+ ic = te.reduce_axis((0, in_channel), name='ic')
+ kh = te.reduce_axis((0, kernel_height), name='kh')
+ kw = te.reduce_axis((0, kernel_width), name='kw')
+ conv = te.compute((batch_size, out_channel, out_height, out_width),
+ lambda n, oc, oh, ow: te.sum(data[n, ic, oh*HSTR + kh, ow*WSTR + kw] *
kernel[kh, kw, ic, oc],
axis=[ic, kh, kw]),
name="conv2d")
- s = tvm.create_schedule(conv.op)
+ s = te.create_schedule(conv.op)
n, oc, oh, ow = conv.op.axis
if loop_tiling:
f = tvm.build(s, [data, kernel, conv], "llvm")
data_input = tvm.nd.array(np.random.uniform(
- size=(batch_size, in_channel, in_height, in_width)).astype(tvm.float32), ctx)
+ size=(batch_size, in_channel, in_height, in_width)).astype("float32"), ctx)
kernel_input = tvm.nd.array(np.random.uniform(
- size=(kernel_height, kernel_width, in_channel, out_channel)).astype(tvm.float32), ctx)
- conv_out = tvm.nd.empty ((batch_size, out_channel, out_height, out_width), tvm.float32, ctx)
+ size=(kernel_height, kernel_width, in_channel, out_channel)).astype("float32"), ctx)
+ conv_out = tvm.nd.empty ((batch_size, out_channel, out_height, out_width), "float32", ctx)
f(data_input, kernel_input, conv_out)
@pytest.mark.xfail
batch_size = 1
in_height = in_width = 64
out_height = out_width = in_height - kernel_height + 1
- data = tvm.placeholder((batch_size, in_channel, in_height, in_width), name='data')
- kernel = tvm.placeholder((kernel_height, kernel_width, in_channel,
+ data = te.placeholder((batch_size, in_channel, in_height, in_width), name='data')
+ kernel = te.placeholder((kernel_height, kernel_width, in_channel,
out_channel), name='kernel')
- ic = tvm.reduce_axis((0, in_channel), name='ic')
- kh = tvm.reduce_axis((0, kernel_height), name='kh')
- kw = tvm.reduce_axis((0, kernel_width), name='kw')
- conv = tvm.compute((batch_size, out_channel, out_height, out_width),
- lambda n, oc, oh, ow: tvm.sum(data[n + data_offsets[0],
+ ic = te.reduce_axis((0, in_channel), name='ic')
+ kh = te.reduce_axis((0, kernel_height), name='kh')
+ kw = te.reduce_axis((0, kernel_width), name='kw')
+ conv = te.compute((batch_size, out_channel, out_height, out_width),
+ lambda n, oc, oh, ow: te.sum(data[n + data_offsets[0],
ic + data_offsets[1],
oh*HSTR + kh + data_offsets[2],
ow*WSTR + kw + data_offsets[3]]
oc + kernel_offsets[3]],
axis=[ic, kh, kw]),
name="conv2d")
- s = tvm.create_schedule(conv.op)
+ s = te.create_schedule(conv.op)
n, oc, oh, ow = conv.op.axis
if loop_tiling:
f = tvm.build(s, [data, kernel, conv], "llvm")
data_input = tvm.nd.array(np.random.uniform(
- size=(batch_size, in_channel, in_height, in_width)).astype(tvm.float32), ctx)
+ size=(batch_size, in_channel, in_height, in_width)).astype("float32"), ctx)
kernel_input = tvm.nd.array(np.random.uniform(
- size=(kernel_height, kernel_width, in_channel, out_channel)).astype(tvm.float32), ctx)
- conv_out = tvm.nd.empty ((batch_size, out_channel, out_height, out_width), tvm.float32, ctx)
+ size=(kernel_height, kernel_width, in_channel, out_channel)).astype("float32"), ctx)
+ conv_out = tvm.nd.empty ((batch_size, out_channel, out_height, out_width), "float32", ctx)
f(data_input, kernel_input, conv_out)
def test_in_bounds_tensors_with_same_shapes1D_llvm():
- n = tvm.size_var('n')
- k = tvm.size_var('k')
- m = tvm.size_var('m')
- A = tvm.placeholder((n, ), name='A')
- B = tvm.placeholder((k, ), name='B')
-
- T = tvm.compute((m, ), lambda i: A[i]*B[i])
- s = tvm.create_schedule(T.op)
+ n = te.size_var('n')
+ k = te.size_var('k')
+ m = te.size_var('m')
+ A = te.placeholder((n, ), name='A')
+ B = te.placeholder((k, ), name='B')
+
+ T = te.compute((m, ), lambda i: A[i]*B[i])
+ s = te.create_schedule(T.op)
lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False)
print (lowered_func.body)
ctx = tvm.cpu(0)
@pytest.mark.xfail
def test_out_of_bounds_tensors_with_diff_shapes1D_llvm(a_shape, b_shape, c_shape):
- n = tvm.size_var('n')
- k = tvm.size_var('k')
- m = tvm.size_var('m')
- A = tvm.placeholder((n, ), name='A')
- B = tvm.placeholder((k, ), name='B')
-
- T = tvm.compute((m, ), lambda i: A[i]*B[i])
- s = tvm.create_schedule(T.op)
+ n = te.size_var('n')
+ k = te.size_var('k')
+ m = te.size_var('m')
+ A = te.placeholder((n, ), name='A')
+ B = te.placeholder((k, ), name='B')
+
+ T = te.compute((m, ), lambda i: A[i]*B[i])
+ s = te.create_schedule(T.op)
lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False)
print (lowered_func.body)
ctx = tvm.cpu(0)
f(a, b, t)
def test_in_bounds_tensors_with_same_shapes2D_llvm():
- n = tvm.size_var('n')
- k = tvm.size_var('k')
- m = tvm.size_var('m')
- A = tvm.placeholder((n, n), name='A')
- B = tvm.placeholder((k, k), name='B')
-
- T = tvm.compute((m, m), lambda i, j: A[i][j]*B[i][j])
- s = tvm.create_schedule(T.op)
+ n = te.size_var('n')
+ k = te.size_var('k')
+ m = te.size_var('m')
+ A = te.placeholder((n, n), name='A')
+ B = te.placeholder((k, k), name='B')
+
+ T = te.compute((m, m), lambda i, j: A[i][j]*B[i][j])
+ s = te.create_schedule(T.op)
lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False)
print (lowered_func.body)
ctx = tvm.cpu(0)
@pytest.mark.xfail
def test_out_of_bounds_tensors_with_diff_shapes2D_llvm(a_shape, b_shape, c_shape):
- n = tvm.size_var('n')
- k = tvm.size_var('k')
- m = tvm.size_var('m')
- A = tvm.placeholder((n, n), name='A')
- B = tvm.placeholder((k, k), name='B')
-
- T = tvm.compute((m, m), lambda i, j: A[i][j]*B[i][j])
- s = tvm.create_schedule(T.op)
+ n = te.size_var('n')
+ k = te.size_var('k')
+ m = te.size_var('m')
+ A = te.placeholder((n, n), name='A')
+ B = te.placeholder((k, k), name='B')
+
+ T = te.compute((m, m), lambda i, j: A[i][j]*B[i][j])
+ s = te.create_schedule(T.op)
lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False)
print (lowered_func.body)
ctx = tvm.cpu(0)
f(a, b, t)
def test_in_bounds_tensors_with_same_shapes3D_llvm():
- n = tvm.size_var('n')
- k = tvm.size_var('k')
- m = tvm.size_var('m')
- A = tvm.placeholder((n, n, n), name='A')
- B = tvm.placeholder((k, k, k), name='B')
-
- T = tvm.compute((m, m, m), lambda i, j, p: A[i][j][p]*B[i][j][p])
- s = tvm.create_schedule(T.op)
+ n = te.size_var('n')
+ k = te.size_var('k')
+ m = te.size_var('m')
+ A = te.placeholder((n, n, n), name='A')
+ B = te.placeholder((k, k, k), name='B')
+
+ T = te.compute((m, m, m), lambda i, j, p: A[i][j][p]*B[i][j][p])
+ s = te.create_schedule(T.op)
lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False)
print (lowered_func.body)
ctx = tvm.cpu(0)
@pytest.mark.xfail
def test_out_of_bounds_tensors_with_diff_shapes3D_llvm(a_shape, b_shape, c_shape):
- n = tvm.size_var('n')
- k = tvm.size_var('k')
- m = tvm.size_var('m')
- A = tvm.placeholder((n, n, n), name='A')
- B = tvm.placeholder((k, k, k), name='B')
-
- T = tvm.compute((m, m, m), lambda i, j, p: A[i][j][p]*B[i][j][p])
- s = tvm.create_schedule(T.op)
+ n = te.size_var('n')
+ k = te.size_var('k')
+ m = te.size_var('m')
+ A = te.placeholder((n, n, n), name='A')
+ B = te.placeholder((k, k, k), name='B')
+
+ T = te.compute((m, m, m), lambda i, j, p: A[i][j][p]*B[i][j][p])
+ s = te.create_schedule(T.op)
lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False)
print (lowered_func.body)
ctx = tvm.cpu(0)
if not tvm.runtime.enabled("llvm"):
return
n = 64
- A = tvm.placeholder((n, ), name='A')
- scale = tvm.placeholder((), name='scale')
- k = tvm.reduce_axis((0, n), name="k")
- C = tvm.compute((), lambda : tvm.sum(A[k + k + k] * scale, axis=k), name="C")
- D = tvm.compute((), lambda : C + 1)
- s = tvm.create_schedule(D.op)
+ A = te.placeholder((n, ), name='A')
+ scale = te.placeholder((), name='scale')
+ k = te.reduce_axis((0, n), name="k")
+ C = te.compute((), lambda : te.sum(A[k + k + k] * scale, axis=k), name="C")
+ D = te.compute((), lambda : C + 1)
+ s = te.create_schedule(D.op)
stmt = tvm.lower (s, [A, scale, D], simple_mode=True)
print (stmt)
# build and invoke the kernel.
tvm.testing.assert_allclose(d.asnumpy(), d_np)
if __name__ == "__main__":
- with tvm.build_config(instrument_bound_checkers=True):
+ with tvm.target.build_config(instrument_bound_checkers=True):
# zero scale
test_out_of_bounds_tensors_with_zero_shape_op_with_not_zero_shape_llvm()
# in bound
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_for():
- dev_type = tvm.var("dev_type")
+ dev_type = te.var("dev_type")
def device_context(dev_id):
- ctx = tvm.call_extern("handle", "device_context", dev_type, dev_id)
+ ctx = tvm.tir.call_extern("handle", "device_context", dev_type, dev_id)
return tvm.tir.Call(
"handle", "tvm_thread_context", [ctx], tvm.tir.Call.Intrinsic, None, 0)
- ib = tvm.ir_builder.create()
- n = tvm.var("n")
+ ib = tvm.tir.ir_builder.create()
+ n = te.var("n")
A = ib.allocate("float32", n, name="A", scope="global")
with ib.for_range(0, n, name="i") as i:
- ib.emit(tvm.call_extern
+ ib.emit(tvm.tir.call_extern
("int32", "fadd", device_context(0), A))
with ib.for_range(0, 10, name="j") as j:
- ib.emit(tvm.call_extern
+ ib.emit(tvm.tir.call_extern
("int32", "fadd", device_context(1), A))
- ib.emit(tvm.call_extern
+ ib.emit(tvm.tir.call_extern
("int32", "fadd", device_context(0), A))
body = ib.get()
- f = tvm.ir_pass.MakeAPI(body, "func", [dev_type, n], 2, True)
- f = tvm.ir_pass.CombineContextCall(f)
+ f = tvm.tir.ir_pass.MakeAPI(body, "func", [dev_type, n], 2, True)
+ f = tvm.tir.ir_pass.CombineContextCall(f)
assert f.body.value.dtype == "handle"
assert f.body.body.value.dtype == "handle"
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_decorate_device():
- m = tvm.size_var('m')
- l = tvm.size_var('l')
- A = tvm.placeholder((m, l), name='A')
+ m = te.size_var('m')
+ l = te.size_var('l')
+ A = te.placeholder((m, l), name='A')
- A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1')
- A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')
+ A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1')
+ A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')
- s = tvm.create_schedule(A2.op)
+ s = te.create_schedule(A2.op)
xo, xi = s[A2].split(A2.op.axis[0], factor=8)
s[A1].compute_at(s[A2], xo)
s[A1].set_scope("shared")
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
- stmt1 = tvm.ir_pass.Simplify(stmt)
- stmt2 = tvm.ir_pass.DecorateDeviceScope(stmt1)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
+ stmt1 = tvm.tir.ir_pass.Simplify(stmt)
+ stmt2 = tvm.tir.ir_pass.DecorateDeviceScope(stmt1)
assert isinstance(stmt2, tvm.tir.AttrStmt)
assert stmt2.attr_key == "device_scope"
assert stmt1 == stmt2.body
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_equal_expr():
- x = tvm.var('x')
- y = tvm.var('y')
+ x = te.var('x')
+ y = te.var('y')
def func1():
return x + y + 1
def func2():
- return tvm.exp(tvm.truncdiv((x + y + 1) * y, 4))
+ return te.exp(tvm.tir.truncdiv((x + y + 1) * y, 4))
- assert tvm.ir_pass.Equal(func1(), func1())
- assert tvm.ir_pass.Equal(func2(), func2())
- assert not tvm.ir_pass.Equal(func2(), func1())
+ assert tvm.tir.ir_pass.Equal(func1(), func1())
+ assert tvm.tir.ir_pass.Equal(func2(), func2())
+ assert not tvm.tir.ir_pass.Equal(func2(), func1())
def test_equal_compute():
- x = tvm.var('x')
- y = tvm.var('y')
+ x = te.var('x')
+ y = te.var('y')
n = 128
- A = tvm.placeholder((n, n), name='A')
- B = tvm.placeholder((n, n), name='B')
- ii = tvm.var('i')
- jj = tvm.var('j')
+ A = te.placeholder((n, n), name='A')
+ B = te.placeholder((n, n), name='B')
+ ii = te.var('i')
+ jj = te.var('j')
def func1():
- k = tvm.reduce_axis((0, n), name='k')
- return tvm.sum(A[ii, k] * B[jj, k], axis=k)
+ k = te.reduce_axis((0, n), name='k')
+ return te.sum(A[ii, k] * B[jj, k], axis=k)
- Ab = tvm.decl_buffer((n,), name='A')
- n = tvm.var("n")
+ Ab = tvm.tir.decl_buffer((n,), name='A')
+ n = te.var("n")
def func2():
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
A = ib.buffer_ptr(Ab)
with ib.for_range(0, n, name="i") as i:
A[i] = A[i] + 1
A[j] = A[j] + 2
return ib.get()
- assert tvm.ir_pass.Equal(func1(), func1())
- assert tvm.ir_pass.Equal(func2(), func2())
+ assert tvm.tir.ir_pass.Equal(func1(), func1())
+ assert tvm.tir.ir_pass.Equal(func2(), func2())
if __name__ == "__main__":
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
var_list = []
key = op
if isinstance(op, tvm.tir.IfThenElse):
global var_list
- tvm.ir_pass.PostOrderVisit(op.condition, _extract_vars)
+ tvm.tir.ir_pass.PostOrderVisit(op.condition, _extract_vars)
val = [(op.then_case, op.else_case), ("IfThenElse", tuple(var_list))]
var_list.clear()
elif isinstance(op, tvm.tir.For):
return
node_dict[key] = val
- tvm.ir_pass.PostOrderVisit(stmt, _visit)
+ tvm.tir.ir_pass.PostOrderVisit(stmt, _visit)
for key, val in node_dict.items():
struct[val[1]] = tuple(node_dict[child][1] if child in node_dict
else None for child in val[0])
var_list.clear()
def test_basic():
- ib = tvm.ir_builder.create()
- l = tvm.var('l')
- m = tvm.var('m')
- n = tvm.var('n')
+ ib = tvm.tir.ir_builder.create()
+ l = te.var('l')
+ m = te.var('m')
+ n = te.var('n')
with ib.for_range(0, l, "i") as i:
with ib.for_range(0, m, "j") as j:
ib.emit(tvm.tir.Evaluate(n))
stmt = ib.get()
- new_stmt = tvm.ir_pass.HoistIfThenElse(stmt)
+ new_stmt = tvm.tir.ir_pass.HoistIfThenElse(stmt)
expected_struct = {('For', 'k'): (None,), ('For', 'j'): (('For', 'k'),),
('IfThenElse', ('i',)): (('For', 'j'), ('For', 'j')),
('For', 'i'): (('IfThenElse', ('i',)),)}
verify_structure(new_stmt, expected_struct)
def test_no_else():
- ib = tvm.ir_builder.create()
- l = tvm.var('l')
- m = tvm.var('m')
- n = tvm.var('n')
+ ib = tvm.tir.ir_builder.create()
+ l = te.var('l')
+ m = te.var('m')
+ n = te.var('n')
with ib.for_range(0, l, "i") as i:
with ib.for_range(0, m, "j") as j:
ib.emit(tvm.tir.Evaluate(m))
stmt = ib.get()
- new_stmt = tvm.ir_pass.HoistIfThenElse(stmt)
+ new_stmt = tvm.tir.ir_pass.HoistIfThenElse(stmt)
expected_struct = {('For', 'k'): (None,), ('For', 'j'): (('For', 'k'),),
('IfThenElse', ('i',)): (('For', 'j'), None),
('For', 'i'): (('IfThenElse', ('i',)),)}
verify_structure(new_stmt, expected_struct)
def test_attr_stmt():
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
dshape = (32, 64)
data = ib.pointer("float32", name="data")
- l = tvm.var('l')
- m = tvm.var('m')
- n = tvm.var('n')
+ l = te.var('l')
+ m = te.var('m')
+ n = te.var('n')
- tx = tvm.thread_axis("threadIdx.x")
- bx = tvm.thread_axis("blockIdx.x")
+ tx = te.thread_axis("threadIdx.x")
+ bx = te.thread_axis("blockIdx.x")
ib.scope_attr(tx, "thread_extent", dshape[0])
ib.scope_attr(bx, "thread_extent", dshape[1])
with ib.for_range(0, l, "i") as i:
with ib.for_range(0, m, "j") as j:
with ib.for_range(0, n, "k") as k:
- with ib.if_scope(tvm.any(i < 4, j >= 8)):
+ with ib.if_scope(tvm.tir.any(i < 4, j >= 8)):
data[bx * j + tx * j * k] = data[bx * j + tx * j * k] + 0.5
with ib.else_scope():
data[bx * j + tx * j * k] = data[bx * j + tx * j * k] + 1.0
stmt = ib.get()
- new_stmt = tvm.ir_pass.HoistIfThenElse(stmt)
+ new_stmt = tvm.tir.ir_pass.HoistIfThenElse(stmt)
expected_struct = {('For', 'k'): (None,), ('IfThenElse', ('i', 'j')): (('For', 'k'), ('For', 'k')),
('For', 'j'): (('IfThenElse', ('i', 'j')),), ('For', 'i'): (('For', 'j'),),
('AttrStmt', 'thread_extent', 64): (('For', 'i'),),
verify_structure(new_stmt, expected_struct)
def test_nested_for():
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
data = ib.pointer("float32", name="data")
data[i * 3 + j] = data[i * 3 + j] + 0.5
with ib.for_range(0, 15, "k") as k:
with ib.for_range(0, 20, "l") as l:
- with ib.if_scope(tvm.any(i < 4, j >= 8)):
+ with ib.if_scope(tvm.tir.any(i < 4, j >= 8)):
data[i * 3 + j + k + l] = data[i * 3 + j + k + l] * 2
with ib.else_scope():
data[i * 3 + j + k + l] = data[i * 3 + j + k + l] * 1.5
stmt = ib.get()
- new_stmt = tvm.ir_pass.HoistIfThenElse(stmt)
+ new_stmt = tvm.tir.ir_pass.HoistIfThenElse(stmt)
expected_struct = {('IfThenElse', ('i', 'j')): (None, None), ('For', 'l'): (('IfThenElse', ('i', 'j')),),
('For', 'k'): (('For', 'l'),), ('For', 'j'): (None,), ('IfThenElse', ('i',)): (('For', 'j'), None),
('For', 'i'): (('IfThenElse', ('i',)),)}
verify_structure(new_stmt, expected_struct)
def test_if_block():
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
data = ib.pointer("float32", name="data")
- n = tvm.var("n")
+ n = te.var("n")
with ib.for_range(0, 5, "i") as i:
data[i * 3 + j] = data[i * 3 + j] + 0.5
with ib.for_range(0, 15, "k") as k:
with ib.for_range(0, 20, "l") as l:
- with ib.if_scope(tvm.any(i < 4, j >= 8)):
+ with ib.if_scope(tvm.tir.any(i < 4, j >= 8)):
data[i * 3 + j + k + l] = data[i * 3 + j + k + l] * 2
with ib.else_scope():
data[i * 3 + j + k + l] = data[i * 3 + j + k + l] * 1.5
data[i * 3 + j + k] = data[i * 3 + j + k] + 0.6
stmt = ib.get()
- new_stmt = tvm.ir_pass.HoistIfThenElse(stmt)
+ new_stmt = tvm.tir.ir_pass.HoistIfThenElse(stmt)
expected_struct = {('IfThenElse', ('i', 'j')): (None, None), ('IfThenElse', ('j',)): (None, None),
('For', 'l'): (None,), ('For', 'k'): (None,), ('For', 'j'): (('For', 'j'),),
('IfThenElse', ('i',)): (('For', 'j'), None), ('For', 'i'): (('IfThenElse', ('i',)),),
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_copy2d():
- m = tvm.var('m')
- l = tvm.var('l')
- A = tvm.placeholder((m, l), name='A')
- B = tvm.compute((m, l), lambda i, j: A[i, j], name='B')
- s = tvm.create_schedule(B.op)
+ m = te.var('m')
+ l = te.var('l')
+ A = te.placeholder((m, l), name='A')
+ B = te.compute((m, l), lambda i, j: A[i, j], name='B')
+ s = te.create_schedule(B.op)
s[B].pragma(B.op.axis[0], "memcpy")
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
- Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
- Bb = tvm.decl_buffer(B.shape, B.dtype, name='B')
- stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
+ Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
+ Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B')
+ stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
def cb(src, dst, pad_before, pad_after, pad_value):
assert dst.strides[0] == l
assert dst.strides[1].value == 1
assert src.strides[0] == l
assert tuple(src.shape) == (m, l)
return tvm.tir.Evaluate(0)
- stmt = tvm.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb)
+ stmt = tvm.tir.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb)
def test_copy_pad():
- m = tvm.var('m')
- l = tvm.var('l')
- A = tvm.placeholder((m, l), name='A')
- B = tvm.compute((m + 2, l), lambda i, j:
- tvm.if_then_else(tvm.all(i >= 1, i < m + 1),
+ m = te.var('m')
+ l = te.var('l')
+ A = te.placeholder((m, l), name='A')
+ B = te.compute((m + 2, l), lambda i, j:
+ tvm.tir.if_then_else(tvm.tir.all(i >= 1, i < m + 1),
A[i - 1, j], 1.0), name='B')
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
s[B].pragma(B.op.axis[0], "memcpy")
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
- Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
- Bb = tvm.decl_buffer(B.shape, B.dtype, name='B')
- stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
+ Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
+ Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B')
+ stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
def cb(src, dst, pad_before, pad_after, pad_value):
- assert tvm.ir_pass.Simplify(src.elem_offset).value == 0
+ assert tvm.tir.ir_pass.Simplify(src.elem_offset).value == 0
assert pad_before[0].value == 1
assert pad_before[1].value == 0
assert pad_after[0].value == 1
assert pad_after[1].value == 0
assert pad_value.value == 1.0
return tvm.tir.Evaluate(0)
- stmt = tvm.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb)
+ stmt = tvm.tir.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb)
def test_single_point_test():
- A = tvm.placeholder((1,), name='A')
- B = tvm.compute((1,), lambda i:
+ A = te.placeholder((1,), name='A')
+ B = te.compute((1,), lambda i:
A[i], name='B')
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
s[B].pragma(B.op.axis[0], "memcpy")
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
- Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
- Bb = tvm.decl_buffer(B.shape, B.dtype, name='B')
- stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
+ Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
+ Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B')
+ stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
def cb(src, dst, pad_before, pad_after, pad_value):
- assert tvm.ir_pass.Simplify(src.elem_offset).value == 0
- assert tvm.ir_pass.Simplify(dst.elem_offset).value == 0
- assert tvm.ir_pass.Simplify(src.strides[0]).value == 1
- assert tvm.ir_pass.Simplify(dst.strides[0]).value == 1
+ assert tvm.tir.ir_pass.Simplify(src.elem_offset).value == 0
+ assert tvm.tir.ir_pass.Simplify(dst.elem_offset).value == 0
+ assert tvm.tir.ir_pass.Simplify(src.strides[0]).value == 1
+ assert tvm.tir.ir_pass.Simplify(dst.strides[0]).value == 1
return tvm.tir.Evaluate(0)
- stmt = tvm.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb)
+ stmt = tvm.tir.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb)
def assert_expr_equal(a, b):
- assert tvm.ir_pass.Simplify(a - b).value == 0
+ assert tvm.tir.ir_pass.Simplify(a - b).value == 0
def test_copy_pad_split():
m = 4 * 3
- A = tvm.placeholder((m, ), name="A")
- Apad = tvm.compute((m + 2,), lambda i:
- tvm.if_then_else(tvm.all(i >= 1, i <= m),
+ A = te.placeholder((m, ), name="A")
+ Apad = te.compute((m + 2,), lambda i:
+ tvm.tir.if_then_else(tvm.tir.all(i >= 1, i <= m),
A[i - 1], 0.0), "Apad")
- B = tvm.compute((m,), lambda i: Apad[i] + Apad[i + 1] + Apad[i + 2])
- s = tvm.create_schedule(B.op)
+ B = te.compute((m,), lambda i: Apad[i] + Apad[i + 1] + Apad[i + 2])
+ s = te.create_schedule(B.op)
xo, xi = s[B].split(B.op.axis[0], factor=4)
s[Apad].compute_at(s[B], xo)
s[Apad].pragma(s[Apad].op.axis[0], "memcpy")
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
- Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
- Bb = tvm.decl_buffer(B.shape, B.dtype, name='B')
- stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
- stmt = tvm.ir_pass.Simplify(stmt)
- stmt = tvm.ir_pass.CanonicalSimplify(stmt)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
+ Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
+ Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B')
+ stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
+ stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt)
def cb(src, dst, pad_before, pad_after, pad_value):
assert(dst.elem_offset.value == 0)
- assert_expr_equal(src.elem_offset, tvm.max(xo * 4, 1) - 1)
+ assert_expr_equal(src.elem_offset, tvm.te.max(xo * 4, 1) - 1)
- rpad_before = tvm.max(1 - xo * 4, 0)
- rpad_after = tvm.max(xo * 4 - 7, 0)
+ rpad_before = tvm.te.max(1 - xo * 4, 0)
+ rpad_after = tvm.te.max(xo * 4 - 7, 0)
assert_expr_equal(pad_before[0], rpad_before)
assert_expr_equal(pad_after[0], rpad_after)
assert_expr_equal(src.shape[0], 6 - rpad_before - rpad_after)
return tvm.tir.Evaluate(0)
- stmt = tvm.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb)
+ stmt = tvm.tir.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb)
if __name__ == "__main__":
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_double_buffer():
dtype = 'int64'
n = 100
m = 4
- tx = tvm.thread_axis("threadIdx.x")
- ib = tvm.ir_builder.create()
+ tx = te.thread_axis("threadIdx.x")
+ ib = tvm.tir.ir_builder.create()
A = ib.pointer("float32", name="A")
C = ib.pointer("float32", name="C")
ib.scope_attr(tx, "thread_extent", 1)
C[j] = B[j] + 1
stmt = ib.get()
- stmt = tvm.ir_pass.InjectDoubleBuffer(stmt, 2)
- stmt = tvm.ir_pass.Simplify(stmt)
+ stmt = tvm.tir.ir_pass.InjectDoubleBuffer(stmt, 2)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
assert isinstance(stmt.body.body, tvm.tir.Allocate)
assert stmt.body.body.extents[0].value == 2
- f = tvm.ir_pass.MakeAPI(stmt, "db", [A.asobject(), C.asobject()], 2, True)
- f = tvm.ir_pass.ThreadSync(f, "shared")
+ f = tvm.tir.ir_pass.MakeAPI(stmt, "db", [A.asobject(), C.asobject()], 2, True)
+ f = tvm.tir.ir_pass.ThreadSync(f, "shared")
count = [0]
def count_sync(op):
if isinstance(op, tvm.tir.Call) and op.name == "tvm_storage_sync":
count[0] += 1
- tvm.ir_pass.PostOrderVisit(f.body, count_sync)
+ tvm.tir.ir_pass.PostOrderVisit(f.body, count_sync)
assert count[0] == 4
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_vthread():
dtype = 'int64'
m = 4
nthread = 2
def get_vthread(name):
- tx = tvm.thread_axis(name)
- ty = tvm.thread_axis(name)
- ib = tvm.ir_builder.create()
+ tx = te.thread_axis(name)
+ ty = te.thread_axis(name)
+ ib = tvm.tir.ir_builder.create()
A = ib.pointer("float32", name="A")
C = ib.pointer("float32", name="C")
with ib.for_range(0, n) as i:
ib.scope_attr(ty, "virtual_thread", nthread)
B = ib.allocate("float32", m, name="B", scope="shared")
B[i] = A[i * nthread + tx]
- bbuffer = tvm.decl_buffer((m,), dtype=B.dtype, data=B.asobject())
- ib.emit(tvm.call_extern("int32", "Run",
+ bbuffer = tvm.tir.decl_buffer((m,), dtype=B.dtype, data=B.asobject())
+ ib.emit(tvm.tir.call_extern("int32", "Run",
bbuffer.access_ptr("r"),
- tvm.call_pure_intrin("int32", "tvm_context_id")))
+ tvm.tir.call_pure_intrin("int32", "tvm_context_id")))
C[i * nthread + tx] = B[i] + 1
return ib.get()
- stmt = tvm.ir_pass.InjectVirtualThread(get_vthread("vthread"))
+ stmt = tvm.tir.ir_pass.InjectVirtualThread(get_vthread("vthread"))
assert stmt.body.body.extents[0].value == 2
- stmt = tvm.ir_pass.InjectVirtualThread(get_vthread("cthread"))
+ stmt = tvm.tir.ir_pass.InjectVirtualThread(get_vthread("cthread"))
assert len(stmt.body.body.extents) == 3
m = 4
nthread = 2
def get_vthread(name):
- tx = tvm.thread_axis(name)
- ty = tvm.thread_axis(name)
- ib = tvm.ir_builder.create()
+ tx = te.thread_axis(name)
+ ty = te.thread_axis(name)
+ ib = tvm.tir.ir_builder.create()
with ib.for_range(0, n) as i:
ib.scope_attr(tx, "virtual_thread", nthread)
ib.scope_attr(ty, "virtual_thread", nthread)
A = ib.allocate("float32", m, name="A", scope="shared")
B = ib.allocate("float32", m, name="B", scope="shared")
C = ib.allocate("float32", m, name="C", scope="shared")
- cbuffer = tvm.decl_buffer((m,), dtype=C.dtype, data=C.asobject())
- abuffer = tvm.decl_buffer((m,), dtype=A.dtype, data=A.asobject())
- bbuffer = tvm.decl_buffer((m,), dtype=B.dtype, data=B.asobject())
+ cbuffer = tvm.tir.decl_buffer((m,), dtype=C.dtype, data=C.asobject())
+ abuffer = tvm.tir.decl_buffer((m,), dtype=A.dtype, data=A.asobject())
+ bbuffer = tvm.tir.decl_buffer((m,), dtype=B.dtype, data=B.asobject())
A[tx] = tx + 1.0
B[ty] = ty + 1.0
- ib.emit(tvm.call_extern("int32", "Run",
+ ib.emit(tvm.tir.call_extern("int32", "Run",
abuffer.access_ptr("r"),
bbuffer.access_ptr("r"),
cbuffer.access_ptr("rw")))
return ib.get()
- stmt = tvm.ir_pass.InjectVirtualThread(get_vthread("vthread"))
+ stmt = tvm.tir.ir_pass.InjectVirtualThread(get_vthread("vthread"))
assert stmt.body.body.extents[0].value == 2
assert stmt.body.body.body.body.body.body.extents[0].value == 2
assert len(stmt.body.body.body.body.body.body.extents) == 3
def test_vthread_if_then_else():
nthread = 2
- tx = tvm.thread_axis("vthread")
- ib = tvm.ir_builder.create()
+ tx = te.thread_axis("vthread")
+ ib = tvm.tir.ir_builder.create()
A = ib.pointer("float32", name="A")
with ib.for_range(0, 100) as i:
ib.scope_attr(tx, "virtual_thread", nthread)
with ib.if_scope(i == 0):
B[i] = A[i * nthread + tx] + 2
stmt = ib.get()
- stmt = tvm.ir_pass.InjectVirtualThread(stmt)
+ stmt = tvm.tir.ir_pass.InjectVirtualThread(stmt)
assert stmt.body.body.body[0].else_case != None
assert stmt.body.body.body[1].else_case == None
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_inline():
- m = tvm.size_var('m')
- A = tvm.placeholder((m,), name='A')
- T = tvm.compute((m,), lambda i,: A[i] + 10, name='T')
+ m = te.size_var('m')
+ A = te.placeholder((m,), name='A')
+ T = te.compute((m,), lambda i,: A[i] + 10, name='T')
stmt = tvm.tir.Evaluate(T[10] + 11 * T[100])
- stmt = tvm.ir_pass.Inline(
+ stmt = tvm.tir.ir_pass.Inline(
stmt, T.op, [x.var for x in T.op.axis], T.op.body[0])
print(stmt)
- assert(tvm.ir_pass.VerifySSA(stmt))
+ assert(tvm.tir.ir_pass.VerifySSA(stmt))
try:
# pass in int array(wrong argument type)
# must raise an error
- stmt = tvm.ir_pass.Inline(
+ stmt = tvm.tir.ir_pass.Inline(
T.op, [1,2,3], T.op.body, stmt)
assert False
except tvm.error.TVMError:
pass
def test_inline2():
- m = tvm.size_var('m')
- A = tvm.placeholder((m,), name='A')
- T = tvm.compute((m,), lambda i,: A[i] + 10, name='T')
- stmt = tvm.tir.Evaluate(tvm.exp(T[10]) + 11 * T[100])
- stmt = tvm.ir_pass.Inline(
+ m = te.size_var('m')
+ A = te.placeholder((m,), name='A')
+ T = te.compute((m,), lambda i,: A[i] + 10, name='T')
+ stmt = tvm.tir.Evaluate(te.exp(T[10]) + 11 * T[100])
+ stmt = tvm.tir.ir_pass.Inline(
stmt, T.op, [x.var for x in T.op.axis], T.op.body[0])
def check(op):
if isinstance(op, tvm.tir.Call):
assert op.func != T.op
- tvm.ir_pass.PostOrderVisit(stmt, check)
+ tvm.tir.ir_pass.PostOrderVisit(stmt, check)
if __name__ == "__main__":
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_ir_transform():
- ib = tvm.ir_builder.create()
- n = tvm.var("n")
+ ib = tvm.tir.ir_builder.create()
+ n = te.var("n")
with ib.for_range(0, n, name="i") as i:
with ib.for_range(0, 10, name="j") as j:
- x = tvm.call_extern("int32", "TestA", i * 3 + j * 1)
- ib.emit(tvm.call_extern("int32", "TestB", x))
- ib.emit(tvm.call_extern("int32", "TestC", x))
+ x = tvm.tir.call_extern("int32", "TestA", i * 3 + j * 1)
+ ib.emit(tvm.tir.call_extern("int32", "TestB", x))
+ ib.emit(tvm.tir.call_extern("int32", "TestC", x))
body = ib.get()
def preorder(op):
if op.name == "TestC":
- return tvm.const(0, "int32")
+ return tvm.tir.const(0, "int32")
return None
def postorder(op):
assert isinstance(op, tvm.tir.Call)
if op.name == "TestA":
- return tvm.call_extern("int32", "TestB", op.args[0] + 1)
+ return tvm.tir.call_extern("int32", "TestB", op.args[0] + 1)
return op
- body = tvm.ir_pass.IRTransform(body, preorder, postorder, ["Call"])
+ body = tvm.tir.ir_pass.IRTransform(body, preorder, postorder, ["Call"])
stmt_list = tvm.tir.stmt_list(body.body.body)
assert stmt_list[0].value.args[0].name == "TestB"
assert stmt_list[1].value.value == 0
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_coproc_lift():
- ib = tvm.ir_builder.create()
- n = tvm.var("n")
- cp = tvm.thread_axis((0, 1), "cop")
+ ib = tvm.tir.ir_builder.create()
+ n = te.var("n")
+ cp = te.thread_axis((0, 1), "cop")
value = tvm.tir.StringImm("xxx")
A = ib.allocate("float32", n, name="A", scope="global")
A[j] = A[j] + 3
A[j] = A[j] + 3
body = ib.get()
- body = tvm.ir_pass.LiftAttrScope(body, "coproc_uop_scope")
+ body = tvm.tir.ir_pass.LiftAttrScope(body, "coproc_uop_scope")
assert body.body.body.node == cp
# only able to lift to the common pattern of the last two fors.
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
A = ib.allocate("float32", n, name="A", scope="global")
with ib.for_range(0, n, name="i") as i:
with ib.for_range(0, 10, name="j") as j:
A[i] = A[i] + 2
body = ib.get()
- body = tvm.ir_pass.LiftAttrScope(body, "coproc_uop_scope")
+ body = tvm.tir.ir_pass.LiftAttrScope(body, "coproc_uop_scope")
assert body.body.body.body[1].node == cp
assert len(body.body.body.body) == 2
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy
def collect_visit(stmt, f):
ret = []
- tvm.ir_pass.PostOrderVisit(stmt, lambda x : ret.append(f(x)))
+ tvm.tir.ir_pass.PostOrderVisit(stmt, lambda x : ret.append(f(x)))
return ret
def find_top_produce(stmt):
if isinstance(x, tvm.tir.ProducerConsumer):
ret.append(x)
ret = []
- tvm.ir_pass.PostOrderVisit(stmt, lambda x : f(x, ret))
+ tvm.tir.ir_pass.PostOrderVisit(stmt, lambda x : f(x, ret))
return ret[-1]
def lower(sch, args):
binds = {}
arg_list = []
for x in args:
- if isinstance(x, tvm.tensor.Tensor):
- buf = tvm.decl_buffer(x.shape, dtype=x.dtype, name=x.name)
+ if isinstance(x, te.tensor.Tensor):
+ buf = tvm.tir.decl_buffer(x.shape, dtype=x.dtype, name=x.name)
assert x not in binds
binds[x] = buf
arg_list.append(buf)
else:
raise ValueError("args must be Tensor, Buffer or Var")
sch = sch.normalize()
- bounds = tvm.schedule.InferBound(sch)
- stmt = tvm.schedule.ScheduleOps(sch, bounds)
- stmt = tvm.ir_pass.LoopPartition(stmt, False)
- stmt = tvm.ir_pass.StorageFlatten(stmt, binds, 64)
- stmt = tvm.ir_pass.CanonicalSimplify(stmt)
- stmt = tvm.ir_pass.VectorizeLoop(stmt)
- stmt = tvm.ir_pass.Simplify(stmt)
+ bounds = tvm.te.schedule.InferBound(sch)
+ stmt = tvm.te.schedule.ScheduleOps(sch, bounds)
+ stmt = tvm.tir.ir_pass.LoopPartition(stmt, False)
+ stmt = tvm.tir.ir_pass.StorageFlatten(stmt, binds, 64)
+ stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt)
+ stmt = tvm.tir.ir_pass.VectorizeLoop(stmt)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
return stmt
def test_basic():
- n = tvm.size_var('n')
- A = tvm.placeholder((n, ), name='A')
- B = tvm.placeholder((n, ), name='B')
+ n = te.size_var('n')
+ A = te.placeholder((n, ), name='A')
+ B = te.placeholder((n, ), name='B')
- T = tvm.compute((n, ), lambda i: A[i]+B[i])
- s = tvm.create_schedule(T.op)
+ T = te.compute((n, ), lambda i: A[i]+B[i])
+ s = te.create_schedule(T.op)
xo, xi = s[T].split(T.op.axis[0], factor=4)
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
- stmt = tvm.ir_pass.LoopPartition(stmt, False)
- stmt = tvm.ir_pass.Simplify(stmt)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
+ stmt = tvm.tir.ir_pass.LoopPartition(stmt, False)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
assert('if' not in str(stmt.body.body.body[0]))
assert('if' in str(stmt.body.body.body[1]))
def test_const_loop():
n = 21
- A = tvm.placeholder((n, ), name='A')
- B = tvm.placeholder((n, ), name='B')
+ A = te.placeholder((n, ), name='A')
+ B = te.placeholder((n, ), name='B')
- T = tvm.compute((n, ), lambda i: A[i]+B[i])
- s = tvm.create_schedule(T.op)
+ T = te.compute((n, ), lambda i: A[i]+B[i])
+ s = te.create_schedule(T.op)
xo, xi = s[T].split(T.op.axis[0], factor=4)
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
- stmt = tvm.ir_pass.LoopPartition(stmt, True)
- stmt = tvm.ir_pass.Simplify(stmt)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
+ stmt = tvm.tir.ir_pass.LoopPartition(stmt, True)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
assert('if' not in str(stmt.body.body.body[0]))
def test_multi_loop():
- ib = tvm.ir_builder.create()
- m = tvm.size_var('m')
- n = tvm.size_var('n')
+ ib = tvm.tir.ir_builder.create()
+ m = te.size_var('m')
+ n = te.size_var('n')
with ib.for_range(0, 4, "i") as i:
with ib.for_range(0, n, "j") as j:
with ib.for_range(0, m, "k") as k:
with ib.else_scope():
ib.emit(tvm.tir.Evaluate(n))
stmt = ib.get()
- stmt = tvm.ir_pass.LoopPartition(stmt, False)
- stmt = tvm.ir_pass.Simplify(stmt)
+ stmt = tvm.tir.ir_pass.LoopPartition(stmt, False)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
assert(not any(collect_visit(stmt.body[0], lambda x: isinstance(x, tvm.tir.IfThenElse))))
def test_multi_if():
- ib = tvm.ir_builder.create()
- m = tvm.size_var('m')
- n = tvm.size_var('n')
+ ib = tvm.tir.ir_builder.create()
+ m = te.size_var('m')
+ n = te.size_var('n')
with ib.for_range(0, 4, 'i') as i:
with ib.for_range(0, n, 'j') as j:
with ib.for_range(0, m, 'k') as k:
with ib.else_scope():
ib.emit(tvm.tir.Evaluate(n))
stmt = ib.get()
- stmt = tvm.ir_pass.LoopPartition(stmt, False)
- stmt = tvm.ir_pass.Simplify(stmt)
+ stmt = tvm.tir.ir_pass.LoopPartition(stmt, False)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
assert('if' not in str(stmt.body[0]))
def test_thread_axis():
- m = tvm.size_var('m')
- l = tvm.size_var('l')
- A = tvm.placeholder((m, l), name='A')
- B = tvm.compute((m, l), lambda i, j: A[i, j] + 3, name='B')
- s = tvm.create_schedule(B.op)
+ m = te.size_var('m')
+ l = te.size_var('l')
+ A = te.placeholder((m, l), name='A')
+ B = te.compute((m, l), lambda i, j: A[i, j] + 3, name='B')
+ s = te.create_schedule(B.op)
s[B].set_scope("shared")
num_thread = 16
xo, xi = s[B].split(B.op.axis[0], 32)
xi0, xi1 = s[B].split(xi, nparts=num_thread)
- s[B].bind(xi0, tvm.thread_axis("threadIdx.x"))
+ s[B].bind(xi0, te.thread_axis("threadIdx.x"))
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
- stmt = tvm.ir_pass.LoopPartition(stmt, False)
- stmt = tvm.ir_pass.Simplify(stmt)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
+ stmt = tvm.tir.ir_pass.LoopPartition(stmt, False)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
assert('if' not in str(stmt.body.body.body[0]))
def test_vectorize():
- n = tvm.size_var('n')
- A = tvm.placeholder((n,), name='A')
- B = tvm.placeholder((n,), name='B')
- bias = tvm.size_var("bias", dtype="float32")
- scale = tvm.size_var("scale", dtype="float32")
- C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i) * scale + bias, name='C')
+ n = te.size_var('n')
+ A = te.placeholder((n,), name='A')
+ B = te.placeholder((n,), name='B')
+ bias = te.size_var("bias", dtype="float32")
+ scale = te.size_var("scale", dtype="float32")
+ C = te.compute(A.shape, lambda *i: A(*i) + B(*i) * scale + bias, name='C')
# schedule
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
# create iter var and assign them tags.
num_thread = 32
bx, x = s[C].split(C.op.axis[0], factor=num_thread*4)
tx, x = s[C].split(x, nparts=num_thread)
_, x = s[C].split(x, factor=4)
- s[C].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[C].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[C].bind(bx, te.thread_axis("blockIdx.x"))
+ s[C].bind(tx, te.thread_axis("threadIdx.x"))
s[C].vectorize(x)
stmt = lower(s, [A, B])
body = stmt.body.body.body.body.body
assert(any(collect_visit(body.then_case, lambda x: isinstance(x, tvm.tir.Ramp))))
def test_condition():
- ib = tvm.ir_builder.create()
- m = tvm.size_var('m')
- n = tvm.size_var('n')
- with ib.for_range(0, tvm.truncdiv(n+3,4), 'i') as i:
+ ib = tvm.tir.ir_builder.create()
+ m = te.size_var('m')
+ n = te.size_var('n')
+ with ib.for_range(0, tvm.tir.truncdiv(n+3,4), 'i') as i:
with ib.for_range(0, 4, 'j') as j:
ib.emit(tvm.tir.Evaluate(
tvm.tir.Select(ib.likely(i*4+j<n), m, n)))
stmt = ib.get()
- stmt = tvm.ir_pass.LoopPartition(stmt, False)
- stmt = tvm.ir_pass.Simplify(stmt)
+ stmt = tvm.tir.ir_pass.LoopPartition(stmt, False)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
assert(not any(collect_visit(stmt[0], lambda x: isinstance(x, tvm.tir.Select))))
def test_condition_EQ():
- ib = tvm.ir_builder.create()
- m = tvm.size_var('m')
- n = tvm.size_var('n')
+ ib = tvm.tir.ir_builder.create()
+ m = te.size_var('m')
+ n = te.size_var('n')
with ib.for_range(0, 10, 'i') as i:
ib.emit(tvm.tir.Evaluate(
tvm.tir.Select(ib.likely(tvm.tir.EQ(i, 5)), m, n)))
stmt = ib.get()
- stmt = tvm.ir_pass.LoopPartition(stmt, True)
- stmt = tvm.ir_pass.Simplify(stmt)
+ stmt = tvm.tir.ir_pass.LoopPartition(stmt, True)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
assert(not any(collect_visit(stmt[0], lambda x: isinstance(x, tvm.tir.Select))))
def test_thread_axis2():
- n = tvm.convert(4096)
- m = tvm.size_var('m')
- A = tvm.placeholder((n,), name='A')
- B = tvm.placeholder((n,), name='B')
- C = tvm.compute(A.shape, lambda i: A[i] + B[i], name='C')
- s = tvm.create_schedule(C.op)
+ n = tvm.runtime.convert(4096)
+ m = te.size_var('m')
+ A = te.placeholder((n,), name='A')
+ B = te.placeholder((n,), name='B')
+ C = te.compute(A.shape, lambda i: A[i] + B[i], name='C')
+ s = te.create_schedule(C.op)
num_thread = 32
bx, x = s[C].split(C.op.axis[0], factor=32)
tx, x = s[C].split(x, nparts=num_thread)
_, x = s[C].split(x, factor=m)
- s[C].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[C].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[C].bind(bx, te.thread_axis("blockIdx.x"))
+ s[C].bind(tx, te.thread_axis("threadIdx.x"))
stmt = lower(s, [A, B])
for_body = stmt.body.body.body.body.body[0]
assert('threadIdx' not in str(for_body.extent))
def test_everything_during_deduction():
- m = tvm.size_var('m')
- n = tvm.size_var('n')
- ib = tvm.ir_builder.create()
+ m = te.size_var('m')
+ n = te.size_var('n')
+ ib = tvm.tir.ir_builder.create()
with ib.for_range(0, n, 'i') as i:
with ib.for_range(0, 32, 'j') as j:
- with ib.if_scope(ib.likely(tvm.truncdiv(i,j) < m)):
+ with ib.if_scope(ib.likely(tvm.tir.truncdiv(i,j) < m)):
# this guard will produce everything during deduction
ib.emit(tvm.tir.Evaluate(m))
stmt = ib.get()
- stmt = tvm.ir_pass.LoopPartition(stmt, False)
- stmt = tvm.ir_pass.Simplify(stmt)
+ stmt = tvm.tir.ir_pass.LoopPartition(stmt, False)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
assert(isinstance(stmt.body.body, tvm.tir.IfThenElse))
def test_single_likely():
n = 60
- A = tvm.placeholder((n, ), name='A')
- B = tvm.placeholder((n, ), name='B')
+ A = te.placeholder((n, ), name='A')
+ B = te.placeholder((n, ), name='B')
- T = tvm.compute((n, ), lambda i: A[i]+B[i])
- s = tvm.create_schedule(T.op)
+ T = te.compute((n, ), lambda i: A[i]+B[i])
+ s = te.create_schedule(T.op)
x = T.op.axis[0]
xo, xi = s[T].split(x, factor=16)
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
- stmt = tvm.ir_pass.LoopPartition(stmt, True)
- stmt = tvm.ir_pass.Simplify(stmt)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
+ stmt = tvm.tir.ir_pass.LoopPartition(stmt, True)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
assert(not any(collect_visit(stmt, lambda x: isinstance(x, tvm.tir.IfThenElse))))
def test_multi_likely():
n = 94
m = 62
- A = tvm.placeholder((n, m), name='A')
- B = tvm.placeholder((n, m), name='B')
+ A = te.placeholder((n, m), name='A')
+ B = te.placeholder((n, m), name='B')
- T = tvm.compute((n, m), lambda i, j: A[i, j]+B[i, j])
- s = tvm.create_schedule(T.op)
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
+ T = te.compute((n, m), lambda i, j: A[i, j]+B[i, j])
+ s = te.create_schedule(T.op)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
x, y = T.op.axis
xo, xi = s[T].split(x, factor=16)
yo, yi = s[T].split(y, factor=16)
s[T].reorder(xo, yo, xi, yi)
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
- stmt = tvm.ir_pass.LoopPartition(stmt, True)
- stmt = tvm.ir_pass.Simplify(stmt)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
+ stmt = tvm.tir.ir_pass.LoopPartition(stmt, True)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
assert(not any(collect_visit(stmt, lambda x: isinstance(x, tvm.tir.IfThenElse))))
def test_oneD_pool():
- m = tvm.size_var('m')
- ib = tvm.ir_builder.create()
- #data = tvm.placeholder((16,), name = 'data')
+ m = te.size_var('m')
+ ib = tvm.tir.ir_builder.create()
+ #data = te.placeholder((16,), name = 'data')
data = ib.pointer("float32", name="A")
out = ib.pointer("float32", name="A")
with ib.for_range(0, 16, 'ow') as ow:
with ib.for_range(0, 3, 'kw') as kw:
with ib.if_scope(ib.likely(ow > 0)):
with ib.if_scope(ib.likely(ow < 15)):
- out[ow] = tvm.max(out[ow], data[ow + kw - 1])
+ out[ow] = tvm.te.max(out[ow], data[ow + kw - 1])
with ib.for_range(0, 16, 'ow') as ow:
with ib.for_range(0, 3, 'kw') as kw:
with ib.if_scope(ib.likely(ow < 1)):
with ib.if_scope(ib.likely(kw > 0)):
- out[ow] = tvm.max(out[ow], data[ow + kw - 1])
+ out[ow] = tvm.te.max(out[ow], data[ow + kw - 1])
with ib.for_range(0, 16, 'ow') as ow:
with ib.for_range(0, 3, 'kw') as kw:
with ib.if_scope(ib.likely(ow > 14)):
with ib.if_scope(ib.likely(kw < 2)):
- out[ow] = tvm.max(out[ow], data[ow + kw - 1])
+ out[ow] = tvm.te.max(out[ow], data[ow + kw - 1])
stmt = ib.get()
- stmt = tvm.ir_pass.LoopPartition(stmt, True)
- stmt = tvm.ir_pass.Simplify(stmt)
+ stmt = tvm.tir.ir_pass.LoopPartition(stmt, True)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
assert(not any(collect_visit(stmt, lambda x: isinstance(x, tvm.tir.IfThenElse))))
def test_cce_loop_1():
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
dtype = 'float16'
n = 514
m = 514
- _A = tvm.placeholder((n*m,), name = 'A')
- Ab = tvm.decl_buffer((n*m,), dtype, name="A")
+ _A = te.placeholder((n*m,), name = 'A')
+ Ab = tvm.tir.decl_buffer((n*m,), dtype, name="A")
A = ib.buffer_ptr(Ab)
- _B = tvm.placeholder((n*m,), name = 'B')
- Bb = tvm.decl_buffer((n*m,), dtype, name="B")
+ _B = te.placeholder((n*m,), name = 'B')
+ Bb = tvm.tir.decl_buffer((n*m,), dtype, name="B")
B = ib.buffer_ptr(Bb)
#for i in 0 to n-1:
with ib.for_range(0, 11, name="i") as i:
with ib.if_scope(ib.likely(((i*160) + j) < 1600)):
A[(i+1)*m+j+1] = B[(i)*m+j+1] + B[(i+1)*m+j+1] + B[(i+2)*m+j+1]
stmt = ib.get()
- stmt = tvm.ir_pass.LoopPartition(stmt, True)
- stmt = tvm.ir_pass.Simplify(stmt)
+ stmt = tvm.tir.ir_pass.LoopPartition(stmt, True)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
assert(not any(collect_visit(stmt, lambda x: isinstance(x, tvm.tir.IfThenElse))))
def test_cce_loop_2():
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
len = 112
tile = 32
loop = (len + tile - 1) // tile
head = i * tile
with ib.if_scope(ib.likely(head + tile > len)):
tail = len
- ib.emit(tvm.call_extern('float32', "cce_intrisic", head, tail))
+ ib.emit(tvm.tir.call_extern('float32', "cce_intrisic", head, tail))
with ib.else_scope():
tail = head + tile
- ib.emit(tvm.call_extern('float32', "cce_intrisic", head, tail))
+ ib.emit(tvm.tir.call_extern('float32', "cce_intrisic", head, tail))
stmt = ib.get()
- stmt = tvm.ir_pass.LoopPartition(stmt, True)
- stmt = tvm.ir_pass.Simplify(stmt)
+ stmt = tvm.tir.ir_pass.LoopPartition(stmt, True)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
assert(not any(collect_visit(stmt, lambda x: isinstance(x, tvm.tir.IfThenElse))))
def test_cce_loop_3():
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
loop1 = 4
loop2 = 9998
tile = 39991
head1 = i
head2 = j
with ib.if_scope(ib.likely(head1*loop1 + head2 < tile)):
- ib.emit(tvm.call_extern('float16',"cce_intrisic",head1))
+ ib.emit(tvm.tir.call_extern('float16',"cce_intrisic",head1))
stmt = ib.get()
- stmt = tvm.ir_pass.LoopPartition(stmt,True)
- stmt = tvm.ir_pass.Simplify(stmt)
+ stmt = tvm.tir.ir_pass.LoopPartition(stmt,True)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
assert(not any(collect_visit(stmt, lambda x: isinstance(x, tvm.tir.IfThenElse))))
def test_conv_tiling():
batch_size = 1
in_height = in_width = 64
out_height = out_width = in_height - kernel_height + 1
- data = tvm.placeholder((batch_size, in_channel, in_height, in_width), name='data')
- kernel = tvm.placeholder((kernel_height, kernel_width, in_channel,
+ data = te.placeholder((batch_size, in_channel, in_height, in_width), name='data')
+ kernel = te.placeholder((kernel_height, kernel_width, in_channel,
out_channel), name='kernel')
- ic = tvm.reduce_axis((0, in_channel), name='ic')
- kh = tvm.reduce_axis((0, kernel_height), name='kh')
- kw = tvm.reduce_axis((0, kernel_width), name='kw')
- conv = tvm.compute((batch_size, out_channel, out_height, out_width),
- lambda n, oc, oh, ow: tvm.sum(data[n, ic, oh*HSTR + kh, ow*WSTR + kw] *
+ ic = te.reduce_axis((0, in_channel), name='ic')
+ kh = te.reduce_axis((0, kernel_height), name='kh')
+ kw = te.reduce_axis((0, kernel_width), name='kw')
+ conv = te.compute((batch_size, out_channel, out_height, out_width),
+ lambda n, oc, oh, ow: te.sum(data[n, ic, oh*HSTR + kh, ow*WSTR + kw] *
kernel[kh, kw, ic, oc],
axis=[ic, kh, kw]),
name="conv2d")
- s = tvm.create_schedule(conv.op)
+ s = te.create_schedule(conv.op)
n, oc, oh, ow = conv.op.axis
oho, owo, ohi, owi = s[conv].tile(oh, ow, 16, 16)
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
- stmt = tvm.ir_pass.LoopPartition(stmt, True)
- stmt = tvm.ir_pass.Simplify(stmt)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
+ stmt = tvm.tir.ir_pass.LoopPartition(stmt, True)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
assert(not any(collect_visit(stmt, lambda x: isinstance(x, tvm.tir.IfThenElse))))
def test_multilevel_splitting_with_indivisble_factors():
import topi
- A = tvm.placeholder((130,), dtype="float32")
+ A = te.placeholder((130,), dtype="float32")
B = topi.nn.relu(A)
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
(y,) = s[B].op.axis
(yo, yi) = s[B].split(y, factor=8)
(yoo, yoi) = s[B].split(yo, factor=16)
s[B].unroll(yi)
## But this does the right thing.
- with tvm.build_config(partition_const_loop=True):
+ with tvm.target.build_config(partition_const_loop=True):
lowered_body = tvm.lower(s, [A, B]).body
def visit_stmt(op):
return(isinstance(op, tvm.tir.Max))
def test_double_splitting_with_indivisible_factors():
m = 48
dtype="float32"
- A = tvm.placeholder((m,), name='A', dtype=dtype)
- C = tvm.compute((m,), lambda i: A[i], name='C')
- D = tvm.compute((m,), lambda i: C[i], name='D')
+ A = te.placeholder((m,), name='A', dtype=dtype)
+ C = te.compute((m,), lambda i: A[i], name='C')
+ D = te.compute((m,), lambda i: C[i], name='D')
- s = tvm.create_schedule(D.op)
+ s = te.create_schedule(D.op)
co, ci = s[C].split(C.op.axis[0], factor=10)
do, di = s[D].split(D.op.axis[0], 32)
s[C].compute_at(s[D], do)
target = 'llvm'
- with tvm.build_config(partition_const_loop=True):
+ with tvm.target.build_config(partition_const_loop=True):
f = tvm.lower(s, [A, C, D], name="fadd1", simple_mode=False)
func = tvm.build(f, target=target)
def test_simple_rfactor():
K = 16*4+4
- k = tvm.reduce_axis((0, K), 'k')
+ k = te.reduce_axis((0, K), 'k')
- A = tvm.placeholder((1, K), name='A')
+ A = te.placeholder((1, K), name='A')
- B = tvm.compute( (1,), lambda b:
- tvm.sum(A[b, k], axis=k),
+ B = te.compute( (1,), lambda b:
+ te.sum(A[b, k], axis=k),
name='B'
)
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
ko, _ = s[B].split(s[B].op.reduce_axis[0], 16)
BF = s.rfactor(B, ko, 0)
s.normalize()
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
- stmt1 = tvm.schedule.ScheduleOps(s, bounds)
- stmt1 = tvm.ir_pass.Simplify(stmt1)
+ stmt1 = tvm.te.schedule.ScheduleOps(s, bounds)
+ stmt1 = tvm.tir.ir_pass.Simplify(stmt1)
- stmt2 = tvm.ir_pass.LoopPartition(stmt1, True)
- stmt2 = tvm.ir_pass.Simplify(stmt2)
+ stmt2 = tvm.tir.ir_pass.LoopPartition(stmt1, True)
+ stmt2 = tvm.tir.ir_pass.Simplify(stmt2)
#make sure loop partition actually did something
- assert not tvm.ir_pass.Equal(stmt1.body, stmt2.body)
+ assert not tvm.tir.ir_pass.Equal(stmt1.body, stmt2.body)
if __name__ == "__main__":
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
def lower_intrin(stmt):
"""wrapper to call transformation in stmt"""
lower_expr = isinstance(stmt, tvm.tir.PrimExpr)
stmt = tvm.tir.Evaluate(stmt) if lower_expr else stmt
- stmt = tvm.ir_pass.CanonicalSimplify(stmt)
- stmt = tvm.ir_pass._LowerIntrinStmt(stmt, "llvm")
+ stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt)
+ stmt = tvm.tir.ir_pass._LowerIntrinStmt(stmt, "llvm")
return stmt.value if lower_expr else stmt.body
def check_value(expr, vx, vy, data, fref):
n = len(data)
- A = tvm.placeholder((n,), name="A", dtype=expr.dtype)
- B = tvm.placeholder((n,), name="B", dtype=expr.dtype)
+ A = te.placeholder((n,), name="A", dtype=expr.dtype)
+ B = te.placeholder((n,), name="B", dtype=expr.dtype)
def make_binds(i):
x = expr
x = tvm.tir.Let(vy, B[i], x)
return x
- C = tvm.compute((n,), make_binds)
- s = tvm.create_schedule([C.op])
+ C = te.compute((n,), make_binds)
+ s = te.create_schedule([C.op])
if not tvm.runtime.enabled("llvm"):
return
def test_lower_floordiv():
data = get_ref_data()
for dtype in ["int32", "int64", "int16"]:
- x = tvm.var("x", dtype=dtype)
- y = tvm.var("y", dtype=dtype)
- zero = tvm.const(0, dtype)
+ x = te.var("x", dtype=dtype)
+ y = te.var("y", dtype=dtype)
+ zero = tvm.tir.const(0, dtype)
# no constraints
- res = lower_intrin(tvm.floordiv(x, y))
+ res = lower_intrin(tvm.te.floordiv(x, y))
check_value(res, x, y, data, lambda a, b: a // b)
# rhs >= 0
- res = lower_intrin(tvm.tir.Select(y >= 0, tvm.floordiv(x, y), zero))
+ res = lower_intrin(tvm.tir.Select(y >= 0, tvm.te.floordiv(x, y), zero))
check_value(res, x, y, data, lambda a, b: a // b if b > 0 else 0)
# involves max
- res = lower_intrin(tvm.tir.Select(y >= 0, tvm.max(tvm.floordiv(x, y), zero), zero))
+ res = lower_intrin(tvm.tir.Select(y >= 0, tvm.te.max(tvm.te.floordiv(x, y), zero), zero))
check_value(res, x, y, data, lambda a, b: max(a // b, 0) if b > 0 else 0)
# lhs >= 0
- res = lower_intrin(tvm.tir.Select(tvm.all(y >= 0, x >= 0), tvm.floordiv(x, y), zero))
+ res = lower_intrin(tvm.tir.Select(tvm.tir.all(y >= 0, x >= 0), tvm.te.floordiv(x, y), zero))
check_value(res, x, y, data, lambda a, b: a // b if b > 0 and a >= 0 else 0)
# const power of two
- res = lower_intrin(tvm.floordiv(x, tvm.const(8, dtype=dtype)))
+ res = lower_intrin(tvm.te.floordiv(x, tvm.tir.const(8, dtype=dtype)))
check_value(res, x, y, [(a, b) for a, b in data if b == 8], lambda a, b: a // b)
def test_lower_floormod():
data = get_ref_data()
for dtype in ["int32", "int64", "int16"]:
- x = tvm.var("x", dtype=dtype)
- y = tvm.var("y", dtype=dtype)
- zero = tvm.const(0, dtype)
+ x = te.var("x", dtype=dtype)
+ y = te.var("y", dtype=dtype)
+ zero = tvm.tir.const(0, dtype)
# no constraints
- res = lower_intrin(tvm.floormod(x, y))
+ res = lower_intrin(tvm.te.floormod(x, y))
check_value(res, x, y, data, lambda a, b: a % b)
# rhs >= 0
- res = lower_intrin(tvm.tir.Select(y >= 0, tvm.floormod(x, y), zero))
+ res = lower_intrin(tvm.tir.Select(y >= 0, tvm.te.floormod(x, y), zero))
check_value(res, x, y, data, lambda a, b: a % b if b > 0 else 0)
# lhs >= 0
- res = lower_intrin(tvm.tir.Select(tvm.all(y >= 0, x >= 0), tvm.floormod(x, y), zero))
+ res = lower_intrin(tvm.tir.Select(tvm.tir.all(y >= 0, x >= 0), tvm.te.floormod(x, y), zero))
check_value(res, x, y, data, lambda a, b: a % b if b > 0 and a >= 0 else 0)
# const power of two
- res = lower_intrin(tvm.floormod(x, tvm.const(8, dtype=dtype)))
+ res = lower_intrin(tvm.te.floormod(x, tvm.tir.const(8, dtype=dtype)))
check_value(res, x, y, [(a, b) for a, b in data if b == 8], lambda a, b: a % b)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_lower_warp_mem():
m = 128
- A = tvm.placeholder((m,), name='A')
- B = tvm.compute((m,), lambda i: A[i] + 3, name='B')
+ A = te.placeholder((m,), name='A')
+ B = te.compute((m,), lambda i: A[i] + 3, name='B')
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
AA = s.cache_read(A, "warp", [B])
xo, xi = s[B].split(B.op.axis[0], 32)
xi0, xi1 = s[B].split(xi, factor=16)
- tx = tvm.thread_axis("threadIdx.x")
+ tx = te.thread_axis("threadIdx.x")
s[B].bind(xi1, tx)
- s[B].bind(xo, tvm.thread_axis("blockIdx.x"))
+ s[B].bind(xo, te.thread_axis("blockIdx.x"))
s[AA].compute_at(s[B], xo)
xo, xi = s[AA].split(s[AA].op.axis[0], 16)
s[AA].bind(xi, tx)
f = tvm.lower(s, [A, B])
- fhost, fdevice = tvm.ir_pass.SplitHostDevice(f)
- fdevice = tvm.ir_pass.LowerWarpMemory(fdevice, 16)
+ fhost, fdevice = tvm.tir.ir_pass.SplitHostDevice(f)
+ fdevice = tvm.tir.ir_pass.LowerWarpMemory(fdevice, 16)
assert(fdevice.body.body.value.value == "local")
assert(fdevice.body.body.body.extents[0].value == 2)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy
def test_makeapi():
"""Not yet working, mock design"""
- n = tvm.size_var('n')
- A = tvm.placeholder((n,), name='A')
- B = tvm.placeholder((n,), name='B')
- C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
- s = tvm.create_schedule(C.op)
+ n = te.size_var('n')
+ A = te.placeholder((n,), name='A')
+ B = te.placeholder((n,), name='B')
+ C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
+ s = te.create_schedule(C.op)
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
- Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
- Bb = tvm.decl_buffer(B.shape, B.dtype, name='B')
- Cb = tvm.decl_buffer(C.shape, C.dtype, name='C')
- stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B:Bb, C:Cb}, 64)
+ Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
+ Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B')
+ Cb = tvm.tir.decl_buffer(C.shape, C.dtype, name='C')
+ stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B:Bb, C:Cb}, 64)
num_unpacked_args = 2
- f = tvm.ir_pass.MakeAPI(
+ f = tvm.tir.ir_pass.MakeAPI(
stmt, "myadd", [n, Ab, Bb, Cb], num_unpacked_args, True)
assert(f.handle_data_type[Ab.data].dtype == Ab.dtype)
assert(len(f.args) == 7)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def nop():
return tvm.tir.Evaluate(0)
def test_remove_no_op():
- i = tvm.var('i')
- j = tvm.var('j')
- k = tvm.var('k')
- m = tvm.var('m')
- n = tvm.var('n')
+ i = te.var('i')
+ j = te.var('j')
+ k = te.var('k')
+ m = te.var('m')
+ n = te.var('n')
dtype = 'int64'
- Ab = tvm.decl_buffer((n, ), dtype)
+ Ab = tvm.tir.decl_buffer((n, ), dtype)
stmt = tvm.tir.For(
i, 0, 4, 0, 0,
tvm.tir.For(
k, 0, m, 0, 0,
tvm.tir.IfThenElse(
(i*m+j+k < n), tvm.tir.Evaluate(m), tvm.tir.Evaluate(n)))))
- ret = tvm.ir_pass.RemoveNoOp(stmt)
+ ret = tvm.tir.ir_pass.RemoveNoOp(stmt)
assert(isinstance(ret, tvm.tir.Evaluate))
store = tvm.tir.Store(Ab.data,
tvm.tir.Load(dtype, Ab.data, i) + 1,
i + 1)
stmt2 = tvm.tir.SeqStmt([nop(), tvm.tir.SeqStmt([store, nop()])])
- assert(tvm.ir_pass.RemoveNoOp(stmt2) == store)
+ assert(tvm.tir.ir_pass.RemoveNoOp(stmt2) == store)
# remove zero extent loop
stmt3 = tvm.tir.For(i, 0, 0, 0, 0, store)
- ret = tvm.ir_pass.RemoveNoOp(stmt3)
+ ret = tvm.tir.ir_pass.RemoveNoOp(stmt3)
assert(isinstance(ret, tvm.tir.Evaluate))
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import topi
import numpy as np
from tvm.contrib import nvcc
def tensor_core_matmul(warp_tile_m=16, m=64, n=32, l=96):
- A = tvm.placeholder((n, l), name='A', dtype='float16')
- B = tvm.placeholder((l, m), name='B', dtype='float16')
- k = tvm.reduce_axis((0, l), name='k')
- C = tvm.compute((n, m), lambda i, j: tvm.sum(A[i, k].astype('float32') * B[k, j].astype('float32'), axis=k))
- s = tvm.create_schedule(C.op)
+ A = te.placeholder((n, l), name='A', dtype='float16')
+ B = te.placeholder((l, m), name='B', dtype='float16')
+ k = te.reduce_axis((0, l), name='k')
+ C = te.compute((n, m), lambda i, j: te.sum(A[i, k].astype('float32') * B[k, j].astype('float32'), axis=k))
+ s = te.create_schedule(C.op)
y, x = s[C].op.axis
k = s[C].op.reduce_axis[0]
kl, ki = s[CL].split(ki, tile_k)
s[C].reorder(yo, xo, tz, ty, tx, yi, xi)
- s[C].bind(yo, tvm.thread_axis("blockIdx.y"))
- s[C].bind(xo, tvm.thread_axis("blockIdx.x"))
- s[C].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[C].bind(tz, tvm.thread_axis("threadIdx.z"))
- s[C].bind(tx, tvm.thread_axis("threadIdx.x"))
- s[C].bind(vy, tvm.thread_axis((0, vthread), "vthread", name="vy"))
+ s[C].bind(yo, te.thread_axis("blockIdx.y"))
+ s[C].bind(xo, te.thread_axis("blockIdx.x"))
+ s[C].bind(ty, te.thread_axis("threadIdx.y"))
+ s[C].bind(tz, te.thread_axis("threadIdx.z"))
+ s[C].bind(tx, te.thread_axis("threadIdx.x"))
+ s[C].bind(vy, te.thread_axis((0, vthread), "vthread", name="vy"))
s[CL].compute_at(s[C], tx)
yo, xo = CL.op.axis
s[CL].reorder(ko, kl, ki, yo, xo)
tx, vec = s[AA].split(tx, factor=v)
fused = s[AA].fuse(s[AA].op.axis[0], xo)
_, ty = s[AA].split(fused, factor=by)
- s[AA].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[AA].bind(tz, tvm.thread_axis("threadIdx.z"))
- s[AA].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[AA].bind(ty, te.thread_axis("threadIdx.y"))
+ s[AA].bind(tz, te.thread_axis("threadIdx.z"))
+ s[AA].bind(tx, te.thread_axis("threadIdx.x"))
s[AA].vectorize(vec)
s[BB].compute_at(s[CL], ko)
tx, vec = s[BB].split(tx, factor=v)
fused = s[BB].fuse(s[BB].op.axis[0], xo)
_, ty = s[BB].split(fused, factor=by)
- s[BB].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[BB].bind(tz, tvm.thread_axis("threadIdx.z"))
- s[BB].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[BB].bind(ty, te.thread_axis("threadIdx.y"))
+ s[BB].bind(tz, te.thread_axis("threadIdx.z"))
+ s[BB].bind(tx, te.thread_axis("threadIdx.x"))
s[BB].vectorize(vec)
s[AL].compute_at(s[CL], kl)
np.testing.assert_allclose(c_np, c.asnumpy(), rtol=1e-3)
def tensor_core_batch_matmul(warp_tile_m=16, m=64, n=32, l=96, batch=2):
- A = tvm.placeholder((batch, n, l), name='A', dtype='float16')
- B = tvm.placeholder((batch, l, m), name='B', dtype='float16')
- k = tvm.reduce_axis((0, l), name='k')
- C = tvm.compute((batch, n, m), lambda b, i, j: tvm.sum((A[b, i, k] * B[b, k, j]).astype('float32'), axis=k))
- s = tvm.create_schedule(C.op)
+ A = te.placeholder((batch, n, l), name='A', dtype='float16')
+ B = te.placeholder((batch, l, m), name='B', dtype='float16')
+ k = te.reduce_axis((0, l), name='k')
+ C = te.compute((batch, n, m), lambda b, i, j: te.sum((A[b, i, k] * B[b, k, j]).astype('float32'), axis=k))
+ s = te.create_schedule(C.op)
z, y, x = s[C].op.axis
k = s[C].op.reduce_axis[0]
kl, ki = s[CL].split(ki, tile_k)
s[C].reorder(z, yo, xo, tz, ty, tx, yi, xi)
- s[C].bind(z, tvm.thread_axis("blockIdx.z"))
- s[C].bind(yo, tvm.thread_axis("blockIdx.y"))
- s[C].bind(xo, tvm.thread_axis("blockIdx.x"))
- s[C].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[C].bind(tz, tvm.thread_axis("threadIdx.z"))
- s[C].bind(tx, tvm.thread_axis("threadIdx.x"))
- s[C].bind(vy, tvm.thread_axis((0, vthread), "vthread", name="vy"))
+ s[C].bind(z, te.thread_axis("blockIdx.z"))
+ s[C].bind(yo, te.thread_axis("blockIdx.y"))
+ s[C].bind(xo, te.thread_axis("blockIdx.x"))
+ s[C].bind(ty, te.thread_axis("threadIdx.y"))
+ s[C].bind(tz, te.thread_axis("threadIdx.z"))
+ s[C].bind(tx, te.thread_axis("threadIdx.x"))
+ s[C].bind(vy, te.thread_axis((0, vthread), "vthread", name="vy"))
s[CL].compute_at(s[C], tx)
zo, yo, xo = CL.op.axis
s[CL].reorder(ko, kl, ki, zo, yo, xo)
tx, vec = s[AA].split(tx, factor=v)
fused = s[AA].fuse(s[AA].op.axis[1], xo)
_, ty = s[AA].split(fused, factor=by)
- s[AA].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[AA].bind(tz, tvm.thread_axis("threadIdx.z"))
- s[AA].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[AA].bind(ty, te.thread_axis("threadIdx.y"))
+ s[AA].bind(tz, te.thread_axis("threadIdx.z"))
+ s[AA].bind(tx, te.thread_axis("threadIdx.x"))
s[AA].vectorize(vec)
s[BB].compute_at(s[CL], ko)
tx, vec = s[BB].split(tx, factor=v)
fused = s[BB].fuse(s[BB].op.axis[1], xo)
_, ty = s[BB].split(fused, factor=by)
- s[BB].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[BB].bind(tz, tvm.thread_axis("threadIdx.z"))
- s[BB].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[BB].bind(ty, te.thread_axis("threadIdx.y"))
+ s[BB].bind(tz, te.thread_axis("threadIdx.z"))
+ s[BB].bind(tx, te.thread_axis("threadIdx.x"))
s[BB].vectorize(vec)
s[AL].compute_at(s[CL], kl)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_rewrite_Select():
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
A = ib.allocate("float32", 100, name="A", scope="global")
- i = tvm.var("i")
+ i = te.var("i")
y = tvm.tir.Select(i > 1, A[i-1], 1.0)
- yy = tvm.ir_pass.RewriteUnsafeSelect(tvm.tir.Evaluate(y)).value
+ yy = tvm.tir.ir_pass.RewriteUnsafeSelect(tvm.tir.Evaluate(y)).value
z = tvm.tir.Select(
tvm.tir.Select(i > 1, A[i-1], 1.0) > 0.0, A[i], 0.1)
- zz = tvm.ir_pass.RewriteUnsafeSelect(tvm.tir.Evaluate(z)).value
+ zz = tvm.tir.ir_pass.RewriteUnsafeSelect(tvm.tir.Evaluate(z)).value
- a = tvm.tir.Select(tvm.floordiv(i, 4) > 10, y, z)
- aa = tvm.ir_pass.RewriteUnsafeSelect(tvm.tir.Evaluate(a)).value
+ a = tvm.tir.Select(tvm.te.floordiv(i, 4) > 10, y, z)
+ aa = tvm.tir.ir_pass.RewriteUnsafeSelect(tvm.tir.Evaluate(a)).value
assert yy.name == "tvm_if_then_else"
assert zz.name == "tvm_if_then_else"
assert isinstance(aa, tvm.tir.Select)
# under the License.
import pytest
import tvm
+from tvm import te
@pytest.mark.xfail
def test_loop_dependent_allocate():
- N = tvm.size_var("N")
- A = tvm.placeholder((2*N,), "float32", "A")
- C = tvm.compute((N, ), lambda i: A[2*i] + A[i+1], name='C')
- s = tvm.create_schedule(C.op)
+ N = te.size_var("N")
+ A = te.placeholder((2*N,), "float32", "A")
+ C = te.compute((N, ), lambda i: A[2*i] + A[i+1], name='C')
+ s = te.create_schedule(C.op)
AA = s.cache_read(A, "local", [C])
s[AA].compute_at(s[C], s[C].op.axis[0])
# this line should fail due to IRUseDefAnalysis sees an allocate statement
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_flatten2():
- m = tvm.size_var('m')
- l = tvm.size_var('l')
- A = tvm.placeholder((m, l), name='A')
- A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1')
- A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')
+ m = te.size_var('m')
+ l = te.size_var('l')
+ A = te.placeholder((m, l), name='A')
+ A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1')
+ A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')
- s = tvm.create_schedule(A2.op)
+ s = te.create_schedule(A2.op)
xo, xi = s[A2].split(A2.op.axis[0], 8)
s[A1].compute_at(s[A2], xo)
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
- Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
- A2b = tvm.decl_buffer(A2.shape, A2.dtype, name='A2')
- stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64)
- stmt = tvm.ir_pass.Simplify(stmt)
+ Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
+ A2b = tvm.tir.decl_buffer(A2.shape, A2.dtype, name='A2')
+ stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
def test_flatten_prefetch():
- A = tvm.placeholder((25, 100, 4), name = 'A')
- _A= tvm.decl_buffer(A.shape, A.dtype, name = 'A');
- i = tvm.size_var('i')
- j = tvm.size_var('j')
+ A = te.placeholder((25, 100, 4), name = 'A')
+ _A= tvm.tir.decl_buffer(A.shape, A.dtype, name = 'A');
+ i = te.size_var('i')
+ j = te.size_var('j')
region = [tvm.ir.Range.make_by_min_extent(i[0], i[1]) for i in [(i, 2), (j, 8), (0, 4)]]
stmt = tvm.tir.Prefetch(A.op, 0, A.dtype, region)
- stmt = tvm.ir_pass.StorageFlatten(stmt, {A: _A}, 64)
- stmt = tvm.ir_pass.Simplify(stmt)
+ stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: _A}, 64)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
assert stmt.extent.value == 2
assert isinstance(stmt.body, tvm.tir.For)
assert stmt.body.extent.value == 2
def test_flatten_storage_align():
m = 8
l = 16
- A = tvm.placeholder((m, l), name='A')
- A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1')
- A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')
+ A = te.placeholder((m, l), name='A')
+ A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1')
+ A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')
- s = tvm.create_schedule(A2.op)
+ s = te.create_schedule(A2.op)
s[A1].storage_align(A1.op.axis[0], 2, 1)
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
- Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
- A2b = tvm.decl_buffer(A2.shape, A2.dtype, name='A2')
- stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64)
- stmt = tvm.ir_pass.Simplify(stmt)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
+ Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
+ A2b = tvm.tir.decl_buffer(A2.shape, A2.dtype, name='A2')
+ stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
assert(stmt.body.extents[0].value == 17 * 8)
def test_flatten_double_buffer():
dtype = 'int64'
n = 100
m = 4
- tx = tvm.thread_axis("threadIdx.x")
- ib = tvm.ir_builder.create()
+ tx = te.thread_axis("threadIdx.x")
+ ib = tvm.tir.ir_builder.create()
A = ib.pointer("float32", name="A")
C = ib.pointer("float32", name="C")
ib.scope_attr(tx, "thread_extent", 1)
C[j] = B[j] + 1
stmt = ib.get()
- stmt = tvm.ir_pass.StorageFlatten(stmt, {}, 64)
- stmt = tvm.ir_pass.InjectDoubleBuffer(stmt, 2)
- stmt = tvm.ir_pass.Simplify(stmt)
+ stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {}, 64)
+ stmt = tvm.tir.ir_pass.InjectDoubleBuffer(stmt, 2)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
assert isinstance(stmt.body.body, tvm.tir.Allocate)
assert stmt.body.body.extents[0].value == 2
- f = tvm.ir_pass.MakeAPI(stmt, "db", [A.asobject(), C.asobject()], 2, True)
- f = tvm.ir_pass.ThreadSync(f, "shared")
+ f = tvm.tir.ir_pass.MakeAPI(stmt, "db", [A.asobject(), C.asobject()], 2, True)
+ f = tvm.tir.ir_pass.ThreadSync(f, "shared")
count = [0]
def count_sync(op):
if isinstance(op, tvm.tir.Call) and op.name == "tvm_storage_sync":
count[0] += 1
- tvm.ir_pass.PostOrderVisit(f.body, count_sync)
+ tvm.tir.ir_pass.PostOrderVisit(f.body, count_sync)
assert count[0] == 4
if __name__ == "__main__":
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_storage_share():
- m = tvm.var('m')
- l = tvm.var('l')
- A = tvm.placeholder((m, l), name='A')
+ m = te.var('m')
+ l = te.var('l')
+ A = te.placeholder((m, l), name='A')
num_stage = 5
B = A
for t in range(num_stage):
- B = tvm.compute((m, l), lambda i, j: B[i, j] + (t+1), name='A%d' % t)
+ B = te.compute((m, l), lambda i, j: B[i, j] + (t+1), name='A%d' % t)
- s = tvm.create_schedule(B.op)
- bounds = tvm.schedule.InferBound(s)
+ s = te.create_schedule(B.op)
+ bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
- Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
- Bb = tvm.decl_buffer(B.shape, B.dtype, name='B')
- stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
- stmt = tvm.ir_pass.CanonicalSimplify(stmt)
- stmt = tvm.ir_pass.Simplify(stmt)
- stmt = tvm.ir_pass.StorageRewrite(stmt)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
+ Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
+ Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B')
+ stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
+ stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
+ stmt = tvm.tir.ir_pass.StorageRewrite(stmt)
# verify only have one allocations.
# verify inplace folding works
num_alloc = [0]
def verify(n):
if isinstance(n, tvm.tir.Allocate):
num_alloc[0] += 1
- tvm.ir_pass.PostOrderVisit(stmt, verify)
+ tvm.tir.ir_pass.PostOrderVisit(stmt, verify)
assert num_alloc[0] == 1
def register_mem(scope_tb, max_bits):
register_mem(scope_tb, max_bits)
- ib = tvm.ir_builder.create()
- n = tvm.var("n")
+ ib = tvm.tir.ir_builder.create()
+ n = te.var("n")
with ib.for_range(0, n, name="i") as i:
with ib.for_range(0, 10, name="j") as j:
A = ib.allocate("float32", 200, name="A", scope=scope_tb)
A[j] = 1.3
body = ib.get()
- body = tvm.ir_pass.StorageRewrite(body)
+ body = tvm.tir.ir_pass.StorageRewrite(body)
num_alloc = [0]
def verify(n):
if isinstance(n, tvm.tir.Allocate):
num_alloc[0] += 1
assert n.extents[0].value == 200
- tvm.ir_pass.PostOrderVisit(body, verify)
+ tvm.tir.ir_pass.PostOrderVisit(body, verify)
assert num_alloc[0] == 1
def test_alloc_different_dtypes():
def stmt_generater(dtype_list, length):
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
base_dtype = dtype_list[0]
- global_a = tvm.placeholder((length,), name = "global_a", dtype = base_dtype)
+ global_a = te.placeholder((length,), name = "global_a", dtype = base_dtype)
assert len(dtype_list) == 4
with ib.for_range(0, length, name="j") as j:
dtype = dtype_list[0]
A = ib.allocate(dtype, length, name="A", scope="local.L0A")
- A[j] = tvm.const(1, dtype = dtype)
+ A[j] = tvm.tir.const(1, dtype = dtype)
with ib.for_range(0, length, name="j") as j:
dtype = dtype_list[1]
B = ib.allocate(dtype, length, name="B", scope="local.L0A")
- B[j] = tvm.const(1, dtype = dtype)
+ B[j] = tvm.tir.const(1, dtype = dtype)
with ib.for_range(0, length, name="j") as j:
dtype = dtype_list[2]
C = ib.allocate(dtype, length, name="C", scope="local.L0A")
- C[j] = tvm.const(1, dtype = dtype)
+ C[j] = tvm.tir.const(1, dtype = dtype)
with ib.for_range(0, length, name="j") as j:
dtype = dtype_list[3]
D = ib.allocate(dtype, length, name="D", scope="local.L0A")
- D[j] = tvm.const(1, dtype = dtype)
+ D[j] = tvm.tir.const(1, dtype = dtype)
with ib.for_range(0, length, name="j") as j:
dtype = "int8"
E = ib.allocate(dtype, length, name="E", scope="local.L0A")
body = stmt_generater(dtype_list, length)
offset = offset_generater(dtype_list, length)
- body = tvm.ir_pass.StorageRewrite(body)
- tvm.ir_pass.PostOrderVisit(body, verify)
+ body = tvm.tir.ir_pass.StorageRewrite(body)
+ tvm.tir.ir_pass.PostOrderVisit(body, verify)
length = 1024
dtype_list = ["float16", "int32", "uint16", "int8"]
def test_inplace_rule():
m = 10
- A = tvm.placeholder((m,), name='A')
- A0 = tvm.compute((m,), lambda i: A[i], name='A0')
- A1 = tvm.compute((m,), lambda i: A[i] + 1, name='A1')
- AA = tvm.compute((m,), lambda i: A0[i] + A1[i] + A1[0], name='AA')
- B = tvm.compute((m,), lambda i: AA[i] + 1, name='B')
- s = tvm.create_schedule(B.op)
- bounds = tvm.schedule.InferBound(s)
+ A = te.placeholder((m,), name='A')
+ A0 = te.compute((m,), lambda i: A[i], name='A0')
+ A1 = te.compute((m,), lambda i: A[i] + 1, name='A1')
+ AA = te.compute((m,), lambda i: A0[i] + A1[i] + A1[0], name='AA')
+ B = te.compute((m,), lambda i: AA[i] + 1, name='B')
+ s = te.create_schedule(B.op)
+ bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
- Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
- Bb = tvm.decl_buffer(B.shape, B.dtype, name='B')
- stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
- stmt = tvm.ir_pass.CanonicalSimplify(stmt)
- stmt = tvm.ir_pass.Simplify(stmt)
- stmt = tvm.ir_pass.StorageRewrite(stmt)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
+ Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
+ Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B')
+ stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
+ stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
+ stmt = tvm.tir.ir_pass.StorageRewrite(stmt)
# verify only have one allocations.
# verify inplace folding works
num_alloc = [0]
def verify(n):
if isinstance(n, tvm.tir.Allocate):
num_alloc[0] += 1
- tvm.ir_pass.PostOrderVisit(stmt, verify)
+ tvm.tir.ir_pass.PostOrderVisit(stmt, verify)
assert num_alloc[0] == 2
def test_storage_combine():
n = 8
- A = tvm.placeholder((4,), name='A')
+ A = te.placeholder((4,), name='A')
num_stage = 5
B = A
stages = []
for t in range(num_stage):
- B = tvm.compute((n, ), lambda i: B[i] + B[0] + (t+1), name='A%d' % t)
+ B = te.compute((n, ), lambda i: B[i] + B[0] + (t+1), name='A%d' % t)
stages.append(B)
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
for S in stages[:-1]:
s[S].set_scope("global:tag")
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
- Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
- Bb = tvm.decl_buffer(B.shape, B.dtype, name='B')
- stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
- stmt = tvm.ir_pass.CanonicalSimplify(stmt)
- stmt = tvm.ir_pass.Simplify(stmt)
- stmt = tvm.ir_pass.StorageRewrite(stmt)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
+ Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
+ Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B')
+ stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
+ stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
+ stmt = tvm.tir.ir_pass.StorageRewrite(stmt)
num_alloc = [0]
def verify(n):
if isinstance(n, tvm.tir.Allocate):
num_alloc[0] += 1
assert (n.extents[0].value == 16)
- tvm.ir_pass.PostOrderVisit(stmt, verify)
+ tvm.tir.ir_pass.PostOrderVisit(stmt, verify)
assert num_alloc[0] == 1
def test_storage_share_gpu():
- m = tvm.var('m')
- A = [tvm.placeholder((m), name='A')]
+ m = te.var('m')
+ A = [te.placeholder((m), name='A')]
num_stage = 5
for t in range(num_stage):
- A.append(tvm.compute((m,), lambda i: A[-1][i] + (t+1), name='A%d_s' % t))
- A.append(tvm.compute((m,), lambda i: A[-1][i], name='A%d' % t))
- s = tvm.create_schedule(A[-1].op)
+ A.append(te.compute((m,), lambda i: A[-1][i] + (t+1), name='A%d_s' % t))
+ A.append(te.compute((m,), lambda i: A[-1][i], name='A%d' % t))
+ s = te.create_schedule(A[-1].op)
for t in range(num_stage):
x = A[2*t+2].op.axis[0]
bx, tx = s[A[2*t+2]].split(x, factor=32)
- s[A[2*t+2]].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[A[2*t+2]].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[A[2*t+2]].bind(bx, te.thread_axis("blockIdx.x"))
+ s[A[2*t+2]].bind(tx, te.thread_axis("threadIdx.x"))
s[A[2*t+1]].compute_at(s[A[2*t+2]], tx)
s[A[2*t+1]].set_scope("shared")
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
- Ab = tvm.decl_buffer(A[0].shape, A[0].dtype, name='A')
- Bb = tvm.decl_buffer(A[0].shape, A[0].dtype, name='B')
- stmt = tvm.ir_pass.StorageFlatten(stmt, {A[0]: Ab, A[-1]: Bb}, 64)
- stmt = tvm.ir_pass.CanonicalSimplify(stmt)
- stmt = tvm.ir_pass.Simplify(stmt)
- stmt = tvm.ir_pass.StorageRewrite(stmt)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
+ Ab = tvm.tir.decl_buffer(A[0].shape, A[0].dtype, name='A')
+ Bb = tvm.tir.decl_buffer(A[0].shape, A[0].dtype, name='B')
+ stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A[0]: Ab, A[-1]: Bb}, 64)
+ stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
+ stmt = tvm.tir.ir_pass.StorageRewrite(stmt)
alloc_stats = {"global": 0, "shared": 0}
def verify(n):
if isinstance(n, tvm.tir.AttrStmt):
if n.attr_key == "storage_scope":
alloc_stats[n.value.value] += 1
- tvm.ir_pass.PostOrderVisit(stmt, verify)
+ tvm.tir.ir_pass.PostOrderVisit(stmt, verify)
assert alloc_stats["global"] == 2
assert alloc_stats["shared"] == num_stage
def test_parallel_alloc():
- ib = tvm.ir_builder.create()
- n = tvm.var("n")
+ ib = tvm.tir.ir_builder.create()
+ n = te.var("n")
with ib.for_range(0, n, name="i", for_type="parallel") as i:
with ib.for_range(0, 10, name="j") as j:
A = ib.allocate("float32", n, name="A", scope="global")
A[j] = A[j] + 2
body = ib.get()
- body = tvm.ir_pass.StorageRewrite(body)
+ body = tvm.tir.ir_pass.StorageRewrite(body)
assert (isinstance(body.body.body, tvm.tir.Allocate))
- ib = tvm.ir_builder.create()
- n = tvm.var("n")
+ ib = tvm.tir.ir_builder.create()
+ n = te.var("n")
with ib.for_range(0, n, name="t") as i:
ib.scope_attr(
- tvm.const(1, "int32") , "pragma_scope",
+ tvm.tir.const(1, "int32") , "pragma_scope",
tvm.tir.StringImm("parallel_launch_point"))
with ib.for_range(0, n, name="i", for_type="parallel") as i:
with ib.for_range(0, 10, name="j") as j:
A = ib.allocate("float32", n, name="A", scope="global")
A[j] = A[j] + 2
body = ib.get()
- body = tvm.ir_pass.StorageRewrite(body)
+ body = tvm.tir.ir_pass.StorageRewrite(body)
assert(isinstance(body.body.body.body.body, tvm.tir.Allocate))
#Test Buffer
register_mem(scope_tb, max_bits)
m = 10
- A = tvm.placeholder((m,), name='A')
- C = tvm.placeholder((m,), name='C')
- D = tvm.placeholder((m,), name='D')
- A0 = tvm.compute((m,), lambda i: A[i] + C[i], name='A0')
- A1 = tvm.compute((m,), lambda i: D[i] * D[i], name='A1')
- A2 = tvm.compute((m,), lambda i: A0[i] + A1[i], name='A2')
- B = tvm.compute((m,), lambda i: A2[i], name='B')
- s = tvm.create_schedule(B.op)
+ A = te.placeholder((m,), name='A')
+ C = te.placeholder((m,), name='C')
+ D = te.placeholder((m,), name='D')
+ A0 = te.compute((m,), lambda i: A[i] + C[i], name='A0')
+ A1 = te.compute((m,), lambda i: D[i] * D[i], name='A1')
+ A2 = te.compute((m,), lambda i: A0[i] + A1[i], name='A2')
+ B = te.compute((m,), lambda i: A2[i], name='B')
+ s = te.create_schedule(B.op)
A0L = s.cache_read(A0, scope_tb, [A2])
A1L = s.cache_read(A1, scope_tb, [A2])
A2L = s.cache_read(A2, scope_tb, [B])
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
- Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
- Bb = tvm.decl_buffer(B.shape, B.dtype, name='B')
- Cc = tvm.decl_buffer(C.shape, B.dtype, name='C')
- Dd = tvm.decl_buffer(D.shape, B.dtype, name='D')
- stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb, C: Cc, D:Dd}, 64)
- stmt = tvm.ir_pass.CanonicalSimplify(stmt)
- stmt = tvm.ir_pass.Simplify(stmt)
- stmt = tvm.ir_pass.StorageRewrite(stmt)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
+ Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
+ Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B')
+ Cc = tvm.tir.decl_buffer(C.shape, B.dtype, name='C')
+ Dd = tvm.tir.decl_buffer(D.shape, B.dtype, name='D')
+ stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb, C: Cc, D:Dd}, 64)
+ stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
+ stmt = tvm.tir.ir_pass.StorageRewrite(stmt)
# verify only have one allocations.
# verify inplace folding works
num_alloc = [0]
def verify(n):
if isinstance(n, tvm.tir.Allocate):
num_alloc[0] += 1
- tvm.ir_pass.PostOrderVisit(stmt, verify)
+ tvm.tir.ir_pass.PostOrderVisit(stmt, verify)
assert num_alloc[0] == 2
def test_exceed_mem():
register_mem(scope_tb, max_bits)
m = 10
- B0 = tvm.placeholder((m,), name='B0')
- B1 = tvm.placeholder((m,), name='B1')
- B2 = tvm.placeholder((m,), name='B2')
- B3 = tvm.placeholder((m,), name='B3')
- B4 = tvm.placeholder((m,), name='B4')
- B5 = tvm.placeholder((m,), name='B5')
+ B0 = te.placeholder((m,), name='B0')
+ B1 = te.placeholder((m,), name='B1')
+ B2 = te.placeholder((m,), name='B2')
+ B3 = te.placeholder((m,), name='B3')
+ B4 = te.placeholder((m,), name='B4')
+ B5 = te.placeholder((m,), name='B5')
- B6 = tvm.compute((m,), lambda i: B1[i] * B5[i], name='B6')
- B7 = tvm.compute((m,), lambda i: B2[i] * B4[i], name='B7')
- B8 = tvm.compute((m,), lambda i: B6[i] - B7[i], name='B8')
+ B6 = te.compute((m,), lambda i: B1[i] * B5[i], name='B6')
+ B7 = te.compute((m,), lambda i: B2[i] * B4[i], name='B7')
+ B8 = te.compute((m,), lambda i: B6[i] - B7[i], name='B8')
- B9 = tvm.compute((m,), lambda i: B2[i] * B3[i], name='B9')
- B10 = tvm.compute((m,), lambda i: B0[i] * B5[i], name='B10')
- B11 = tvm.compute((m,), lambda i: B9[i] - B10[i], name='B11')
+ B9 = te.compute((m,), lambda i: B2[i] * B3[i], name='B9')
+ B10 = te.compute((m,), lambda i: B0[i] * B5[i], name='B10')
+ B11 = te.compute((m,), lambda i: B9[i] - B10[i], name='B11')
- B12 = tvm.compute((m,), lambda i: B0[i] * B4[i], name='B12')
- B13 = tvm.compute((m,), lambda i: B1[i] * B3[i], name='B13')
- B14 = tvm.compute((m,), lambda i: B12[i] - B13[i], name='B14')
+ B12 = te.compute((m,), lambda i: B0[i] * B4[i], name='B12')
+ B13 = te.compute((m,), lambda i: B1[i] * B3[i], name='B13')
+ B14 = te.compute((m,), lambda i: B12[i] - B13[i], name='B14')
- B = tvm.compute((m,), lambda i: B8[i] * B11[i] + B14[i], name='B')
- s = tvm.create_schedule(B.op)
+ B = te.compute((m,), lambda i: B8[i] * B11[i] + B14[i], name='B')
+ s = te.create_schedule(B.op)
B1L = s.cache_read(B1, scope_tb, [B6, B13])
B5L = s.cache_read(B5, scope_tb, [B6, B10])
s[B10].compute_inline()
s = s.normalize()
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
-
- B0a = tvm.decl_buffer(B0.shape, B0.dtype, name='B0')
- B1a = tvm.decl_buffer(B1.shape, B1.dtype, name='B1')
- B2a = tvm.decl_buffer(B2.shape, B2.dtype, name='B2')
- B3a = tvm.decl_buffer(B3.shape, B3.dtype, name='B3')
- B4a = tvm.decl_buffer(B4.shape, B4.dtype, name='B4')
- B5a = tvm.decl_buffer(B5.shape, B5.dtype, name='B5')
-
- Bb = tvm.decl_buffer(B.shape, B.dtype, name='B')
- stmt = tvm.ir_pass.StorageFlatten(stmt, {B0: B0a, B1: B1a, B2: B2a, B3: B2a, B4: B4a, B5: B5a, B: Bb}, 64)
- stmt = tvm.ir_pass.CanonicalSimplify(stmt)
- stmt = tvm.ir_pass.Simplify(stmt)
- stmt = tvm.ir_pass.StorageRewrite(stmt)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
+
+ B0a = tvm.tir.decl_buffer(B0.shape, B0.dtype, name='B0')
+ B1a = tvm.tir.decl_buffer(B1.shape, B1.dtype, name='B1')
+ B2a = tvm.tir.decl_buffer(B2.shape, B2.dtype, name='B2')
+ B3a = tvm.tir.decl_buffer(B3.shape, B3.dtype, name='B3')
+ B4a = tvm.tir.decl_buffer(B4.shape, B4.dtype, name='B4')
+ B5a = tvm.tir.decl_buffer(B5.shape, B5.dtype, name='B5')
+
+ Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B')
+ stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {B0: B0a, B1: B1a, B2: B2a, B3: B2a, B4: B4a, B5: B5a, B: Bb}, 64)
+ stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
+ stmt = tvm.tir.ir_pass.StorageRewrite(stmt)
# verify only have one allocations.
# verify inplace folding works
def verify(n):
if isinstance(n, tvm.tir.Allocate):
assert n.extents[0].value == 70
- tvm.ir_pass.PostOrderVisit(stmt, verify)
+ tvm.tir.ir_pass.PostOrderVisit(stmt, verify)
def test_alloc_seq_type():
- ib = tvm.ir_builder.create()
- n = tvm.var("n")
+ ib = tvm.tir.ir_builder.create()
+ n = te.var("n")
with ib.for_range(0, n, name="i") as i:
with ib.for_range(0, 10, name="j") as j:
A = ib.allocate("float32", 200, name="A", scope="local.L0A")
A[j] = 1.2
A1[j] = 1.3
B = ib.allocate("int16", 200, name="B", scope="local.L0A")
- B[j] = tvm.const(1, "int16")
+ B[j] = tvm.tir.const(1, "int16")
C = ib.allocate("int16", 200, name="C", scope="local.L0A")
- C[j] = tvm.const(1, "int16")
+ C[j] = tvm.tir.const(1, "int16")
D = ib.allocate("int16", 200, name="D", scope="local.L0A")
D[j] = B[j] + C[j]
A2 = ib.allocate("float32", 200, name="A2", scope="local.L0A")
A2[j] = A[j]
body = ib.get()
- body = tvm.ir_pass.StorageRewrite(body)
+ body = tvm.tir.ir_pass.StorageRewrite(body)
num_alloc = [0]
def verify(n):
if isinstance(n, tvm.tir.Allocate):
num_alloc[0] += 1
assert n.extents[0].value == 500
- tvm.ir_pass.PostOrderVisit(body, verify)
+ tvm.tir.ir_pass.PostOrderVisit(body, verify)
assert num_alloc[0] == 1
def test_alloc_seq_type2():
register_mem(scope_tb, max_bits)
- ib = tvm.ir_builder.create()
- n = tvm.var("n")
+ ib = tvm.tir.ir_builder.create()
+ n = te.var("n")
with ib.for_range(0, n, name="i") as i:
with ib.for_range(0, 10, name="j") as j:
A = ib.allocate("float32", 200, name="A", scope=scope_tb)
A[j] = 1.2
with ib.for_range(0, 20, name="j") as j:
B = ib.allocate("int16", 400, name="B", scope=scope_tb)
- B[j] = tvm.const(1, "int16")
+ B[j] = tvm.tir.const(1, "int16")
with ib.for_range(0, 10, name="j") as j:
C = ib.allocate("float32", 200, name="C", scope=scope_tb)
C[j] = 1.2
body = ib.get()
- body = tvm.ir_pass.StorageRewrite(body)
+ body = tvm.tir.ir_pass.StorageRewrite(body)
num_alloc = [0]
def verify(n):
if isinstance(n, tvm.tir.Allocate):
num_alloc[0] += 1
assert n.extents[0].value == 200
- tvm.ir_pass.PostOrderVisit(body, verify)
+ tvm.tir.ir_pass.PostOrderVisit(body, verify)
assert num_alloc[0] == 1
def test_reuse_small_buffer():
- ib = tvm.ir_builder.create()
- n = tvm.var("n")
+ ib = tvm.tir.ir_builder.create()
+ n = te.var("n")
with ib.for_range(0, n, name="i") as i:
with ib.for_range(0, 10, name="j") as j:
A = ib.allocate("int16", 200, name="A", scope="local.L0A")
- A[j] = tvm.const(1, "int16")
+ A[j] = tvm.tir.const(1, "int16")
B = ib.allocate("int16", 200, name="B", scope="local.L0A")
- B[j] = tvm.const(1, "int16")
+ B[j] = tvm.tir.const(1, "int16")
B1 = ib.allocate("int16", 200, name="B1", scope="local.L0A")
B1[j] = A[j] + B[j]
C = ib.allocate("int16", 400, name="C", scope="local.L0A")
- C[j] = tvm.const(1, "int16")
+ C[j] = tvm.tir.const(1, "int16")
D = ib.allocate("int16", 400, name="D", scope="local.L0A")
- D[j] = tvm.const(1, "int16")
+ D[j] = tvm.tir.const(1, "int16")
E = ib.allocate("int16", 400, name="E", scope="local.L0A")
E[j] = C[j]
body = ib.get()
- body = tvm.ir_pass.StorageRewrite(body)
+ body = tvm.tir.ir_pass.StorageRewrite(body)
num_alloc = [0]
if isinstance(n, tvm.tir.Allocate):
num_alloc[0] += 1
assert n.extents[0].value == 800
- tvm.ir_pass.PostOrderVisit(body, verify)
+ tvm.tir.ir_pass.PostOrderVisit(body, verify)
assert num_alloc[0] == 1
def test_replace_dataflow():
shape = (255,)
- A = tvm.placeholder(shape, name = "A")
- B = tvm.compute(shape, lambda i: A[i] + A[i], name = "B")
- C = tvm.compute(shape, lambda i: A[i] + B[i], name = "C")
- D = tvm.compute(shape, lambda i: A[i] + C[i], name = "D")
- E = tvm.compute(shape, lambda i: A[i] + D[i], name = "E")
+ A = te.placeholder(shape, name = "A")
+ B = te.compute(shape, lambda i: A[i] + A[i], name = "B")
+ C = te.compute(shape, lambda i: A[i] + B[i], name = "C")
+ D = te.compute(shape, lambda i: A[i] + C[i], name = "D")
+ E = te.compute(shape, lambda i: A[i] + D[i], name = "E")
- s = tvm.create_schedule(E.op)
+ s = te.create_schedule(E.op)
s.cache_read(A, "local", [B, C, D, E])
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
n = 16384
shape = (n, n)
- a = tvm.placeholder(shape, name='a', dtype='int32')
- b = tvm.placeholder(shape, name='b', dtype='int32')
- c = tvm.compute(shape, lambda i, j: compute(a, b)[i, j])
- c = tvm.compute(shape, lambda i, j: 1 + c[i, j])
- s = tvm.create_schedule(c.op)
+ a = te.placeholder(shape, name='a', dtype='int32')
+ b = te.placeholder(shape, name='b', dtype='int32')
+ c = te.compute(shape, lambda i, j: compute(a, b)[i, j])
+ c = te.compute(shape, lambda i, j: 1 + c[i, j])
+ s = te.create_schedule(c.op)
stmt = tvm.lower(s, [a, b, c], simple_mode=True)
def verify(n):
if isinstance(n, tvm.tir.Allocate):
assert n.extents[0].value == 268435456
- tvm.ir_pass.PostOrderVisit(stmt, verify)
+ tvm.tir.ir_pass.PostOrderVisit(stmt, verify)
if __name__ == "__main__":
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_storage_sync():
- m = tvm.size_var('m')
- l = tvm.size_var('l')
- A = tvm.placeholder((m, l), name='A')
+ m = te.size_var('m')
+ l = te.size_var('l')
+ A = te.placeholder((m, l), name='A')
- A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1')
- A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')
+ A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1')
+ A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')
- s = tvm.create_schedule(A2.op)
+ s = te.create_schedule(A2.op)
xo, xi = s[A2].split(A2.op.axis[0], factor=8)
- s[A2].bind(xo, tvm.thread_axis("blockIdx.x"))
+ s[A2].bind(xo, te.thread_axis("blockIdx.x"))
s[A1].compute_at(s[A2], xo)
s[A1].set_scope("shared")
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
- Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
- A2b = tvm.decl_buffer(A2.shape, A2.dtype, name='A2')
- stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64)
- f = tvm.ir_pass.MakeAPI(stmt, "test", [Ab, A2b], 0, True)
- flist = tvm.ir_pass.SplitHostDevice(f)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
+ Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
+ A2b = tvm.tir.decl_buffer(A2.shape, A2.dtype, name='A2')
+ stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64)
+ f = tvm.tir.ir_pass.MakeAPI(stmt, "test", [Ab, A2b], 0, True)
+ flist = tvm.tir.ir_pass.SplitHostDevice(f)
f = flist[1]
- f = tvm.ir_pass.ThreadSync(f, "shared")
+ f = tvm.tir.ir_pass.ThreadSync(f, "shared")
body_list = tvm.tir.stmt_list(f.body.body.body.body)
assert(body_list[1].value.name == "tvm_storage_sync")
unit_bits=8,
max_simd_bits=32,
max_num_bits=128,
- head_address=tvm.call_extern("handle", "global_cache"))
- ib = tvm.ir_builder.create()
- n = tvm.size_var("n")
- cp = tvm.thread_axis((0, 1), "cop")
+ head_address=tvm.tir.call_extern("handle", "global_cache"))
+ ib = tvm.tir.ir_builder.create()
+ n = te.size_var("n")
+ cp = te.thread_axis((0, 1), "cop")
A = ib.allocate("float32", 128, name="A", scope="global.cache")
with ib.for_range(0, n, name="i") as i:
A[i] = A[i] + 1
ib.scope_attr(cp, "coproc_scope", 1)
A[j] = A[j + k * 10] + 2
stmt = ib.get()
- stmt = tvm.ir_pass.CoProcSync(stmt)
+ stmt = tvm.tir.ir_pass.CoProcSync(stmt)
body = stmt.body.body.body
blist = tvm.tir.stmt_list(body)
assert(blist[1].value.name == "cop.coproc_read_barrier")
def test_coproc_sync2():
- ib = tvm.ir_builder.create()
- n = tvm.size_var("n")
- cp = tvm.thread_axis((0, 1), "cop")
- ty = tvm.thread_axis("cthread")
+ ib = tvm.tir.ir_builder.create()
+ n = te.size_var("n")
+ cp = te.thread_axis((0, 1), "cop")
+ ty = te.thread_axis("cthread")
A = ib.allocate("float32", 128, name="A")
ib.scope_attr(ty, "virtual_thread", 2)
with ib.new_scope():
ib.scope_attr(cp, "coproc_scope", 2)
A[ty] = 1.0
stmt = ib.get()
- stmt = tvm.ir_pass.CoProcSync(stmt)
+ stmt = tvm.tir.ir_pass.CoProcSync(stmt)
def test_coproc_sync3():
def __check_list(tvm_array, py_list):
return False
return True
- ib = tvm.ir_builder.create()
- n = tvm.size_var("n")
- cp = tvm.thread_axis((0, 1), "cop")
+ ib = tvm.tir.ir_builder.create()
+ n = te.size_var("n")
+ cp = te.thread_axis((0, 1), "cop")
A = ib.allocate("float32", 128, name="A", scope="global.cache")
with ib.for_range(0, n, name="i") as i:
with ib.for_range(0, n, name="i") as j:
A[0] = 0.0
stmt = ib.get()
- stmt = tvm.ir_pass.CoProcSync(stmt)
+ stmt = tvm.tir.ir_pass.CoProcSync(stmt)
slist = tvm.tir.stmt_list(stmt[0].body.body)
push_st = slist[2]
slist = tvm.tir.stmt_list(slist[-1])
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import os
def test_unroll_loop():
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
dtype = 'int64'
- n = tvm.size_var('n')
- Ab = tvm.decl_buffer((n, ), dtype)
+ n = te.size_var('n')
+ Ab = tvm.tir.decl_buffer((n, ), dtype)
Aptr = ib.buffer_ptr(Ab)
# for i in 0 to n-1:
with ib.for_range(n, n + 2, name="i") as i:
stmt = ib.get()
assert isinstance(stmt, tvm.tir.For)
- ret = tvm.ir_pass.UnrollLoop(stmt, 16, 8, 0, True)
+ ret = tvm.tir.ir_pass.UnrollLoop(stmt, 16, 8, 0, True)
assert not isinstance(ret, tvm.tir.For)
- ret = tvm.ir_pass.UnrollLoop(stmt, 15, 8, 0, True)
+ ret = tvm.tir.ir_pass.UnrollLoop(stmt, 15, 8, 0, True)
assert isinstance(ret, tvm.tir.For)
- ret = tvm.ir_pass.UnrollLoop(stmt, 16, 8, 0, False)
+ ret = tvm.tir.ir_pass.UnrollLoop(stmt, 16, 8, 0, False)
assert isinstance(ret, tvm.tir.For)
assert ret.for_type == tvm.tir.For.Unrolled
- ib = tvm.ir_builder.create()
- ib.scope_attr(tvm.const(0, "int32"), "pragma_auto_unroll_max_step", 16)
+ ib = tvm.tir.ir_builder.create()
+ ib.scope_attr(tvm.tir.const(0, "int32"), "pragma_auto_unroll_max_step", 16)
ib.emit(stmt)
wrapped = ib.get()
wrapped = tvm.tir.SeqStmt([wrapped, stmt])
assert isinstance(ret, tvm.tir.For)
- ret = tvm.ir_pass.UnrollLoop(wrapped, 0, 8, 0, False)
+ ret = tvm.tir.ir_pass.UnrollLoop(wrapped, 0, 8, 0, False)
assert isinstance(ret[0], tvm.tir.For)
assert ret[0].for_type == tvm.tir.For.Unrolled
assert isinstance(ret[1], tvm.tir.For)
assert ret[1].for_type != tvm.tir.For.Unrolled
def test_unroll_fake_loop():
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
dtype = 'int32'
- n = tvm.size_var('n')
- Ab = tvm.decl_buffer((n, ), dtype)
+ n = te.size_var('n')
+ Ab = tvm.tir.decl_buffer((n, ), dtype)
Aptr = ib.buffer_ptr(Ab)
# for i in 0 to n-1:
with ib.for_range(0, 1, name="i") as i:
Aptr[j + 1] = Aptr[i] + 1
stmt = ib.get()
- ret = tvm.ir_pass.UnrollLoop(stmt, 8, 0, 1, True)
+ ret = tvm.tir.ir_pass.UnrollLoop(stmt, 8, 0, 1, True)
assert isinstance(ret[0], tvm.tir.Store)
def test_unroll_single_count_loops():
- n = tvm.size_var('n')
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute((n,), lambda *i: A(*i), name='B')
- s = tvm.create_schedule(B.op)
+ n = te.size_var('n')
+ A = te.placeholder((n,), name='A')
+ B = te.compute((n,), lambda *i: A(*i), name='B')
+ s = te.create_schedule(B.op)
s = s.normalize()
- dom_map = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, dom_map)
+ dom_map = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, dom_map)
# all parameters to UnrolLoops are default values except for
# auto_unroll_max_extent which has been set to 1 (default:0)
- after_unroll_stmt = tvm.ir_pass.UnrollLoop(stmt, 0, 8, 1, True)
+ after_unroll_stmt = tvm.tir.ir_pass.UnrollLoop(stmt, 0, 8, 1, True)
assert after_unroll_stmt == stmt
if __name__ == "__main__":
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_vectorize_loop():
dtype = 'int64'
- n = tvm.var('n')
- ib = tvm.ir_builder.create()
+ n = te.var('n')
+ ib = tvm.tir.ir_builder.create()
A = ib.pointer("float32", name="A")
with ib.for_range(0, n) as i:
with ib.for_range(0, 4, for_type="vectorize") as j:
- A[j] = tvm.const(1, A.dtype)
+ A[j] = tvm.tir.const(1, A.dtype)
stmt = ib.get()
assert isinstance(stmt.body, tvm.tir.For)
- stmt = tvm.ir_pass.VectorizeLoop(stmt)
+ stmt = tvm.tir.ir_pass.VectorizeLoop(stmt)
assert isinstance(stmt, tvm.tir.For)
assert not isinstance(stmt.body, tvm.tir.For)
assert isinstance(stmt.body.index, tvm.tir.Ramp)
def test_vectorize_vector():
dtype = 'int64'
- n = tvm.var('n')
- ib = tvm.ir_builder.create()
+ n = te.var('n')
+ ib = tvm.tir.ir_builder.create()
A = ib.pointer("float32x4", name="A")
with ib.for_range(0, n) as i:
with ib.for_range(0, 4, for_type="vectorize") as j:
- A[j] = tvm.const(1, A.dtype)
+ A[j] = tvm.tir.const(1, A.dtype)
stmt = ib.get()
assert isinstance(stmt.body, tvm.tir.For)
- stmt = tvm.ir_pass.VectorizeLoop(stmt)
+ stmt = tvm.tir.ir_pass.VectorizeLoop(stmt)
assert isinstance(stmt, tvm.tir.For)
assert not isinstance(stmt.body, tvm.tir.For)
assert isinstance(stmt.body.index, tvm.tir.Ramp)
def test_vectorize_with_if():
- n = tvm.var('n')
- x = tvm.var('x')
- ib = tvm.ir_builder.create()
+ n = te.var('n')
+ x = te.var('x')
+ ib = tvm.tir.ir_builder.create()
A = ib.pointer("float32", name="A")
with ib.for_range(0, 4, for_type="vectorize") as i:
with ib.if_scope(x < n):
with ib.if_scope(i < n):
A[i] = 2.0
stmt = ib.get()
- stmt = tvm.ir_pass.VectorizeLoop(stmt)
+ stmt = tvm.tir.ir_pass.VectorizeLoop(stmt)
assert isinstance(stmt, tvm.tir.IfThenElse)
assert isinstance(stmt.then_case.index, tvm.tir.Ramp)
assert isinstance(stmt.then_case.value, tvm.tir.Add)
assert isinstance(stmt.else_case, tvm.tir.For)
def test_vectorize_with_le_cond():
- n = tvm.var('n')
- ib = tvm.ir_builder.create()
+ n = te.var('n')
+ ib = tvm.tir.ir_builder.create()
A = ib.pointer("float32", name="A")
with ib.for_range(0, 4, for_type="vectorize") as i:
with ib.if_scope(i <= n):
A[i] = A[i] + 1
stmt = ib.get()
- stmt = tvm.ir_pass.VectorizeLoop(stmt)
+ stmt = tvm.tir.ir_pass.VectorizeLoop(stmt)
assert isinstance(stmt, tvm.tir.For)
def test_vectorize_with_ge_cond():
- n = tvm.var('n')
- ib = tvm.ir_builder.create()
+ n = te.var('n')
+ ib = tvm.tir.ir_builder.create()
A = ib.pointer("float32", name="A")
with ib.for_range(0, 4, for_type="vectorize") as i:
with ib.if_scope(i >= n):
A[i] = A[i] + 1
stmt = ib.get()
- stmt = tvm.ir_pass.VectorizeLoop(stmt)
+ stmt = tvm.tir.ir_pass.VectorizeLoop(stmt)
assert isinstance(stmt, tvm.tir.For)
def test_vectorize_if_then_else():
- n = tvm.var('n')
- x = tvm.var('x')
- ib = tvm.ir_builder.create()
+ n = te.var('n')
+ x = te.var('x')
+ ib = tvm.tir.ir_builder.create()
A = ib.pointer("float32", name="A")
with ib.for_range(0, 4, for_type="vectorize") as i:
- A[i] = tvm.call_intrin("float32", "tvm_if_then_else",
+ A[i] = tvm.tir.call_intrin("float32", "tvm_if_then_else",
i > 0,
A[i] + 1, A[i])
stmt = ib.get()
- stmt = tvm.ir_pass.VectorizeLoop(stmt)
+ stmt = tvm.tir.ir_pass.VectorizeLoop(stmt)
assert isinstance(stmt, tvm.tir.For)
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
A = ib.pointer("float32", name="A")
with ib.for_range(0, n) as k:
with ib.for_range(0, 4, for_type="vectorize") as i:
- A[k * 4 + i] = tvm.call_intrin("float32", "tvm_if_then_else",
+ A[k * 4 + i] = tvm.tir.call_intrin("float32", "tvm_if_then_else",
k > 0,
A[k * 4 + i], 0)
stmt = ib.get()
assert isinstance(stmt.body, tvm.tir.For)
- stmt = tvm.ir_pass.VectorizeLoop(stmt)
+ stmt = tvm.tir.ir_pass.VectorizeLoop(stmt)
assert not isinstance(stmt.body, tvm.tir.For)
assert isinstance(stmt.body.value.args[2], tvm.tir.Broadcast)
# under the License.
"""Test gpu code verifier"""
import tvm
+from tvm import te
def get_verify_pass(valid, **kwargs):
def verify_pass(stmt):
- valid[0] = tvm.ir_pass.VerifyGPUCode(stmt, kwargs)
+ valid[0] = tvm.tir.ir_pass.VerifyGPUCode(stmt, kwargs)
return stmt
return verify_pass
tvm_type = tvm.runtime.DataType(dtype)
type_size = tvm_type.bits // 8 * tvm_type.lanes
- A = tvm.placeholder((N,), name='A', dtype=dtype)
- B = tvm.compute((N, ), lambda i: A[i], name='B')
+ A = te.placeholder((N,), name='A', dtype=dtype)
+ B = te.compute((N, ), lambda i: A[i], name='B')
- s = tvm.create_schedule([B.op])
+ s = te.create_schedule([B.op])
AA = s.cache_read(A, "shared", [B])
o, i = s[B].split(s[B].op.axis[0], M)
s[AA].compute_at(s[B], o)
- s[B].bind(o, tvm.thread_axis("blockIdx.x"))
- s[B].bind(i, tvm.thread_axis("threadIdx.x"))
+ s[B].bind(o, te.thread_axis("blockIdx.x"))
+ s[B].bind(i, te.thread_axis("threadIdx.x"))
# shared memory usage: M * sizeof(dtype) Bytes
# thread usage: M
if not tvm.context(target).exist:
continue
valid = [None]
- with tvm.build_config(**{"add_lower_pass": [
+ with tvm.target.build_config(**{"add_lower_pass": [
(2, get_verify_pass(valid,
max_shared_memory_per_block=type_size * M - 1,
max_threads_per_block=M))]}):
tvm.build(s, [A, B], target)
assert not valid[0]
- with tvm.build_config(**{"add_lower_pass": [
+ with tvm.target.build_config(**{"add_lower_pass": [
(2, get_verify_pass(valid,
max_shared_memory_per_block=type_size * M,
max_threads_per_block=M))]}):
N = 1024
M = 128
- A = tvm.placeholder((N,), name='A', dtype='float32')
- B = tvm.compute((N, ), lambda i: A[i], name='B')
+ A = te.placeholder((N,), name='A', dtype='float32')
+ B = te.compute((N, ), lambda i: A[i], name='B')
- s = tvm.create_schedule([B.op])
+ s = te.create_schedule([B.op])
AA = s.cache_read(A, "local", [B])
o, i = s[B].split(s[B].op.axis[0], M)
s[AA].compute_at(s[B], o)
- s[B].bind(o, tvm.thread_axis("blockIdx.x"))
+ s[B].bind(o, te.thread_axis("blockIdx.x"))
# local memory usage: M * 4B
# thread usage: M
continue
valid = [None]
- with tvm.build_config(**{"add_lower_pass": [
+ with tvm.target.build_config(**{"add_lower_pass": [
(2, get_verify_pass(valid,
max_local_memory_per_block=4 * M - 1,
max_threads_per_block=1))]}):
tvm.build(s, [A, B], target)
assert not valid[0]
- with tvm.build_config(**{"add_lower_pass": [
+ with tvm.target.build_config(**{"add_lower_pass": [
(2, get_verify_pass(valid,
max_local_memory_per_block=4 * M,
max_threads_per_block=1))]}):
N = 1024
M = 128
- A = tvm.placeholder((N,), name='A', dtype='float32')
- B = tvm.compute((N, ), lambda i: A[i], name='B')
+ A = te.placeholder((N,), name='A', dtype='float32')
+ B = te.compute((N, ), lambda i: A[i], name='B')
- s = tvm.create_schedule([B.op])
+ s = te.create_schedule([B.op])
o, i = s[B].split(s[B].op.axis[0], M)
- s[B].bind(o, tvm.thread_axis('threadIdx.x'))
- s[B].bind(i, tvm.thread_axis("threadIdx.y"))
+ s[B].bind(o, te.thread_axis('threadIdx.x'))
+ s[B].bind(i, te.thread_axis("threadIdx.y"))
# shared memory usage: 0
# thread usage: N
continue
valid = [None]
- with tvm.build_config(**{"add_lower_pass": [
+ with tvm.target.build_config(**{"add_lower_pass": [
(2, get_verify_pass(valid,
max_shared_memory_per_block=0,
max_threads_per_block=N - 1))]}):
tvm.build(s, [A, B], target)
assert not valid[0]
- with tvm.build_config(**{"add_lower_pass": [
+ with tvm.target.build_config(**{"add_lower_pass": [
(2, get_verify_pass(valid,
max_shared_memory_per_block=0,
max_threads_per_block=N))]}):
tvm.build(s, [A, B], target)
assert valid[0]
- with tvm.build_config(**{"add_lower_pass": [
+ with tvm.target.build_config(**{"add_lower_pass": [
(2, get_verify_pass(valid,
max_shared_memory_per_block=0,
max_threads_per_block=N,
tvm.build(s, [A, B], target)
assert not valid[0]
- with tvm.build_config(**{"add_lower_pass": [
+ with tvm.target.build_config(**{"add_lower_pass": [
(2, get_verify_pass(valid,
max_shared_memory_per_block=0,
max_threads_per_block=N,
def test_multiple_kernels():
N = 1024
- A = tvm.placeholder((N, N), name='A')
- B = tvm.compute((N, N), lambda i, j: A[i, j])
- C = tvm.compute((N, N), lambda i, j: B[i, j])
+ A = te.placeholder((N, N), name='A')
+ B = te.compute((N, N), lambda i, j: A[i, j])
+ C = te.compute((N, N), lambda i, j: B[i, j])
- s = tvm.create_schedule([C.op])
+ s = te.create_schedule([C.op])
- s[C].bind(s[C].op.axis[1], tvm.thread_axis("threadIdx.x"))
- s[B].bind(s[B].op.axis[1], tvm.thread_axis("threadIdx.x"))
+ s[C].bind(s[C].op.axis[1], te.thread_axis("threadIdx.x"))
+ s[B].bind(s[B].op.axis[1], te.thread_axis("threadIdx.x"))
# shared memory usage: 0
# thread usage: N
continue
valid = [None]
- with tvm.build_config(**{"add_lower_pass": [
+ with tvm.target.build_config(**{"add_lower_pass": [
(2, get_verify_pass(valid,
max_shared_memory_per_block=0,
max_threads_per_block=N - 1))]}):
tvm.build(s, [A, C], target)
assert not valid[0]
- with tvm.build_config(**{"add_lower_pass": [
+ with tvm.target.build_config(**{"add_lower_pass": [
(2, get_verify_pass(valid,
max_shared_memory_per_block=0,
max_threads_per_block=N))]}):
def test_wrong_bind():
N = 1024
- A = tvm.placeholder((N, N-1), name='A')
- B = tvm.compute((N, N-1), lambda i, j: A[i, j])
+ A = te.placeholder((N, N-1), name='A')
+ B = te.compute((N, N-1), lambda i, j: A[i, j])
- s = tvm.create_schedule([B.op])
+ s = te.create_schedule([B.op])
# bind a thread axis to two loop axes with different lengths
- s[B].bind(s[B].op.axis[0], tvm.thread_axis("threadIdx.x"))
- s[B].bind(s[B].op.axis[1], tvm.thread_axis("threadIdx.x"))
+ s[B].bind(s[B].op.axis[0], te.thread_axis("threadIdx.x"))
+ s[B].bind(s[B].op.axis[1], te.thread_axis("threadIdx.x"))
for target in ['opencl', 'cuda']:
if not tvm.context(target).exist:
continue
valid = [None]
- with tvm.build_config(**{"add_lower_pass": [
+ with tvm.target.build_config(**{"add_lower_pass": [
(2, get_verify_pass(valid, max_threads_per_block=N*N))]}):
tvm.build(s, [A, B], target)
assert not valid[0]
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
# The following DLDeviceType/TVMDeviceExtType values
# are originally defined in dlpack.h and c_runtime_api.h.
binds = {}
arg_list = []
for x in args:
- if isinstance(x, tvm.tensor.Tensor):
- buf = tvm.decl_buffer(x.shape, dtype=x.dtype, name=x.name)
+ if isinstance(x, te.tensor.Tensor):
+ buf = tvm.tir.decl_buffer(x.shape, dtype=x.dtype, name=x.name)
assert x not in binds
binds[x] = buf
arg_list.append(buf)
else:
raise ValueError("args must be Tensor, Buffer or Var")
sch = sch.normalize()
- bounds = tvm.schedule.InferBound(sch)
- stmt = tvm.schedule.ScheduleOps(sch, bounds)
- stmt = tvm.ir_pass.LoopPartition(stmt, False)
- stmt = tvm.ir_pass.StorageFlatten(stmt, binds, 64)
- func = tvm.ir_pass.MakeAPI(stmt, "myadd", arg_list, 0, True)
+ bounds = tvm.te.schedule.InferBound(sch)
+ stmt = tvm.te.schedule.ScheduleOps(sch, bounds)
+ stmt = tvm.tir.ir_pass.LoopPartition(stmt, False)
+ stmt = tvm.tir.ir_pass.StorageFlatten(stmt, binds, 64)
+ func = tvm.tir.ir_pass.MakeAPI(stmt, "myadd", arg_list, 0, True)
return func
# So VerifyMemory pass is expected to succeed.
#
def test_verify_memory_all_bind():
- n = tvm.var("n")
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute(A.shape, lambda i: A[i] + 1.0, name="B")
+ n = te.var("n")
+ A = te.placeholder((n,), name='A')
+ B = te.compute(A.shape, lambda i: A[i] + 1.0, name="B")
# B is bound to threads.
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
bx, tx = s[B].split(B.op.axis[0], factor=64)
- s[B].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[B].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[B].bind(bx, te.thread_axis("blockIdx.x"))
+ s[B].bind(tx, te.thread_axis("threadIdx.x"))
func = lower(s, [A, B])
for dev_type in gpu_devices + other_devices:
- assert tvm.ir_pass.VerifyMemory(func, dev_type)
+ assert tvm.tir.ir_pass.VerifyMemory(func, dev_type)
# Computations are not bound.
# So VerifyMemory pass fails when device type is GPU.
#
def test_verify_memory_not_bind():
- n = tvm.var("n")
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute(A.shape, lambda i: A[i] + 1.0, name="B")
+ n = te.var("n")
+ A = te.placeholder((n,), name='A')
+ B = te.compute(A.shape, lambda i: A[i] + 1.0, name="B")
# B is not bound to threads.
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
func = lower(s, [A, B])
for dev_type in gpu_devices:
- assert not tvm.ir_pass.VerifyMemory(func, dev_type)
+ assert not tvm.tir.ir_pass.VerifyMemory(func, dev_type)
for dev_type in other_devices:
- assert tvm.ir_pass.VerifyMemory(func, dev_type)
+ assert tvm.tir.ir_pass.VerifyMemory(func, dev_type)
# Computations are partially bound.
# So VerifyMemory pass fails when device type is GPU.
#
def test_verify_memory_partially_bind():
- n = tvm.var("n")
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute(A.shape, lambda i: A[i] + 1.0, name="B")
- C = tvm.compute(B.shape, lambda i: B[i] + 2.0, name="C")
- D = tvm.compute(C.shape, lambda i: C[i] + 2.0, name="D")
+ n = te.var("n")
+ A = te.placeholder((n,), name='A')
+ B = te.compute(A.shape, lambda i: A[i] + 1.0, name="B")
+ C = te.compute(B.shape, lambda i: B[i] + 2.0, name="C")
+ D = te.compute(C.shape, lambda i: C[i] + 2.0, name="D")
# C is bound to threads, but B and D are not.
- s = tvm.create_schedule([B.op, C.op, D.op])
+ s = te.create_schedule([B.op, C.op, D.op])
bx, tx = s[C].split(C.op.axis[0], factor=64)
- s[C].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[C].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[C].bind(bx, te.thread_axis("blockIdx.x"))
+ s[C].bind(tx, te.thread_axis("threadIdx.x"))
func = lower(s, [A, B, C, D])
for dev_type in gpu_devices:
- assert not tvm.ir_pass.VerifyMemory(func, dev_type)
+ assert not tvm.tir.ir_pass.VerifyMemory(func, dev_type)
for dev_type in other_devices:
- assert tvm.ir_pass.VerifyMemory(func, dev_type)
+ assert tvm.tir.ir_pass.VerifyMemory(func, dev_type)
if __name__ == "__main__":
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_virtual_thread():
- m = tvm.var('m')
- A = tvm.placeholder((m, ), name='A')
- A1 = tvm.compute((m,), lambda i: A[i], name='A1')
- A2 = tvm.compute((m,), lambda i: A1[i] + 3, name='A2')
+ m = te.var('m')
+ A = te.placeholder((m, ), name='A')
+ A1 = te.compute((m,), lambda i: A[i], name='A1')
+ A2 = te.compute((m,), lambda i: A1[i] + 3, name='A2')
- s = tvm.create_schedule(A2.op)
- vx = tvm.thread_axis("vthread", name="vx")
+ s = te.create_schedule(A2.op)
+ vx = te.thread_axis("vthread", name="vx")
xo, xi = s[A2].split(A2.op.axis[0], nparts=2)
s[A2].bind(xo, vx)
xo, xi = s[A2].split(xi, 8)
s[A1].compute_at(s[A2], xo)
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
- Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
- A2b = tvm.decl_buffer(A2.shape, A2.dtype, name='A2')
- stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64)
- stmt = tvm.ir_pass.Simplify(stmt)
- stmt = tvm.ir_pass.InjectVirtualThread(stmt)
+ Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A')
+ A2b = tvm.tir.decl_buffer(A2.shape, A2.dtype, name='A2')
+ stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64)
+ stmt = tvm.tir.ir_pass.Simplify(stmt)
+ stmt = tvm.tir.ir_pass.InjectVirtualThread(stmt)
print(stmt)
if __name__ == "__main__":
# under the License.
"""Test runtime error handling"""
import tvm
+from tvm import te
import tvm.testing
def test_op_translation():
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
@tvm.register_extension
def test_dltensor_compatible():
dtype = 'int64'
- n = tvm.var('n')
- Ab = tvm.decl_buffer((n,), dtype)
- i = tvm.var('i')
- ib = tvm.ir_builder.create()
+ n = te.var('n')
+ Ab = tvm.tir.decl_buffer((n,), dtype)
+ i = te.var('i')
+ ib = tvm.tir.ir_builder.create()
A = ib.buffer_ptr(Ab)
with ib.for_range(0, n - 1, "i") as i:
A[i + 1] = A[i] + 1
stmt = ib.get()
- fapi = tvm.ir_pass.MakeAPI(stmt, "arange", [Ab], 0, True)
- fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
+ fapi = tvm.tir.ir_pass.MakeAPI(stmt, "arange", [Ab], 0, True)
+ fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi)
f = tvm.target.codegen.build_module(fapi, "stackvm")
a = tvm.nd.array(np.zeros(10, dtype=dtype))
aview = MyTensorView(a)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
import json
from tvm import rpc
def test_graph_simple():
n = 4
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
- s = tvm.create_schedule(B.op)
+ A = te.placeholder((n,), name='A')
+ B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
+ s = te.create_schedule(B.op)
node0 = {"op": "null", "name": "x", "inputs": []}
node1 = {"op": "tvm_op", "name": "add",
# under the License.
import os
import tvm
+from tvm import te
import numpy as np
import json
from tvm import rpc
def test_graph_simple():
n = 4
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
- s = tvm.create_schedule(B.op)
+ A = te.placeholder((n,), name='A')
+ B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
+ s = te.create_schedule(B.op)
node0 = {"op": "null", "name": "x", "inputs": []}
node1 = {"op": "tvm_op", "name": "add",
import numpy as np
import tvm
+from tvm import te
from tvm.contrib import graph_runtime, util
import topi
shape = (4,)
# Create module for add whose target is the device.
- tensor_a = tvm.placeholder(shape, name="A")
- tensor_b = tvm.placeholder(shape, name="B")
- elemwise_add = tvm.compute(shape, lambda *i: tensor_a(*i)
+ tensor_a = te.placeholder(shape, name="A")
+ tensor_b = te.placeholder(shape, name="B")
+ elemwise_add = te.compute(shape, lambda *i: tensor_a(*i)
+ tensor_b(*i), name="elemwise_add")
target = topi.cpp.TEST_create_target(device)
schedule_add = topi.cpp.cuda.schedule_injective(target, [elemwise_add])
# Insert copy. Neither compute nor schedule is required for the copy
# node. The compute will be performed at runtime which is just data
# copy from the input to the output.
- tensor_copy = tvm.placeholder(shape, name="__copy")
+ tensor_copy = te.placeholder(shape, name="__copy")
# Create module for sub whose target is the host.
- tensor_c = tvm.placeholder(shape, name="C")
- elemwise_sub = tvm.compute(shape, lambda *i: tensor_copy(*i)
+ tensor_c = te.placeholder(shape, name="C")
+ elemwise_sub = te.compute(shape, lambda *i: tensor_copy(*i)
- tensor_c(*i), name="elemwise_sub")
- schedule_sub = tvm.create_schedule(elemwise_sub.op)
+ schedule_sub = te.create_schedule(elemwise_sub.op)
lower_sub = tvm.lower(schedule_sub, [tensor_copy, tensor_c,
elemwise_sub],
name="elemwise_sub")
# Insert copy nodes for data transferring between add and sub nodes.
# Transfers data from gpu to cpu.
- copy_add_sub = tvm.placeholder(shape, name="__copy0")
+ copy_add_sub = te.placeholder(shape, name="__copy0")
# Transfers data from cpu to gpu.
- copy_sub_add = tvm.placeholder(shape, name="__copy1")
+ copy_sub_add = te.placeholder(shape, name="__copy1")
# Create a module containing adds on the device.
- tensor_a = tvm.placeholder(shape, name="A")
- tensor_b = tvm.placeholder(shape, name="B")
- tensor_d = tvm.placeholder(shape, name="D")
- elemwise_add0 = tvm.compute(shape, lambda *i: tensor_a(*i)
+ tensor_a = te.placeholder(shape, name="A")
+ tensor_b = te.placeholder(shape, name="B")
+ tensor_d = te.placeholder(shape, name="D")
+ elemwise_add0 = te.compute(shape, lambda *i: tensor_a(*i)
+ tensor_b(*i), name="elemwise_add0")
- elemwise_add1 = tvm.compute(shape, lambda *i: copy_sub_add(*i)
+ elemwise_add1 = te.compute(shape, lambda *i: copy_sub_add(*i)
+ tensor_d(*i), name="elemwise_add1")
target = topi.cpp.TEST_create_target(device)
add_schedule0 = topi.cpp.cuda.schedule_injective(
add_schedule1, [tensor_d, copy_sub_add, elemwise_add1],
name="elemwise_add1")
# Create module for sub whose target is the host.
- tensor_c = tvm.placeholder(shape, name="C")
- elemwise_sub = tvm.compute(shape, lambda *i: copy_add_sub(*i)
+ tensor_c = te.placeholder(shape, name="C")
+ elemwise_sub = te.compute(shape, lambda *i: copy_add_sub(*i)
- tensor_c(*i), name="elemwise_sub")
- sub_schedule = tvm.create_schedule(elemwise_sub.op)
+ sub_schedule = te.create_schedule(elemwise_sub.op)
lower_sub = tvm.lower(sub_schedule, [copy_add_sub, tensor_c,
elemwise_sub],
name="elemwise_sub")
import ctypes
import tvm
+from tvm import te
from tvm.contrib.util import tempdir
with open(filename, "a") as fout:
fout.write("c")
- X = tvm.compute((), lambda : tvm.call_packed("my_debug", filename))
- s = tvm.create_schedule(X.op)
+ X = te.compute((), lambda : tvm.tir.call_packed("my_debug", filename))
+ s = te.create_schedule(X.op)
func = tvm.build(s, [X])
x = tvm.nd.empty((), dtype="int32")
import numpy as np
import tvm
+from tvm import te
from tvm.contrib import graph_runtime, util
from tvm import relay
import tvm.micro as micro
mod : tvm.runtime.Module
graph runtime module for the target device
"""
- with tvm.build_config(disable_vectorize=True):
+ with tvm.target.build_config(disable_vectorize=True):
graph, c_mod, params = relay.build(func, target="c", params=params)
micro_mod = create_micro_mod(c_mod, dev_config)
ctx = tvm.micro_dev(0)
dtype = "float32"
# Construct TVM expression.
- tvm_shape = tvm.convert(shape)
- A = tvm.placeholder(tvm_shape, name="A", dtype=dtype)
- B = tvm.placeholder(tvm_shape, name="B", dtype=dtype)
- C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name="C")
- s = tvm.create_schedule(C.op)
+ tvm_shape = tvm.runtime.convert(shape)
+ A = te.placeholder(tvm_shape, name="A", dtype=dtype)
+ B = te.placeholder(tvm_shape, name="B", dtype=dtype)
+ C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name="C")
+ s = te.create_schedule(C.op)
func_name = "fadd"
c_mod = tvm.build(s, [A, B, C], target="c", name=func_name)
dtype = "float32"
# Construct TVM expression.
- tvm_shape = tvm.convert(shape)
- A = tvm.placeholder(tvm_shape, name="A", dtype=dtype)
- B = tvm.placeholder(tvm_shape, name="B", dtype=dtype)
- B = tvm.compute(A.shape, lambda *i: A(*i) + 1, name="B")
- C = tvm.compute(A.shape, lambda *i: B(*i) + 1, name="C")
- s = tvm.create_schedule(C.op)
+ tvm_shape = tvm.runtime.convert(shape)
+ A = te.placeholder(tvm_shape, name="A", dtype=dtype)
+ B = te.placeholder(tvm_shape, name="B", dtype=dtype)
+ B = te.compute(A.shape, lambda *i: A(*i) + 1, name="B")
+ C = te.compute(A.shape, lambda *i: B(*i) + 1, name="C")
+ s = te.create_schedule(C.op)
func_name = "fadd_two_workspace"
c_mod = tvm.build(s, [A, C], target="c", name=func_name)
from tvm import relay
from tvm.relay import testing
import tvm
+from tvm import te
from tvm.contrib import util
header_file_dir_path = util.tempdir()
with relay.build_config(opt_level=3):
_, resnet18_cpu_lib, _ = relay.build_module.build(resnet18_mod, "llvm", params=resnet18_params)
- A = tvm.placeholder((1024,), name='A')
- B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
- s = tvm.create_schedule(B.op)
+ A = te.placeholder((1024,), name='A')
+ B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
+ s = te.create_schedule(B.op)
f = tvm.build(s, [A, B], "llvm", name="myadd")
from tvm.contrib import util
temp = util.tempdir()
f.write(subgraph_json)
# Get Json and module.
- A = tvm.placeholder((1024,), name='A')
- B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
- s = tvm.create_schedule(B.op)
+ A = te.placeholder((1024,), name='A')
+ B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
+ s = te.create_schedule(B.op)
f = tvm.build(s, [A, B], "llvm", name="myadd")
try:
ext_lib = tvm.runtime.load_module(subgraph_path, "examplejson")
with relay.build_config(opt_level=3):
_, resnet18_cpu_lib, _ = relay.build_module.build(resnet18_mod, "llvm", params=resnet18_params)
- A = tvm.placeholder((1024,), name='A')
- B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
- s = tvm.create_schedule(B.op)
+ A = te.placeholder((1024,), name='A')
+ B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
+ s = te.create_schedule(B.op)
f = tvm.build(s, [A, B], "c", name="myadd")
engine_module = generate_engine_module()
from tvm.contrib import util
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
from tvm.contrib import cc, util
import ctypes
import os
os.environ["TVM_USE_RUNTIME_LIB"] = "1"
os.environ["TVM_FFI"] = "ctypes"
import tvm
+from tvm import te
import numpy as np
path_dso = sys.argv[1]
dtype = sys.argv[2]
temp = util.tempdir()
def save_object(names):
- n = tvm.size_var('n')
- Ab = tvm.decl_buffer((n, ), dtype)
- i = tvm.var('i')
+ n = te.size_var('n')
+ Ab = tvm.tir.decl_buffer((n, ), dtype)
+ i = te.var('i')
# for i in 0 to n-1:
stmt = tvm.tir.For(
i, 0, n - 1, 0, 0,
tvm.tir.Store(Ab.data,
tvm.tir.Load(dtype, Ab.data, i) + 1,
i + 1))
- fapi = tvm.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
- fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
+ fapi = tvm.tir.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
+ fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi)
m = tvm.target.codegen.build_module(fapi, "llvm")
for name in names:
m.save(name)
def test_device_module_dump():
# graph
- n = tvm.convert(1024)
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
- s = tvm.create_schedule(B.op)
+ n = tvm.runtime.convert(1024)
+ A = te.placeholder((n,), name='A')
+ B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
+ s = te.create_schedule(B.op)
# create iter var and assign them tags.
num_thread = 8
bx, tx = s[B].split(B.op.axis[0], factor=num_thread)
- s[B].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[B].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[B].bind(bx, te.thread_axis("blockIdx.x"))
+ s[B].bind(tx, te.thread_axis("threadIdx.x"))
def check_device(device):
ctx = tvm.context(device, 0)
"""Test combine multiple module into one shared lib."""
# graph
nn = 12
- n = tvm.convert(nn)
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
- s = tvm.create_schedule(B.op)
+ n = tvm.runtime.convert(nn)
+ A = te.placeholder((n,), name='A')
+ B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
+ s = te.create_schedule(B.op)
def check_llvm():
ctx = tvm.cpu(0)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
def enabled_ctx_list():
n = 100
for (src, dst) in [('float32', 'float16'), ('float16', 'float32')]:
- A = tvm.placeholder((n,), dtype=src)
- B = tvm.compute((n,), lambda i: A[i].astype(dst))
+ A = te.placeholder((n,), dtype=src)
+ B = te.compute((n,), lambda i: A[i].astype(dst))
- s = tvm.create_schedule([B.op])
+ s = te.create_schedule([B.op])
func = tvm.build(s, [A, B], 'llvm')
x_tvm = tvm.nd.array(100 * np.random.randn(n).astype(src) - 50)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import tvm.testing
import numpy as np
assert y == 10
def test_get_callback_with_node():
- x = tvm.convert(10)
+ x = tvm.runtime.convert(10)
def test(y):
assert y.handle != x.handle
return y
- f2 = tvm.convert(test)
+ f2 = tvm.runtime.convert(test)
# register into global function table
@tvm.register_func
def my_callback_with_node(y, f):
def test_return_func():
def addy(y):
def add(x):
- return tvm.convert(x + y)
+ return tvm.runtime.convert(x + y)
return add
- myf = tvm.convert(addy)
+ myf = tvm.runtime.convert(addy)
f = myf(10)
assert f(11).value == 21
def myfunc(*args):
assert(tuple(args) == targs)
- f = tvm.convert(myfunc)
+ f = tvm.runtime.convert(myfunc)
assert isinstance(f, tvm.runtime.PackedFunc)
def test_byte_array():
def myfunc(ss):
assert ss == a
- f = tvm.convert(myfunc)
+ f = tvm.runtime.convert(myfunc)
f(a)
def test_empty_array():
def myfunc(ss):
assert tuple(ss) == ()
- x = tvm.convert(())
- tvm.convert(myfunc)(x)
+ x = tvm.runtime.convert(())
+ tvm.runtime.convert(myfunc)(x)
def test_ctx():
def test_trace_default_action():
n = 2
- x = tvm.placeholder((n,n,n), name="X", dtype="float32")
- y = tvm.compute(x.shape, lambda i, j, k: tvm.trace([i, j, k, x[i][j][k]]))
- s = tvm.create_schedule(y.op)
+ x = te.placeholder((n,n,n), name="X", dtype="float32")
+ y = te.compute(x.shape, lambda i, j, k: tvm.tir.trace([i, j, k, x[i][j][k]]))
+ s = te.create_schedule(y.op)
f = tvm.build(s, [x, y], target="llvm")
xnd = tvm.nd.array(np.ones((n,n,n), dtype=x.dtype))
ynd = tvm.nd.array(np.zeros((n,n,n), dtype=y.dtype))
f(xnd, ynd)
def test_trace_expr_assign():
- @tvm.register_func("tvm.trace_callback2")
+ @tvm.register_func("tvm.tir.trace_callback2")
def trace_buffer(x):
return
def check_assign(dtype):
n = 4
- x = tvm.placeholder((n,n,n), name="X", dtype=dtype)
- y = tvm.compute(x.shape, lambda i, j, k: tvm.trace([x[i][j][k]], "tvm.trace_callback2"))
- z = tvm.compute(x.shape, lambda i, j, k: tvm.trace([y[i][j][k]], "tvm.trace_callback2"))
- s = tvm.create_schedule(z.op)
+ x = te.placeholder((n,n,n), name="X", dtype=dtype)
+ y = te.compute(x.shape, lambda i, j, k: tvm.tir.trace([x[i][j][k]], "tvm.tir.trace_callback2"))
+ z = te.compute(x.shape, lambda i, j, k: tvm.tir.trace([y[i][j][k]], "tvm.tir.trace_callback2"))
+ s = te.create_schedule(z.op)
f = tvm.build(s, [x, y, z], "llvm")
xnd = tvm.nd.array(np.ones((n,n,n), dtype=x.dtype))
check_assign(t)
def test_trace_expr_sum_generated():
- @tvm.register_func("tvm.trace_callback3")
+ @tvm.register_func("tvm.tir.trace_callback3")
def trace_buffer(x):
return
def check_expr_sum(dtype):
n = 4
- a = tvm.placeholder((n,n,n), name="a", dtype=dtype)
- b = tvm.placeholder((n,n,n), name="b", dtype=dtype)
- c = tvm.compute(a.shape, lambda i, j, k: tvm.trace([a[i][j][k]],"tvm.trace_callback3")
- + tvm.trace([b[i][j][k]],"tvm.trace_callback3"))
- s = tvm.create_schedule(c.op)
+ a = te.placeholder((n,n,n), name="a", dtype=dtype)
+ b = te.placeholder((n,n,n), name="b", dtype=dtype)
+ c = te.compute(a.shape, lambda i, j, k: tvm.tir.trace([a[i][j][k]],"tvm.tir.trace_callback3")
+ + tvm.tir.trace([b[i][j][k]],"tvm.tir.trace_callback3"))
+ s = te.create_schedule(c.op)
f = tvm.build(s, [a, b, c])
xnd = tvm.nd.array(np.array(np.ones((n,n,n), dtype=a.dtype)))
ynd = tvm.nd.array(np.array(np.ones((n,n,n), dtype=b.dtype)))
check_expr_sum(t)
def test_trace_expr_sum_args():
- @tvm.register_func("tvm.trace_silent")
+ @tvm.register_func("tvm.tir.trace_silent")
def silent(*args):
return
def check_expr_sum(dtype):
n = 4
- a = tvm.placeholder((n,n,n), name="a", dtype=dtype)
- b = tvm.placeholder((n,n,n), name="b", dtype=dtype)
- e = tvm.placeholder((n,n,n), name="e", dtype=dtype)
- d = tvm.placeholder((n,n,n), name="d", dtype=dtype)
-
- c = tvm.compute(a.shape, lambda i, j, k: tvm.trace([i, j, k, a[i][j][k]], "tvm.trace_silent")
- + tvm.trace([i, j, k, b[i][j][k]], "tvm.trace_silent")
- + tvm.trace([i, j, k, d[i][j][k]], "tvm.trace_silent")
- + tvm.trace([i, j, k, e[i][j][k]], "tvm.trace_silent"))
- s = tvm.create_schedule(c.op)
+ a = te.placeholder((n,n,n), name="a", dtype=dtype)
+ b = te.placeholder((n,n,n), name="b", dtype=dtype)
+ e = te.placeholder((n,n,n), name="e", dtype=dtype)
+ d = te.placeholder((n,n,n), name="d", dtype=dtype)
+
+ c = te.compute(a.shape, lambda i, j, k: tvm.tir.trace([i, j, k, a[i][j][k]], "tvm.tir.trace_silent")
+ + tvm.tir.trace([i, j, k, b[i][j][k]], "tvm.tir.trace_silent")
+ + tvm.tir.trace([i, j, k, d[i][j][k]], "tvm.tir.trace_silent")
+ + tvm.tir.trace([i, j, k, e[i][j][k]], "tvm.tir.trace_silent"))
+ s = te.create_schedule(c.op)
f = tvm.build(s, [a, b, d, e, c])
a_nd = tvm.nd.array(np.array(np.ones((n,n,n), dtype=a.dtype)))
b_nd = tvm.nd.array(np.array(np.ones((n,n,n), dtype=b.dtype)))
check_expr_sum(t)
def test_trace_expr_sum_custom():
- @tvm.register_func("tvm.trace_callback4")
+ @tvm.register_func("tvm.tir.trace_callback4")
def trace_buffer(x):
return
def check_expr_sum_custom(dtype):
n = 4
- a = tvm.placeholder((n,n), name="a", dtype=dtype)
- b = tvm.placeholder((n,n), name="b", dtype=dtype)
- c = tvm.compute(a.shape, lambda i,j: tvm.trace([a[i][j]], "tvm.trace_callback4")
- + tvm.trace([b[i][j]], "tvm.trace_callback4"))
- s = tvm.create_schedule(c.op)
+ a = te.placeholder((n,n), name="a", dtype=dtype)
+ b = te.placeholder((n,n), name="b", dtype=dtype)
+ c = te.compute(a.shape, lambda i,j: tvm.tir.trace([a[i][j]], "tvm.tir.trace_callback4")
+ + tvm.tir.trace([b[i][j]], "tvm.tir.trace_callback4"))
+ s = te.create_schedule(c.op)
f = tvm.build(s, [a, b, c])
npa = np.array([[1,0,0,0], [0,1,0,0],[0,0,1,0],[0,0,0,1]], dtype=a.dtype)
npb = np.array([[1,0,0,0], [0,1,0,0],[0,0,1,0],[0,0,0,1]], dtype=a.dtype)
check_expr_sum_custom(t)
def test_trace_can_change_traced_value_int():
- @tvm.register_func("tvm.trace_change_int_first")
+ @tvm.register_func("tvm.tir.trace_change_int_first")
def trace_buffer(x):
return 13
- @tvm.register_func("tvm.trace_change_int_second")
+ @tvm.register_func("tvm.tir.trace_change_int_second")
def trace_buffer(x):
return 14
def check_assign(dtype):
n = 4
- x = tvm.placeholder((n,), name="X", dtype=dtype)
- y = tvm.compute(x.shape, lambda i: tvm.trace([x[i]], "tvm.trace_change_int_first"))
- z = tvm.compute(x.shape, lambda i: tvm.trace([y[i]], "tvm.trace_change_int_second"))
- s = tvm.create_schedule(z.op)
+ x = te.placeholder((n,), name="X", dtype=dtype)
+ y = te.compute(x.shape, lambda i: tvm.tir.trace([x[i]], "tvm.tir.trace_change_int_first"))
+ z = te.compute(x.shape, lambda i: tvm.tir.trace([y[i]], "tvm.tir.trace_change_int_second"))
+ s = te.create_schedule(z.op)
f = tvm.build(s, [x, y, z], "llvm")
xnd = tvm.nd.array(np.ones((n,), dtype=x.dtype))
check_assign(t)
def test_trace_can_change_traced_value_float():
- @tvm.register_func("tvm.trace_change_float_first")
+ @tvm.register_func("tvm.tir.trace_change_float_first")
def trace_buffer(x):
return 13.0
- @tvm.register_func("tvm.trace_change_float_second")
+ @tvm.register_func("tvm.tir.trace_change_float_second")
def trace_buffer(x):
return 14.0
def check_assign(dtype):
n = 4
- x = tvm.placeholder((n,), name="X", dtype=dtype)
- y = tvm.compute(x.shape, lambda i: tvm.trace([x[i]], "tvm.trace_change_float_first"))
- z = tvm.compute(x.shape, lambda i: tvm.trace([y[i]], "tvm.trace_change_float_second"))
- s = tvm.create_schedule(z.op)
+ x = te.placeholder((n,), name="X", dtype=dtype)
+ y = te.compute(x.shape, lambda i: tvm.tir.trace([x[i]], "tvm.tir.trace_change_float_first"))
+ z = te.compute(x.shape, lambda i: tvm.tir.trace([y[i]], "tvm.tir.trace_change_float_second"))
+ s = te.create_schedule(z.op)
f = tvm.build(s, [x, y, z], "llvm")
xnd = tvm.nd.array(np.ones((n,), dtype=x.dtype))
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import tvm.testing
import os
import logging
if host is None:
return
def verify_rpc(remote, target, shape, dtype):
- A = tvm.placeholder(shape, dtype=dtype)
- B = tvm.compute(A.shape, lambda i: A[i]+tvm.const(1, A.dtype))
- s = tvm.create_schedule(B.op)
+ A = te.placeholder(shape, dtype=dtype)
+ B = te.compute(A.shape, lambda i: A[i]+tvm.tir.const(1, A.dtype))
+ s = te.create_schedule(B.op)
f = tvm.build(s, [A, B], target, name="myadd")
ctx = remote.cpu(0)
server = rpc.Server("localhost")
client = rpc.connect(server.host, server.port)
# graph
- n = tvm.convert(1024)
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
- s = tvm.create_schedule(B.op)
+ n = tvm.runtime.convert(1024)
+ A = te.placeholder((n,), name='A')
+ B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
+ s = te.create_schedule(B.op)
def check_remote(remote):
if not tvm.runtime.enabled("llvm"):
return
temp = util.tempdir()
ctx = remote.cl(0)
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
xo, xi = s[B].split(B.op.axis[0], factor=32)
- s[B].bind(xo, tvm.thread_axis("blockIdx.x"))
- s[B].bind(xi, tvm.thread_axis("threadIdx.x"))
+ s[B].bind(xo, te.thread_axis("blockIdx.x"))
+ s[B].bind(xi, te.thread_axis("threadIdx.x"))
f = tvm.build(s, [A, B], "opencl", target_host="llvm", name="myadd")
# Option 1: save modules separately and rely on remote compiler
path_o = temp.relpath("myadd.o")
import numpy as np
import tvm
+from tvm import te
from tvm.runtime import profiler_vm
from tvm import relay
from tvm.relay.testing import resnet
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_bound1():
- m = tvm.var('m')
- l = tvm.var('l')
- A = tvm.placeholder((m, l), name='A')
- A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1')
- A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')
+ m = te.var('m')
+ l = te.var('l')
+ A = te.placeholder((m, l), name='A')
+ A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1')
+ A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')
- s = tvm.create_schedule([A2.op])
+ s = te.create_schedule([A2.op])
xo, xi = s[A2].split(s[A2].op.axis[0], 8)
s[A1].compute_at(s[A2], xo)
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
assert(bounds[A1.op.axis[0]].extent.value == 8)
def test_bound2():
- m = tvm.var('m')
- l = tvm.var('l')
- A = tvm.placeholder((m, l), name='A')
- A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1')
- A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')
- s = tvm.create_schedule(A2.op)
+ m = te.var('m')
+ l = te.var('l')
+ A = te.placeholder((m, l), name='A')
+ A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1')
+ A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')
+ s = te.create_schedule(A2.op)
xo, yo, xi, yi = s[A2].tile(A2.op.axis[0], A2.op.axis[1], 8, 8)
# test normalize not affecting schedule
_ = s.normalize()
s[A1].compute_at(s[A2], yo)
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
assert(bounds[A1.op.axis[0]].extent.value == 8)
assert(bounds[A1.op.axis[1]].extent.value == 8)
def test_bound3():
- m = tvm.var('m')
- l = tvm.var('l')
- A = tvm.placeholder((m, l), name='A')
- A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1')
- A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')
+ m = te.var('m')
+ l = te.var('l')
+ A = te.placeholder((m, l), name='A')
+ A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1')
+ A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')
- s = tvm.create_schedule(A2.op)
+ s = te.create_schedule(A2.op)
s[A1].set_scope("shared")
xo, xi = s[A2].split(A2.op.axis[0], 32)
xi0, xi1 = s[A2].split(xi, nparts=16)
- s[A2].bind(xi0, tvm.thread_axis("threadIdx.x"))
+ s[A2].bind(xi0, te.thread_axis("threadIdx.x"))
yo, yi = s[A2].split(A2.op.axis[1], 16)
# test normalize not affecting schedule
_ = s.normalize()
s[A2].reorder(xo, xi0, yo, xi1, yi)
s[A1].compute_at(s[A2], yo)
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
assert(bounds[A1.op.axis[0]].extent.value==32)
assert(bounds[A1.op.axis[1]].extent.value==16)
def test_bound_split_divisible():
- m = tvm.var('m')
- l = tvm.var('l')
- A = tvm.placeholder((8 * m, l), name='A')
- B = tvm.compute((8 * m, l), lambda i, j: A[i, j], name='B')
- s = tvm.create_schedule(B.op)
+ m = te.var('m')
+ l = te.var('l')
+ A = te.placeholder((8 * m, l), name='A')
+ B = te.compute((8 * m, l), lambda i, j: A[i, j], name='B')
+ s = te.create_schedule(B.op)
xo, xi = s[B].split(B.op.axis[0], 8)
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
assert bounds[xo].extent == m
assert bounds[xi].extent.value == 8
def test_bound_tile_divisible():
- m = tvm.var('m')
- l = tvm.var('l')
+ m = te.var('m')
+ l = te.var('l')
shape = (8 * m, 32 * l)
- A = tvm.placeholder(shape, name='A')
- B = tvm.compute(shape, lambda i, j: A[i, j], name='B')
- s = tvm.create_schedule(B.op)
+ A = te.placeholder(shape, name='A')
+ B = te.compute(shape, lambda i, j: A[i, j], name='B')
+ s = te.create_schedule(B.op)
xo, yo, xi, yi = s[B].tile(B.op.axis[0], B.op.axis[1], 8, 32)
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
assert bounds[xo].extent == m
assert bounds[xi].extent.value == 8
assert bounds[yi].extent.value == 32
def test_bound_fusesplit1():
- m = tvm.var('m')
- l = tvm.var('l')
- split1 = tvm.var('s')
- A = tvm.placeholder((m, l), name='A')
- A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1')
- A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')
-
- s = tvm.create_schedule(A2.op)
+ m = te.var('m')
+ l = te.var('l')
+ split1 = te.var('s')
+ A = te.placeholder((m, l), name='A')
+ A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1')
+ A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')
+
+ s = te.create_schedule(A2.op)
fused_axes = s[A2].fuse(A2.op.axis[0], A2.op.axis[1])
xo, xi = s[A2].split(fused_axes, split1)
s[A1].compute_at(s[A2], xo)
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
- idxdiv = tvm.indexdiv
- assert(tvm.ir_pass.Simplify(
+ idxdiv = tvm.tir.indexdiv
+ assert(tvm.tir.ir_pass.Simplify(
bounds[A1.op.axis[0]].min - idxdiv(xo * split1, l)).value == 0)
expected_extent = (idxdiv((xo + 1) * split1 - 1, l) - idxdiv(xo * split1, l) + 1)
for i in range(1, 6):
for j in range(1, 6):
for k in range(1, 6):
- vars = tvm.convert({split1: tvm.const(i, "int32"), l: tvm.const(j, "int32"), xo.var: tvm.const(k, "int32")})
- comp_ext = tvm.ir_pass.Simplify(tvm.ir_pass.Substitute(bounds[A1.op.axis[0]].extent, vars)).value
- exp_ext = tvm.ir_pass.Simplify(tvm.ir_pass.Substitute(expected_extent, vars)).value
+ vars = tvm.runtime.convert({split1: tvm.tir.const(i, "int32"), l: tvm.tir.const(j, "int32"), xo.var: tvm.tir.const(k, "int32")})
+ comp_ext = tvm.tir.ir_pass.Simplify(tvm.tir.ir_pass.Substitute(bounds[A1.op.axis[0]].extent, vars)).value
+ exp_ext = tvm.tir.ir_pass.Simplify(tvm.tir.ir_pass.Substitute(expected_extent, vars)).value
assert(comp_ext == exp_ext)
- assert(tvm.ir_pass.Simplify(bounds[A1.op.axis[1]].extent - l).value == 0)
+ assert(tvm.tir.ir_pass.Simplify(bounds[A1.op.axis[1]].extent - l).value == 0)
def test_bound_fusesplit2():
- m = tvm.var("m")
- l = tvm.convert(6)
- split = tvm.convert(3)
- A = tvm.placeholder((m, l), name='A')
- A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1')
- A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')
-
- s = tvm.create_schedule(A2.op)
+ m = te.var("m")
+ l = tvm.runtime.convert(6)
+ split = tvm.runtime.convert(3)
+ A = te.placeholder((m, l), name='A')
+ A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1')
+ A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')
+
+ s = te.create_schedule(A2.op)
fused_axes = s[A2].fuse(A2.op.axis[0], A2.op.axis[1])
xo, xi = s[A2].split(fused_axes, split)
s[A1].compute_at(s[A2], xo)
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
- vars = tvm.convert({xo.var: tvm.const(5, "int32")})
- assert(tvm.ir_pass.Simplify(tvm.ir_pass.Substitute(bounds[A1.op.axis[0]].min, vars)).value == 2)
- assert(tvm.ir_pass.Simplify(tvm.ir_pass.Substitute(bounds[A1.op.axis[1]].min, vars)).value == 3)
- assert(tvm.ir_pass.Simplify(tvm.ir_pass.Substitute(bounds[A1.op.axis[0]].extent, vars)).value == 1)
- assert(tvm.ir_pass.Simplify(tvm.ir_pass.Substitute(bounds[A1.op.axis[1]].extent, vars)).value == 3)
+ vars = tvm.runtime.convert({xo.var: tvm.tir.const(5, "int32")})
+ assert(tvm.tir.ir_pass.Simplify(tvm.tir.ir_pass.Substitute(bounds[A1.op.axis[0]].min, vars)).value == 2)
+ assert(tvm.tir.ir_pass.Simplify(tvm.tir.ir_pass.Substitute(bounds[A1.op.axis[1]].min, vars)).value == 3)
+ assert(tvm.tir.ir_pass.Simplify(tvm.tir.ir_pass.Substitute(bounds[A1.op.axis[0]].extent, vars)).value == 1)
+ assert(tvm.tir.ir_pass.Simplify(tvm.tir.ir_pass.Substitute(bounds[A1.op.axis[1]].extent, vars)).value == 3)
def test_bound_warp():
- m = tvm.var('m')
- l = tvm.var('l')
- A = tvm.placeholder((m, l), name='A')
- A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1')
- A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')
+ m = te.var('m')
+ l = te.var('l')
+ A = te.placeholder((m, l), name='A')
+ A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1')
+ A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')
- s = tvm.create_schedule(A2.op)
+ s = te.create_schedule(A2.op)
s[A1].set_scope("warp")
xo, xi = s[A2].split(A2.op.axis[0], 32)
xi0, xi1 = s[A2].split(xi, factor=16)
- tx = tvm.thread_axis("threadIdx.x")
+ tx = te.thread_axis("threadIdx.x")
s[A2].bind(xi1, tx)
- s[A2].bind(xi0, tvm.thread_axis("threadIdx.y"))
+ s[A2].bind(xi0, te.thread_axis("threadIdx.y"))
y = s[A2].op.axis[1]
s[A1].compute_at(s[A2], y)
xo, xi = s[A1].split(s[A1].op.axis[0], factor=16)
s[A1].bind(xi, tx)
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
assert(bounds[A1.op.axis[0]].extent.value==16)
def test_bound_scan():
- m = tvm.var("m")
- n = tvm.var("n")
- X = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x")
- s_state = tvm.placeholder((m, n))
- s_init = tvm.compute((1, n), lambda _, i: X[0, i])
- s_update = tvm.compute((m, n), lambda t, i: s_state[t-1, i] + X[t, i])
- s_scan = tvm.scan(s_init, s_update, s_state)
+ m = te.var("m")
+ n = te.var("n")
+ X = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x")
+ s_state = te.placeholder((m, n))
+ s_init = te.compute((1, n), lambda _, i: X[0, i])
+ s_update = te.compute((m, n), lambda t, i: s_state[t-1, i] + X[t, i])
+ s_scan = tvm.te.scan(s_init, s_update, s_state)
assert tuple(s_scan.shape) == (m, n)
- s = tvm.create_schedule(s_scan.op)
+ s = te.create_schedule(s_scan.op)
XX = s.cache_read(X, "local", s_update)
xo, xi = s[s_update].split(s_update.op.axis[1], factor=4)
s[XX].compute_at(s[s_update], xo)
s = s.normalize()
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
assert bounds[XX.op.axis[1]].extent.value == 4
def test_bound_conv1d():
- n = tvm.var('n')
- A = tvm.compute((n+2), lambda i: 1, name='A')
+ n = te.var('n')
+ A = te.compute((n+2), lambda i: 1, name='A')
def computeB(ii):
i = ii + 1
return A[i-1] + A[i] + A[i+1]
- B = tvm.compute(n, computeB, name='B')
- s = tvm.create_schedule(B.op)
+ B = te.compute(n, computeB, name='B')
+ s = te.create_schedule(B.op)
s[A].compute_at(s[B], B.op.axis[0])
s = s.normalize()
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert(bounds[A.op.axis[0]].extent.value == 3)
def test_bound_blur():
- n = tvm.convert(12)
- A = tvm.compute((n, n), lambda i, j: 1, name='A')
+ n = tvm.runtime.convert(12)
+ A = te.compute((n, n), lambda i, j: 1, name='A')
def computeB(ii, jj):
# set the correct center
i = ii + 1
j = jj + 1
return A[i][j] + A[i-1][j] + A[i+1][j] + A[i][j+1] + A[i][j-1]
- B = tvm.compute((n-2, n-2), computeB, name='B')
- s = tvm.create_schedule(B.op)
+ B = te.compute((n-2, n-2), computeB, name='B')
+ s = te.create_schedule(B.op)
s[A].compute_at(s[B], B.op.axis[1])
s = s.normalize()
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert(bounds[A.op.axis[0]].extent.value == 3)
assert(bounds[A.op.axis[1]].extent.value == 3)
def test_bound_rfactor():
- n = tvm.var('n')
- A = tvm.placeholder((n,), name='A')
- k = tvm.reduce_axis((0, n))
- B = tvm.compute((1,), lambda i: tvm.sum(A[k], axis=k, where=(i>1)), name='B')
+ n = te.var('n')
+ A = te.placeholder((n,), name='A')
+ k = te.reduce_axis((0, n))
+ B = te.compute((1,), lambda i: te.sum(A[k], axis=k, where=(i>1)), name='B')
# schedule
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
kf, ki = s[B].split(k, nparts=4)
BF = s.rfactor(B, kf)
s = s.normalize()
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert(bounds[BF.op.axis[0]].extent.value == 4)
assert(bounds[BF.op.axis[1]].extent.value == 1)
def test_bound_group_schedule():
- m = tvm.var("m")
- n = tvm.var("n")
- x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x")
- x1 = tvm.compute(x.shape, lambda *i: x(*i) + 1, name="x1")
- x2 = tvm.compute(x.shape, lambda *i: x1(*i) + 2, name="x2")
- s = tvm.create_schedule(x2.op)
+ m = te.var("m")
+ n = te.var("n")
+ x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x")
+ x1 = te.compute(x.shape, lambda *i: x(*i) + 1, name="x1")
+ x2 = te.compute(x.shape, lambda *i: x1(*i) + 2, name="x2")
+ s = te.create_schedule(x2.op)
g = s.create_group(outputs=x1, inputs=x, include_inputs=True)
g.compute_at(s[x2], x2.op.axis[0])
assert s[x1].group == g
assert s[x].group == g
s = s.normalize()
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert bounds[x.op.axis[0]].extent.value == 1
assert bounds[x.op.axis[1]].extent == n
def test_bound_nest_group():
- m = tvm.var("m")
- n = tvm.var("n")
- x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x")
- x1 = tvm.compute(x.shape, lambda *i: x(*i) + 1, name="x1")
- x2 = tvm.compute(x.shape, lambda *i: x1(*i) + 2, name="x2")
- s = tvm.create_schedule(x2.op)
+ m = te.var("m")
+ n = te.var("n")
+ x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x")
+ x1 = te.compute(x.shape, lambda *i: x(*i) + 1, name="x1")
+ x2 = te.compute(x.shape, lambda *i: x1(*i) + 2, name="x2")
+ s = te.create_schedule(x2.op)
g1 = s.create_group(outputs=x, inputs=x, include_inputs=True)
g2 = s.create_group(outputs=x1, inputs=x, include_inputs=True)
assert s[x].group == g1
g2.compute_at(s[x2], x2.op.axis[0])
g1.compute_at(s[x1], s[x1].op.axis[1])
s = s.normalize()
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert bounds[x.op.axis[0]].extent.value == 1
assert bounds[x.op.axis[1]].extent.value == 1
assert bounds[x1.op.axis[0]].extent.value == 1
def test_bound_nest_thread():
- m = tvm.var('m')
- A = tvm.placeholder((m), name='A')
- A1 = tvm.compute((m,), lambda i: A[i], name='A1')
- A2 = tvm.compute((m,), lambda i: A1[i] + 2, name='A2')
- A3 = tvm.compute((m,), lambda i: A2[i] + 3, name='A3')
+ m = te.var('m')
+ A = te.placeholder((m), name='A')
+ A1 = te.compute((m,), lambda i: A[i], name='A1')
+ A2 = te.compute((m,), lambda i: A1[i] + 2, name='A2')
+ A3 = te.compute((m,), lambda i: A2[i] + 3, name='A3')
- s = tvm.create_schedule(A3.op)
+ s = te.create_schedule(A3.op)
s[A2].set_scope("shared")
s[A1].set_scope("local")
- block_x = tvm.thread_axis("blockIdx.x")
- thread_x = tvm.thread_axis("threadIdx.x")
+ block_x = te.thread_axis("blockIdx.x")
+ thread_x = te.thread_axis("threadIdx.x")
bx, tx = s[A3].split(A3.op.axis[0], factor=32)
s[A3].bind(bx, block_x)
s[A3].bind(tx, thread_x)
s[A2].bind(xi, thread_x)
s[A1].compute_at(s[A3], tx)
s = s.normalize()
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert(bounds[A1.op.axis[0]].extent.value==1)
assert(bounds[A2.op.axis[0]].extent.value==32)
assert(bounds[A3.op.axis[0]].extent == m)
def test_gemm_bound():
nn = 1024
- n = tvm.convert(nn)
- A = tvm.placeholder((n, n), name='A')
- B = tvm.placeholder((n, n), name='B')
- k = tvm.reduce_axis((0, n), name='k')
- C = tvm.compute(
+ n = tvm.runtime.convert(nn)
+ A = te.placeholder((n, n), name='A')
+ B = te.placeholder((n, n), name='B')
+ k = te.reduce_axis((0, n), name='k')
+ C = te.compute(
(n, n),
- lambda ii, jj: tvm.sum(A[ii, k] * B[jj, k], axis=k),
+ lambda ii, jj: te.sum(A[ii, k] * B[jj, k], axis=k),
name='CC')
# schedule
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
xtile, ytile = 32, 32
scale = 8
num_thread = 8
block_factor = scale * num_thread
- block_x = tvm.thread_axis("blockIdx.x")
- thread_x = tvm.thread_axis("threadIdx.x")
- block_y = tvm.thread_axis("blockIdx.y")
- thread_y = tvm.thread_axis("threadIdx.y")
+ block_x = te.thread_axis("blockIdx.x")
+ thread_x = te.thread_axis("threadIdx.x")
+ block_y = te.thread_axis("blockIdx.y")
+ thread_y = te.thread_axis("threadIdx.y")
CC = s.cache_write(C, "local")
AA = s.cache_read(A, "shared", [CC])
s[BB].bind(ty, thread_y)
s[BB].bind(tx, thread_x)
s = s.normalize()
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert(bounds[BB.op.axis[0]].extent.value==64)
assert(bounds[AA.op.axis[0]].extent.value==64)
assert(bounds[CC.op.axis[0]].extent.value == 8)
def test_bound_tensor_compute_op():
def intrin_test():
- m1 = tvm.var("m1")
- n1 = tvm.var("n1")
- a = tvm.placeholder((m1, n1), name='a')
- c = tvm.compute((1, n1), lambda i, j : a[0, j] + a[1, j] + a[2, j], name='c')
+ m1 = te.var("m1")
+ n1 = te.var("n1")
+ a = te.placeholder((m1, n1), name='a')
+ c = te.compute((1, n1), lambda i, j : a[0, j] + a[1, j] + a[2, j], name='c')
- Ab = tvm.decl_buffer(a.shape, name="Abuf", offset_factor=1)
- Cb = tvm.decl_buffer(c.shape, name="Cbuf", offset_factor=1)
+ Ab = tvm.tir.decl_buffer(a.shape, name="Abuf", offset_factor=1)
+ Cb = tvm.tir.decl_buffer(c.shape, name="Cbuf", offset_factor=1)
def intrin_func(ins, outs):
aa = ins[0]
cc = outs[0]
def _body():
- ib = tvm.ir_builder.create()
- ib.emit(tvm.call_extern("int32", "test", cc.access_ptr("w"), aa.access_ptr("r")))
+ ib = tvm.tir.ir_builder.create()
+ ib.emit(tvm.tir.call_extern("int32", "test", cc.access_ptr("w"), aa.access_ptr("r")))
return ib.get()
return _body()
- with tvm.build_config(offset_factor=1):
- return tvm.decl_tensor_intrin(c.op, intrin_func, binds={a : Ab, c : Cb})
+ with tvm.target.build_config(offset_factor=1):
+ return te.decl_tensor_intrin(c.op, intrin_func, binds={a : Ab, c : Cb})
test_func = intrin_test()
- A = tvm.placeholder((20,20), name='A')
- B = tvm.compute(A.shape, lambda i,j : A[i,j], name='B')
- C = tvm.compute((10, 20), lambda i : test_func(B[i:10, 0:20]), name='C')
- s = tvm.create_schedule(C.op)
- bounds = tvm.schedule.InferBound(s)
+ A = te.placeholder((20,20), name='A')
+ B = te.compute(A.shape, lambda i,j : A[i,j], name='B')
+ C = te.compute((10, 20), lambda i : test_func(B[i:10, 0:20]), name='C')
+ s = te.create_schedule(C.op)
+ bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
assert(bounds[B.op.axis[0]].extent.value == 10)
def test_bound_simplification_failure():
# Check that the bounds are not expanded
- A = tvm.compute((2,), lambda j: j, "A")
+ A = te.compute((2,), lambda j: j, "A")
def _check(B, A=A):
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
s = s.normalize()
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
stmt = tvm.lower(s, [B, A], simple_mode=True)
if not bounds[A.op.axis[0]].extent.value <= 2:
print(stmt)
assert bounds[A.op.axis[0]].extent.value <= 2
- tdiv = tvm.truncdiv
+ tdiv = tvm.tir.truncdiv
# These are hard to simplify, moreover we don't simplify them
- _check(tvm.compute((10,), lambda i: A[tvm.min(3*i, 4*i) + tvm.min(-3*i, -2*i)]))
- _check(tvm.compute((10,), lambda i: A[tvm.min(3*i, 4*i) + tvm.max(-3*i, -4*i)]))
- _check(tvm.compute((10,), lambda i: A[-2*tdiv(i,2) - tvm.min(i, 0-i)]))
- _check(tvm.compute((10,), lambda i: A[i + (0 - i)]))
+ _check(te.compute((10,), lambda i: A[tvm.te.min(3*i, 4*i) + tvm.te.min(-3*i, -2*i)]))
+ _check(te.compute((10,), lambda i: A[tvm.te.min(3*i, 4*i) + tvm.te.max(-3*i, -4*i)]))
+ _check(te.compute((10,), lambda i: A[-2*tdiv(i,2) - tvm.te.min(i, 0-i)]))
+ _check(te.compute((10,), lambda i: A[i + (0 - i)]))
# This would cause out of bounds, but we nevertheless include it
- _check(tvm.compute((10,), lambda i: A[i]))
+ _check(te.compute((10,), lambda i: A[i]))
if __name__ == "__main__":
test_bound_nest_thread()
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_scan():
- m = tvm.var("m")
- n = tvm.var("n")
- x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x")
- s_state = tvm.placeholder((m, n))
- s_init = tvm.compute((1, n), lambda _, i: x[0, i], name="s_init")
- x_trans = tvm.compute((m, n), lambda i, j: x[i, j] + 1, name="x_trans")
- s_up1 = tvm.compute((m, n), lambda t, i: s_state[t - 1, i] + 1, name="up1")
- s_update = tvm.compute((m, n), lambda t, i: s_up1[t, i] + x_trans[t, i], name="update")
- s_scan = tvm.scan(s_init, s_update, s_state)
+ m = te.var("m")
+ n = te.var("n")
+ x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x")
+ s_state = te.placeholder((m, n))
+ s_init = te.compute((1, n), lambda _, i: x[0, i], name="s_init")
+ x_trans = te.compute((m, n), lambda i, j: x[i, j] + 1, name="x_trans")
+ s_up1 = te.compute((m, n), lambda t, i: s_state[t - 1, i] + 1, name="up1")
+ s_update = te.compute((m, n), lambda t, i: s_up1[t, i] + x_trans[t, i], name="update")
+ s_scan = tvm.te.scan(s_init, s_update, s_state)
def test_getbody():
- body = tvm.schedule.ScanGetBody(s_scan.op)
+ body = tvm.te.schedule.ScanGetBody(s_scan.op)
assert set(body) == set([s_scan.op, s_update.op, s_up1.op])
def test_attach_path():
- s = tvm.create_schedule(s_scan.op)
+ s = te.create_schedule(s_scan.op)
s[x_trans].compute_at(s[s_update], s_update.op.axis[0])
- apath = tvm.schedule.CreateAttachPath(s)
+ apath = tvm.te.schedule.CreateAttachPath(s)
assert(tuple(apath[s_update.op]) == tuple([s_scan.op.scan_axis]))
assert(tuple(apath[x_trans.op]) == tuple([s_update.op.axis[0], s_scan.op.scan_axis]))
def test_fix_pt():
- body = tvm.schedule.ScanGetBody(s_scan.op)
- fxpt = tvm.schedule.ScanFixPointAnalysis(s_scan.op, body)
+ body = tvm.te.schedule.ScanGetBody(s_scan.op)
+ fxpt = tvm.te.schedule.ScanFixPointAnalysis(s_scan.op, body)
assert(fxpt[s_scan.spatial_axis_[0]].value != 0)
def test_scan_fix_point():
- m = tvm.var("m")
- n = tvm.var("n")
- l = tvm.var("l")
- x = tvm.compute((l, m, n), lambda *i: tvm.const(1, "float32"), name="x")
- s_state = tvm.placeholder((l, m, n))
- s_init = tvm.compute((1, m, n), lambda _, i, j: x[0, i, j], name="s_init")
+ m = te.var("m")
+ n = te.var("n")
+ l = te.var("l")
+ x = te.compute((l, m, n), lambda *i: tvm.tir.const(1, "float32"), name="x")
+ s_state = te.placeholder((l, m, n))
+ s_init = te.compute((1, m, n), lambda _, i, j: x[0, i, j], name="s_init")
def test_scan0():
- s_update = tvm.compute((l, m, n),
+ s_update = te.compute((l, m, n),
lambda t, i, j: x[t, j, i] + s_state[t-1, i, j], name="update")
- s_scan = tvm.scan(s_init, s_update, s_state)
- body = tvm.schedule.ScanGetBody(s_scan.op)
- fxpt = tvm.schedule.ScanFixPointAnalysis(s_scan.op, body)
+ s_scan = tvm.te.scan(s_init, s_update, s_state)
+ body = tvm.te.schedule.ScanGetBody(s_scan.op)
+ fxpt = tvm.te.schedule.ScanFixPointAnalysis(s_scan.op, body)
assert(fxpt[s_scan.op.spatial_axis_[0]].value == 1)
assert(fxpt[s_scan.op.spatial_axis_[1]].value == 1)
def test_scan1():
- s_update = tvm.compute((l, m, n),
+ s_update = te.compute((l, m, n),
lambda t, i, j: x[t, j, i] + s_state[t-1, j, i], name="update")
- s_scan = tvm.scan(s_init, s_update, s_state)
- body = tvm.schedule.ScanGetBody(s_scan.op)
- fxpt = tvm.schedule.ScanFixPointAnalysis(s_scan.op, body)
+ s_scan = tvm.te.scan(s_init, s_update, s_state)
+ body = tvm.te.schedule.ScanGetBody(s_scan.op)
+ fxpt = tvm.te.schedule.ScanFixPointAnalysis(s_scan.op, body)
assert(fxpt[s_scan.op.spatial_axis_[0]].value == 0)
assert(fxpt[s_scan.op.spatial_axis_[1]].value == 0)
def test_scan3_not_exact_reach():
- s_h1 = tvm.compute((l, n, m), lambda t, j, i: s_state[t-1, i, j], name="h1")
- s_h2 = tvm.compute((l, m, n), lambda t, i, j: s_state[t-1, i, 10] * 2, name="h1")
- s_update = tvm.compute((l, m, n), lambda t, i, j: s_h1[t, j, i] + s_h2[t, i, j], name="update")
- s_scan = tvm.scan(s_init, s_update, s_state)
- body = tvm.schedule.ScanGetBody(s_scan.op)
- fxpt = tvm.schedule.ScanFixPointAnalysis(s_scan.op)
+ s_h1 = te.compute((l, n, m), lambda t, j, i: s_state[t-1, i, j], name="h1")
+ s_h2 = te.compute((l, m, n), lambda t, i, j: s_state[t-1, i, 10] * 2, name="h1")
+ s_update = te.compute((l, m, n), lambda t, i, j: s_h1[t, j, i] + s_h2[t, i, j], name="update")
+ s_scan = tvm.te.scan(s_init, s_update, s_state)
+ body = tvm.te.schedule.ScanGetBody(s_scan.op)
+ fxpt = tvm.te.schedule.ScanFixPointAnalysis(s_scan.op)
assert(fxpt[s_scan.op.spatial_axis_[0]].value == 1)
assert(fxpt[s_scan.op.spatial_axis_[1]].value == 0)
def test_scan4_reach_other():
- s_h1 = tvm.compute((l, n, m), lambda t, j, i: s_state[t-1, j, j], name="h1")
- s_h2 = tvm.compute((l, m, n), lambda t, i, j: s_state[t-1, i, j] * 2, name="h1")
- s_update = tvm.compute((l, m, n),
+ s_h1 = te.compute((l, n, m), lambda t, j, i: s_state[t-1, j, j], name="h1")
+ s_h2 = te.compute((l, m, n), lambda t, i, j: s_state[t-1, i, j] * 2, name="h1")
+ s_update = te.compute((l, m, n),
lambda t, i, j: s_h1[t, j, i] + s_h2[t, i, j], name="update")
- s_scan = tvm.scan(s_init, s_update, s_state)
- fxpt = tvm.schedule.ScanFixPointAnalysis(s_scan.op)
+ s_scan = tvm.te.scan(s_init, s_update, s_state)
+ fxpt = tvm.te.schedule.ScanFixPointAnalysis(s_scan.op)
assert(fxpt[s_scan.op.spatial_axis_[0]].value == 0)
assert(fxpt[s_scan.op.spatial_axis_[1]].value == 0)
def test_scan5_multi_output():
- m = tvm.var("m")
- n = tvm.var("n")
- x1 = tvm.placeholder((m, n))
- s1 = tvm.placeholder((m, n))
- x2 = tvm.placeholder((m, n))
- s2 = tvm.placeholder((m, n))
- s1_init = tvm.compute((1, n), lambda _, i: x1[0, i])
- s2_init = tvm.compute((1, n), lambda _, i: x2[0, i])
- s1_update = tvm.compute((m, n), lambda t, i: s1[t-1, i] + x1[t, i])
- s2_update = tvm.compute((m, n), lambda t, i: x2[t, i] + s2[t-1,i])
- r0, r1 = tvm.scan([s1_init, s2_init],
+ m = te.var("m")
+ n = te.var("n")
+ x1 = te.placeholder((m, n))
+ s1 = te.placeholder((m, n))
+ x2 = te.placeholder((m, n))
+ s2 = te.placeholder((m, n))
+ s1_init = te.compute((1, n), lambda _, i: x1[0, i])
+ s2_init = te.compute((1, n), lambda _, i: x2[0, i])
+ s1_update = te.compute((m, n), lambda t, i: s1[t-1, i] + x1[t, i])
+ s2_update = te.compute((m, n), lambda t, i: x2[t, i] + s2[t-1,i])
+ r0, r1 = tvm.te.scan([s1_init, s2_init],
[s1_update, s2_update],
[s1, s2])
- body = tvm.schedule.ScanGetBody(r0.op)
- fxpt = tvm.schedule.ScanFixPointAnalysis(r0.op)
+ body = tvm.te.schedule.ScanGetBody(r0.op)
+ fxpt = tvm.te.schedule.ScanFixPointAnalysis(r0.op)
assert(fxpt[r1.op.spatial_axis_[0]].value == 1)
test_scan0()
test_scan5_multi_output()
def test_create_read_graph():
- m = tvm.var('m')
- l = tvm.var('l')
- A = tvm.placeholder((m, l), name='A')
- A1 = tvm.compute((m, l), lambda i, j: A[i, j])
- A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3)
+ m = te.var('m')
+ l = te.var('l')
+ A = te.placeholder((m, l), name='A')
+ A1 = te.compute((m, l), lambda i, j: A[i, j])
+ A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3)
- g = tvm.schedule.CreateReadGraph([A2.op])
+ g = tvm.te.schedule.CreateReadGraph([A2.op])
assert g[A2.op][0] == A1
assert g[A1.op][0] == A
- post_order = tvm.schedule.PostDFSOrder([A2.op], g)
+ post_order = tvm.te.schedule.PostDFSOrder([A2.op], g)
assert(post_order[0] == A.op)
assert(post_order[1] == A1.op)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def test_lstm_cell_inline():
num_step = 128
num_hidden = 1152
batch_size = 4
# Global transition matrix
- X = tvm.placeholder((num_step - 1, batch_size, num_input), name="X")
- Wi2h = tvm.placeholder((4, num_hidden, num_input), name="Wi2h")
- Wh2h = tvm.placeholder((4, num_hidden, num_hidden), name="Wh2h")
+ X = te.placeholder((num_step - 1, batch_size, num_input), name="X")
+ Wi2h = te.placeholder((4, num_hidden, num_input), name="Wi2h")
+ Wh2h = te.placeholder((4, num_hidden, num_hidden), name="Wh2h")
# h: output hidden state, c: cell state.
- s_state_h = tvm.placeholder((num_step, batch_size, num_hidden))
- s_state_c = tvm.placeholder((num_step, batch_size, num_hidden))
- s_init_c = tvm.compute((1, batch_size, num_hidden),
+ s_state_h = te.placeholder((num_step, batch_size, num_hidden))
+ s_state_c = te.placeholder((num_step, batch_size, num_hidden))
+ s_init_c = te.compute((1, batch_size, num_hidden),
lambda *i: 0.0, name="init_c")
- s_init_h = tvm.compute((1, batch_size, num_hidden),
+ s_init_h = te.compute((1, batch_size, num_hidden),
lambda *i: 0.0, name="init_h")
# LSTM transition
- k = tvm.reduce_axis((0, num_input), name="ki2h")
- s_i2h = tvm.compute(
+ k = te.reduce_axis((0, num_input), name="ki2h")
+ s_i2h = te.compute(
(num_step, 4, batch_size, num_hidden),
- lambda t, x, i, j: tvm.sum(X[t - 1, i, k] * Wi2h[x, j, k], axis=k),
+ lambda t, x, i, j: te.sum(X[t - 1, i, k] * Wi2h[x, j, k], axis=k),
name="s_i2h")
- k = tvm.reduce_axis((0, num_hidden), name="ki2h")
- s_h2h = tvm.compute(
+ k = te.reduce_axis((0, num_hidden), name="ki2h")
+ s_h2h = te.compute(
(num_step, 4, batch_size, num_hidden),
- lambda t, x, i, j: tvm.sum(s_state_h[t - 1, i, k] * Wh2h[x, j, k], axis=k),
+ lambda t, x, i, j: te.sum(s_state_h[t - 1, i, k] * Wh2h[x, j, k], axis=k),
name="s_h2h")
# Gate rules
- gates = tvm.compute(s_i2h.shape, lambda *i:
+ gates = te.compute(s_i2h.shape, lambda *i:
s_i2h(*i) + s_h2h(*i), name="gates")
gshape = (num_step, batch_size, num_hidden)
- in_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, 0, i, j]), name="in_gate")
- in_transform = tvm.compute(gshape, lambda t, i, j: tvm.tanh(gates[t, 1, i, j]), name="in_transform")
- forget_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, 2, i, j]), name="forget_gate")
- out_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, 3, i, j]), name="out_gate")
- next_c = tvm.compute(gshape,
+ in_gate = te.compute(gshape, lambda t, i, j: te.sigmoid(gates[t, 0, i, j]), name="in_gate")
+ in_transform = te.compute(gshape, lambda t, i, j: te.tanh(gates[t, 1, i, j]), name="in_transform")
+ forget_gate = te.compute(gshape, lambda t, i, j: te.sigmoid(gates[t, 2, i, j]), name="forget_gate")
+ out_gate = te.compute(gshape, lambda t, i, j: te.sigmoid(gates[t, 3, i, j]), name="out_gate")
+ next_c = te.compute(gshape,
lambda t, i, j:
forget_gate[t, i, j] * s_state_c[t - 1, i, j] +
in_gate[t, i, j] * in_transform[t, i, j], name="next_c")
- next_h = tvm.compute(gshape,
- lambda t, i, j: out_gate[t, i, j] * tvm.tanh(next_c[t, i, j]), name="next_h")
- update_c = tvm.compute(gshape, lambda *i: next_c(*i), name="update_c")
- update_h = tvm.compute(gshape, lambda *i: next_h(*i), name="update_h")
+ next_h = te.compute(gshape,
+ lambda t, i, j: out_gate[t, i, j] * te.tanh(next_c[t, i, j]), name="next_h")
+ update_c = te.compute(gshape, lambda *i: next_c(*i), name="update_c")
+ update_h = te.compute(gshape, lambda *i: next_h(*i), name="update_h")
# schedule
- scan_h, scan_c = tvm.scan(
+ scan_h, scan_c = tvm.te.scan(
[s_init_h, s_init_c],
[update_h, update_c],
[s_state_h, s_state_c],
inputs=[X],
name="lstm_scan")
# schedule
- s = tvm.create_schedule(scan_h.op)
+ s = te.create_schedule(scan_h.op)
# Inline gate computations
s[gates].compute_inline()
s[in_gate].compute_inline()
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
def test_schedule0():
- m = tvm.var('m')
- l = tvm.var('l')
- A = tvm.placeholder((m, l), name='A')
- A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1')
- s = tvm.create_schedule(A1.op)
+ m = te.var('m')
+ l = te.var('l')
+ A = te.placeholder((m, l), name='A')
+ A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1')
+ s = te.create_schedule(A1.op)
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
def test_schedule1():
- m = tvm.var('m')
- l = tvm.var('l')
- A = tvm.placeholder((m, l), name='A')
- A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1')
+ m = te.var('m')
+ l = te.var('l')
+ A = te.placeholder((m, l), name='A')
+ A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1')
- s = tvm.create_schedule(A1.op)
+ s = te.create_schedule(A1.op)
xo, xi = s[A1].split(A1.op.axis[0], 8)
s[A1].pragma(xo, "auto_unroll_max_step", 10)
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
def test_schedule2():
- m = tvm.var('m')
- l = tvm.var('l')
- A = tvm.placeholder((m, l), name='A')
- A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1')
- A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')
+ m = te.var('m')
+ l = te.var('l')
+ A = te.placeholder((m, l), name='A')
+ A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1')
+ A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')
- s = tvm.create_schedule(A2.op)
+ s = te.create_schedule(A2.op)
xo, xi = s[A2].split(A2.op.axis[0], 8)
s[A1].compute_at(s[A2], xo)
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
def test_schedule_scan():
- m = tvm.var("m")
- n = tvm.var("n")
- x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x")
- s_state = tvm.placeholder((m, n))
- s_init = tvm.compute((1, n), lambda _, i: x[0, i])
- s_update = tvm.compute((m, n), lambda t, i: s_state[t-1, i] + x[t, i])
- res = tvm.scan(s_init, s_update, s_state)
+ m = te.var("m")
+ n = te.var("n")
+ x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x")
+ s_state = te.placeholder((m, n))
+ s_init = te.compute((1, n), lambda _, i: x[0, i])
+ s_update = te.compute((m, n), lambda t, i: s_state[t-1, i] + x[t, i])
+ res = tvm.te.scan(s_init, s_update, s_state)
assert tuple(res.shape) == (m, n)
- s = tvm.create_schedule(res.op)
+ s = te.create_schedule(res.op)
s = s.normalize()
ir = tvm.lower(s, [s_state], simple_mode=True)
assert not hasattr(ir.body.body.body.body[1].body.body[1].body, "condition")
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert(bounds[res.op.scan_axis].min.value == 1)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
def test_inline_multi_reduce():
val = tvm.tir.Select((x[1] >= y[1]), x[1], y[1])
return idx, val
def argmax_init(idx_typ, val_typ):
- return tvm.const(-1, idx_typ), tvm.min_value(val_typ)
-
- argmax = tvm.comm_reducer(argmax_comp, argmax_init, name='argmax')
- m = tvm.var('m')
- n = tvm.var('n')
- val = tvm.placeholder((m, n), name='val', dtype='float32')
- val1 = tvm.compute((m, n), lambda i, j: val[i, j]+1, name='val1')
- val2 = tvm.compute((m, n), lambda i, j: tvm.exp(val1[i, j]), name='val2')
- k = tvm.reduce_axis((0, n), 'k')
- T_idx, T_val = tvm.compute((m, ), lambda i: argmax((k.var, val2[i, k]), axis=k), name='T')
- s = tvm.create_schedule(T_idx.op)
+ return tvm.tir.const(-1, idx_typ), tvm.te.min_value(val_typ)
+
+ argmax = te.comm_reducer(argmax_comp, argmax_init, name='argmax')
+ m = te.var('m')
+ n = te.var('n')
+ val = te.placeholder((m, n), name='val', dtype='float32')
+ val1 = te.compute((m, n), lambda i, j: val[i, j]+1, name='val1')
+ val2 = te.compute((m, n), lambda i, j: te.exp(val1[i, j]), name='val2')
+ k = te.reduce_axis((0, n), 'k')
+ T_idx, T_val = te.compute((m, ), lambda i: argmax((k.var, val2[i, k]), axis=k), name='T')
+ s = te.create_schedule(T_idx.op)
s[val1].compute_inline()
s = s.normalize()
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
def test_auto_inline():
- m = tvm.var('m')
- n = tvm.var('n')
- A = tvm.placeholder((m, n), name='A')
- B = tvm.placeholder((m, n), name='B')
- C = tvm.placeholder((m, n), name='C')
- T1 = tvm.compute((m, n), lambda i, j: A(i, j) * B(i, j), name='T1')
- T2 = tvm.compute((m, n), lambda i, j: T1(i, j) + C(i, j), name='T2')
-
- s = tvm.create_schedule(T2.op)
- tvm.schedule.AutoInlineElemWise(s)
+ m = te.var('m')
+ n = te.var('n')
+ A = te.placeholder((m, n), name='A')
+ B = te.placeholder((m, n), name='B')
+ C = te.placeholder((m, n), name='C')
+ T1 = te.compute((m, n), lambda i, j: A(i, j) * B(i, j), name='T1')
+ T2 = te.compute((m, n), lambda i, j: T1(i, j) + C(i, j), name='T2')
+
+ s = te.create_schedule(T2.op)
+ tvm.te.schedule.AutoInlineElemWise(s)
s = s.normalize()
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
def test_schedule_const_bound():
n = 128
- A = tvm.placeholder((n,), name='A')
- A1 = tvm.compute((n,), lambda i: A[i] + 1, name='A1')
- s = tvm.create_schedule(A1.op)
+ A = te.placeholder((n,), name='A')
+ A1 = te.compute((n,), lambda i: A[i] + 1, name='A1')
+ s = te.create_schedule(A1.op)
xo, xi = s[A1].split(A1.op.axis[0], 8)
- bounds = tvm.schedule.InferBound(s)
+ bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
def test_inline_mixed():
- n = tvm.var('n')
- A = tvm.placeholder((n, ), name='A')
- A1 = tvm.compute(A.shape, lambda *i: A(*i) + 1, name='A1')
- A2 = tvm.compute(A.shape, lambda *i: A1(*i) + 2, name='A2')
- C = tvm.compute((n,), lambda i: A2[i] + A1[i], name='C')
+ n = te.var('n')
+ A = te.placeholder((n, ), name='A')
+ A1 = te.compute(A.shape, lambda *i: A(*i) + 1, name='A1')
+ A2 = te.compute(A.shape, lambda *i: A1(*i) + 2, name='A2')
+ C = te.compute((n,), lambda i: A2[i] + A1[i], name='C')
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
xo, xi = s[C].split(C.op.axis[0], factor=8)
s[A1].compute_at(s[C], xo)
s[A2].compute_inline()
s = s.normalize()
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
def check(x):
if isinstance(x, tvm.tir.Call):
assert x.func != A2
- tvm.ir_pass.PostOrderVisit(s[C].op.body[0], check)
+ tvm.tir.ir_pass.PostOrderVisit(s[C].op.body[0], check)
def test_scan_inline1():
- m = tvm.var("m")
- n = tvm.var("n")
- x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x")
- s_state1 = tvm.placeholder((m, n))
- s_state2 = tvm.placeholder((m, n))
- s_init1 = tvm.compute((1, n), lambda _, i: x[0, i])
- s_init2 = tvm.compute((1, n), lambda _, i: x[0, i])
- s_x1 = tvm.compute((m, n), lambda t, i: s_state1[t-1, i] + x[t, i], name="x1")
- s_x2 = tvm.compute((m, n), lambda t, i: s_state2[t-1, i] + 1 , name="x2")
- s_update1 = tvm.compute((m, n), lambda t, i: s_x1[t, i], "u1")
- s_update2 = tvm.compute((m, n), lambda t, i: s_x2[t, i], "u2")
- res1, res2 = tvm.scan([s_init1, s_init2],
+ m = te.var("m")
+ n = te.var("n")
+ x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x")
+ s_state1 = te.placeholder((m, n))
+ s_state2 = te.placeholder((m, n))
+ s_init1 = te.compute((1, n), lambda _, i: x[0, i])
+ s_init2 = te.compute((1, n), lambda _, i: x[0, i])
+ s_x1 = te.compute((m, n), lambda t, i: s_state1[t-1, i] + x[t, i], name="x1")
+ s_x2 = te.compute((m, n), lambda t, i: s_state2[t-1, i] + 1 , name="x2")
+ s_update1 = te.compute((m, n), lambda t, i: s_x1[t, i], "u1")
+ s_update2 = te.compute((m, n), lambda t, i: s_x2[t, i], "u2")
+ res1, res2 = tvm.te.scan([s_init1, s_init2],
[s_update1, s_update2],
[s_state1, s_state2])
- s = tvm.create_schedule(res1.op)
+ s = te.create_schedule(res1.op)
s[s_x1].compute_inline()
stmt = tvm.lower(s, [x, res1, res2])
def test_scan_inline2():
- m = tvm.var("m")
- n = tvm.var("n")
- x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x")
- s_state1 = tvm.placeholder((m, n))
- s_state2 = tvm.placeholder((m, n))
- s_init1 = tvm.compute((1, n), lambda _, i: x[0, i])
- s_init2 = tvm.compute((1, n), lambda _, i: x[0, i])
- s_xx = tvm.compute((m, n), lambda t, i: s_state1[t-1, i] + x[t, i], name="xx")
- s_x1 = tvm.compute((m, n), lambda t, i: s_xx[t, i] + 1, name="x1")
- s_x2 = tvm.compute((m, n), lambda t, i: s_xx[t, i] + s_state2[t-1, 2], name="x2")
- s_update1 = tvm.compute((m, n), lambda t, i: s_x1[t, i], "u1")
- s_update2 = tvm.compute((m, n), lambda t, i: s_x2[t, i], "u2")
- res1, res2 = tvm.scan([s_init1, s_init2],
+ m = te.var("m")
+ n = te.var("n")
+ x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x")
+ s_state1 = te.placeholder((m, n))
+ s_state2 = te.placeholder((m, n))
+ s_init1 = te.compute((1, n), lambda _, i: x[0, i])
+ s_init2 = te.compute((1, n), lambda _, i: x[0, i])
+ s_xx = te.compute((m, n), lambda t, i: s_state1[t-1, i] + x[t, i], name="xx")
+ s_x1 = te.compute((m, n), lambda t, i: s_xx[t, i] + 1, name="x1")
+ s_x2 = te.compute((m, n), lambda t, i: s_xx[t, i] + s_state2[t-1, 2], name="x2")
+ s_update1 = te.compute((m, n), lambda t, i: s_x1[t, i], "u1")
+ s_update2 = te.compute((m, n), lambda t, i: s_x2[t, i], "u2")
+ res1, res2 = tvm.te.scan([s_init1, s_init2],
[s_update1, s_update2],
[s_state1, s_state2])
- s = tvm.create_schedule(res1.op)
+ s = te.create_schedule(res1.op)
s[s_xx].compute_inline()
s[s_x1].compute_inline()
s[s_x2].compute_inline()
def test_schedule_cache():
- m = tvm.var('m')
- n = tvm.var('n')
- A = tvm.placeholder((m, n), name='A')
- B = tvm.placeholder((m, n), name='B')
- C = tvm.compute((m, n), lambda i, j: A(i, j) * B(i, j), name='C')
+ m = te.var('m')
+ n = te.var('n')
+ A = te.placeholder((m, n), name='A')
+ B = te.placeholder((m, n), name='B')
+ C = te.compute((m, n), lambda i, j: A(i, j) * B(i, j), name='C')
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
AA = s.cache_read(A, "shared", readers=[C])
CC = s.cache_write(C, "shared")
s[AA].compute_at(s[CC], CC.op.axis[0])
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
def test_schedule_middle_cache():
- m = tvm.var('m')
- n = tvm.var('n')
- A = tvm.placeholder((m, n), name='A')
- B = tvm.placeholder((m, n), name='B')
+ m = te.var('m')
+ n = te.var('n')
+ A = te.placeholder((m, n), name='A')
+ B = te.placeholder((m, n), name='B')
- C = tvm.compute((m, n), lambda i, j: A(i, j) * B(i, j), name='C')
- D = tvm.compute((m, n), lambda i, j: C(i , j) , name='D')
+ C = te.compute((m, n), lambda i, j: A(i, j) * B(i, j), name='C')
+ D = te.compute((m, n), lambda i, j: C(i , j) , name='D')
- s = tvm.create_schedule(D.op)
+ s = te.create_schedule(D.op)
AA = s.cache_read(A, "local", readers=[C])
BB = s.cache_read(B, "local", readers=[C])
CC = s.cache_read(C, "local", readers=[D])
DD = s.cache_write(D, "local")
#s[AA].compute_at(s[CC], CC.op.axis[0])
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
def test_schedule_cache_relayout1():
- m = tvm.var('m')
- n = tvm.var('n')
- A = tvm.placeholder((m, n), name='A')
- B = tvm.placeholder((m, n), name='B')
- C = tvm.compute((m, n), lambda i, j: A(i, j) * B(i, j), name='C')
+ m = te.var('m')
+ n = te.var('n')
+ A = te.placeholder((m, n), name='A')
+ B = te.placeholder((m, n), name='B')
+ C = te.compute((m, n), lambda i, j: A(i, j) * B(i, j), name='C')
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
s[C].reorder(C.op.axis[1], C.op.axis[0])
CC = s.cache_write(C, "global")
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
def test_schedule_cache_relayout2():
- m = tvm.var('m')
- n = tvm.var('n')
- A = tvm.placeholder((m*4, n), name='A')
- B = tvm.placeholder((m*4, n), name='B')
- C = tvm.compute(A.shape, lambda i, j: A(i, j) * B(i, j), name='C')
- s = tvm.create_schedule(C.op)
+ m = te.var('m')
+ n = te.var('n')
+ A = te.placeholder((m*4, n), name='A')
+ B = te.placeholder((m*4, n), name='B')
+ C = te.compute(A.shape, lambda i, j: A(i, j) * B(i, j), name='C')
+ s = te.create_schedule(C.op)
x, y = C.op.axis
xo, xi = s[C].split(x, factor=4)
s[C].reorder(xo, y, xi)
CC = s.cache_write(C, "global")
s = s.normalize()
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
def test_schedule_cache_relayout3():
- m = tvm.var('m')
- n = tvm.var('n')
- A = tvm.placeholder((m*4, n), name='A')
- B = tvm.placeholder((m*4, n), name='B')
- k = tvm.reduce_axis((0, n), "k")
- C = tvm.compute((A.shape[0],),
- lambda i: tvm.sum(A(i, k) * B(i, k), axis=k), name='C')
- s = tvm.create_schedule(C.op)
+ m = te.var('m')
+ n = te.var('n')
+ A = te.placeholder((m*4, n), name='A')
+ B = te.placeholder((m*4, n), name='B')
+ k = te.reduce_axis((0, n), "k")
+ C = te.compute((A.shape[0],),
+ lambda i: te.sum(A(i, k) * B(i, k), axis=k), name='C')
+ s = te.create_schedule(C.op)
x = C.op.axis[0]
xo, xi = s[C].split(x, factor=4)
CC = s.cache_write(C, "global")
s = s.normalize()
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
def test_schedule_cache_relayout4():
def _compute(*indice):
return A(*indice) + 1, B(*indice) / 2
- m = tvm.var('m')
- n = tvm.var('n')
- A = tvm.placeholder((m*4, n), name='A')
- B = tvm.placeholder((m*4, n), name='B')
- C1, C2 = tvm.compute(A.shape, _compute, name='C')
- s = tvm.create_schedule([C1.op, C2.op])
+ m = te.var('m')
+ n = te.var('n')
+ A = te.placeholder((m*4, n), name='A')
+ B = te.placeholder((m*4, n), name='B')
+ C1, C2 = te.compute(A.shape, _compute, name='C')
+ s = te.create_schedule([C1.op, C2.op])
C1_cache, C2_cache = s.cache_write([C1, C2], "local")
s = s.normalize()
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
def intrin_gemv(m, n):
- w = tvm.placeholder((m, n), name='w')
- x = tvm.placeholder((n,), name='x')
- k = tvm.reduce_axis((0, n), name='k')
- z = tvm.compute((m,), lambda i:
- tvm.sum(w[i, k] * x[k], axis=k), name='z')
- Wb = tvm.decl_buffer(w.shape, w.dtype,
+ w = te.placeholder((m, n), name='w')
+ x = te.placeholder((n,), name='x')
+ k = te.reduce_axis((0, n), name='k')
+ z = te.compute((m,), lambda i:
+ te.sum(w[i, k] * x[k], axis=k), name='z')
+ Wb = tvm.tir.decl_buffer(w.shape, w.dtype,
name="W",
offset_factor=16,
- strides=[tvm.var('ldw'), 1])
+ strides=[te.var('ldw'), 1])
def intrin_func(ins, outs):
ww, xx = ins
zz = outs[0]
ww_ptr = ww.access_ptr("r")
xx_ptr = xx.access_ptr("r")
zz_ptr = zz.access_ptr("w")
- body = tvm.call_packed(
+ body = tvm.tir.call_packed(
"gemm", ww_ptr, xx_ptr, zz_ptr, n, ww.strides[0])
- reset = tvm.call_packed(
+ reset = tvm.tir.call_packed(
"fill_zero", zz_ptr, n)
- update = tvm.call_packed(
+ update = tvm.tir.call_packed(
"gemv_add", ww_ptr, xx_ptr, zz_ptr, n, ww.strides[0])
return body, reset, update
- with tvm.build_config(data_alignment=16,
+ with tvm.target.build_config(data_alignment=16,
offset_factor=16):
- return tvm.decl_tensor_intrin(z.op, intrin_func,
+ return te.decl_tensor_intrin(z.op, intrin_func,
binds={w: Wb})
# basic: split, reorder, tile
M, N, L = 2048, 1024, 512
factor, rfactor = 16, 16
- A = tvm.placeholder((N//factor, L//rfactor, factor, rfactor), name='A')
- B = tvm.placeholder((M, L//rfactor, rfactor), name='B')
- k = tvm.reduce_axis((0, L//rfactor), name='k')
+ A = te.placeholder((N//factor, L//rfactor, factor, rfactor), name='A')
+ B = te.placeholder((M, L//rfactor, rfactor), name='B')
+ k = te.reduce_axis((0, L//rfactor), name='k')
gemv = intrin_gemv(factor, rfactor)
- C = tvm.compute((N, M//factor, factor),
+ C = te.compute((N, M//factor, factor),
lambda i, j: gemv(A[i, k, 0:factor, 0:factor], B[j, k, 0:rfactor], reduce_axis=k),
name='C')
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
ai, aj, ax = s[C].op.axis
aio, aii = s[C].split(ai, 16)
s[C].reorder(aio, aj, aii)
aioo, ajo, aioi, aji = s[C].tile(aio, aj, 16, 4)
s = s.normalize()
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
def intrin_vadd(n, cache_read=False, cache_write=False):
scope_ubuf = 'local'
dtype = 'float32'
- x = tvm.placeholder((n,), dtype=dtype, name='vx')
- y = tvm.placeholder((n,), dtype=dtype, name='vy')
- z = tvm.compute(x.shape, lambda i: x[i] + y[i], name='z')
- s = tvm.create_schedule(z.op)
+ x = te.placeholder((n,), dtype=dtype, name='vx')
+ y = te.placeholder((n,), dtype=dtype, name='vy')
+ z = te.compute(x.shape, lambda i: x[i] + y[i], name='z')
+ s = te.create_schedule(z.op)
def create_buffer(t):
- return tvm.decl_buffer(t.shape, t.dtype,
+ return tvm.tir.decl_buffer(t.shape, t.dtype,
name='W'+t.name,
scope=scope_ubuf,
offset_factor=16)
binds[z] = create_buffer(z)
def intrin_func(ins, outs):
- ib = tvm.ir_builder.create()
- ib.emit(tvm.call_extern(outs[0].dtype, 'vadd', ins[0].access_ptr("r"), ins[1].access_ptr('r'), outs[0].access_ptr('wr')))
+ ib = tvm.tir.ir_builder.create()
+ ib.emit(tvm.tir.call_extern(outs[0].dtype, 'vadd', ins[0].access_ptr("r"), ins[1].access_ptr('r'), outs[0].access_ptr('wr')))
return ib.get()
- with tvm.build_config(offset_factor=16):
- return tvm.decl_tensor_intrin(z.op, intrin_func, binds=binds)
+ with tvm.target.build_config(offset_factor=16):
+ return te.decl_tensor_intrin(z.op, intrin_func, binds=binds)
def test_schedule_tensor_compute2():
dtype = 'float32'
scope_ubuf = 'local'
- A = tvm.placeholder((M//factor, factor), name="A", dtype=dtype)
- B = tvm.placeholder((M//factor, factor), name="B", dtype=dtype)
+ A = te.placeholder((M//factor, factor), name="A", dtype=dtype)
+ B = te.placeholder((M//factor, factor), name="B", dtype=dtype)
vadd = intrin_vadd(factor, True, True)
- C = tvm.compute((M//factor, factor),
+ C = te.compute((M//factor, factor),
lambda i: vadd(A[i, 0:factor], B[i, 0:factor]), name='C')
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
AL = s.cache_read(A, scope_ubuf, C)
BL = s.cache_read(B, scope_ubuf, C)
CL = s.cache_write(C, scope_ubuf)
s = s.normalize()
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
def test_schedule_tensor_compute3():
M = 1024
factor = 16
dtype = 'float32'
- A = tvm.placeholder((M//factor, factor), name="A", dtype=dtype)
- B = tvm.placeholder((M//factor, factor), name="B", dtype=dtype)
- Bi = tvm.compute((M//factor, factor), lambda i, j: B[i, j] + 5, name="Bi")
+ A = te.placeholder((M//factor, factor), name="A", dtype=dtype)
+ B = te.placeholder((M//factor, factor), name="B", dtype=dtype)
+ Bi = te.compute((M//factor, factor), lambda i, j: B[i, j] + 5, name="Bi")
vadd = intrin_vadd(factor)
- C = tvm.compute((M//factor, factor),
+ C = te.compute((M//factor, factor),
lambda i: vadd(A[i, 0:factor], Bi[i, 0:factor]), name='C')
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
s[Bi].compute_at(s[C], C.op.axis[0])
s = s.normalize()
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
def test_loop_dep_reduce():
- X = tvm.placeholder(shape=(10,), name="x")
+ X = te.placeholder(shape=(10,), name="x")
def f(n):
- rv = tvm.reduce_axis((0, n))
- return tvm.sum(X[rv], axis=rv)
- Y = tvm.compute(X.shape, f, name="y")
- s = tvm.create_schedule([Y.op])
+ rv = te.reduce_axis((0, n))
+ return te.sum(X[rv], axis=rv)
+ Y = te.compute(X.shape, f, name="y")
+ s = te.create_schedule([Y.op])
f = tvm.build(s, [X, Y])
def test_loop_dep_reduce_cache_write():
- X = tvm.placeholder(shape=(10,), name="x")
+ X = te.placeholder(shape=(10,), name="x")
def f(n):
- rv = tvm.reduce_axis((0, n))
- init = lambda dtype: tvm.tir.Select(n > 1, tvm.const(0, dtype), n.astype(dtype))
- sum = tvm.comm_reducer(lambda x, y: tvm.max(x + y, n.astype('float32')), init, name='sum')
+ rv = te.reduce_axis((0, n))
+ init = lambda dtype: tvm.tir.Select(n > 1, tvm.tir.const(0, dtype), n.astype(dtype))
+ sum = te.comm_reducer(lambda x, y: tvm.te.max(x + y, n.astype('float32')), init, name='sum')
return sum(X[rv], axis=rv)
- Y = tvm.compute(X.shape, f, name="y")
- s = tvm.create_schedule([Y.op])
+ Y = te.compute(X.shape, f, name="y")
+ s = te.create_schedule([Y.op])
s.cache_write(Y, 'local')
f = tvm.build(s, [X, Y])
def test_reduction_and_dummy_fuse_split():
n = 10
- X = tvm.placeholder(shape=(n,), dtype='int32', name="X")
- k = tvm.reduce_axis((0, n))
- Y = tvm.compute((), lambda: tvm.sum(X[k], k), name="Y")
- s = tvm.create_schedule([Y.op])
+ X = te.placeholder(shape=(n,), dtype='int32', name="X")
+ k = te.reduce_axis((0, n))
+ Y = te.compute((), lambda: te.sum(X[k], k), name="Y")
+ s = te.create_schedule([Y.op])
ax = s[Y.op].fuse(*Y.op.axis)
axo, axi = s[Y.op].split(ax, nparts=20)
f = tvm.build(s, [Y, X])
assert args[0].asnumpy() == n
n = 10
- X = tvm.placeholder(shape=(n,), dtype='int32', name="X")
- k = tvm.reduce_axis((0, n))
- Y = tvm.compute((n,), lambda i: tvm.sum(X[k], k), name="Y")
- s = tvm.create_schedule([Y.op])
+ X = te.placeholder(shape=(n,), dtype='int32', name="X")
+ k = te.reduce_axis((0, n))
+ Y = te.compute((n,), lambda i: te.sum(X[k], k), name="Y")
+ s = te.create_schedule([Y.op])
ax = s[Y.op].fuse(*(list(Y.op.axis) + list(Y.op.reduce_axis)))
f = tvm.build(s, [Y, X])
def test_schedule_compute_inline():
shape = [10, 1024]
- A = tvm.placeholder(shape, name="A")
- B = tvm.placeholder(shape, name="B")
- C = tvm.compute(shape, lambda *index:A(*index)+ B(*index), name = "C")
+ A = te.placeholder(shape, name="A")
+ B = te.placeholder(shape, name="B")
+ C = te.compute(shape, lambda *index:A(*index)+ B(*index), name = "C")
def _compute(*index) :
return C(*index) , C(*index) * B(*index)
- F,E = tvm.compute(shape, _compute, name = "F")
+ F,E = te.compute(shape, _compute, name = "F")
- s = tvm.create_schedule([F.op, E.op])
+ s = te.create_schedule([F.op, E.op])
AL = s.cache_read(A, "local", [C])
BL = s.cache_read(B, "local", [C,E])
CL = s.cache_write(C, "local")
s[C].compute_inline()
s = s.normalize()
- bounds = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, bounds)
+ bounds = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, bounds)
if __name__ == "__main__":
test_loop_dep_reduce()
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
from topi.testing import conv2d_nhwc_python
from tvm.contrib import nvcc
row, col = n, l
elif scope == "wmma.matrix_b":
row, col = l, m
- A = tvm.placeholder((row, col), name='A', dtype='float16')
- BA = tvm.decl_buffer(A.shape, A.dtype, scope='shared', data_alignment=32, offset_factor=row * col)
- C = tvm.compute((row, col), lambda i, j: A[i, j], name='C')
- BC = tvm.decl_buffer(C.shape, C.dtype, scope=scope, data_alignment=32, offset_factor=row * col)
+ A = te.placeholder((row, col), name='A', dtype='float16')
+ BA = tvm.tir.decl_buffer(A.shape, A.dtype, scope='shared', data_alignment=32, offset_factor=row * col)
+ C = te.compute((row, col), lambda i, j: A[i, j], name='C')
+ BC = tvm.tir.decl_buffer(C.shape, C.dtype, scope=scope, data_alignment=32, offset_factor=row * col)
def intrin_func(ins, outs):
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
BA = ins[0]
BC = outs[0]
- ib.emit(tvm.call_intrin('handle', 'tvm_load_matrix_sync',
+ ib.emit(tvm.tir.call_intrin('handle', 'tvm_load_matrix_sync',
BC.data, n, m, l, BC.elem_offset // (row * col),
BA.access_ptr('r'), col, 'row_major'))
return ib.get()
- return tvm.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, C: BC})
+ return te.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, C: BC})
def intrin_wmma_gemm(shape):
n, m, l = shape
- A = tvm.placeholder((n, l), name='A', dtype='float16')
- B = tvm.placeholder((l, m), name='B', dtype='float16')
- k = tvm.reduce_axis((0, l), name="k")
- C = tvm.compute((n, m),
+ A = te.placeholder((n, l), name='A', dtype='float16')
+ B = te.placeholder((l, m), name='B', dtype='float16')
+ k = te.reduce_axis((0, l), name="k")
+ C = te.compute((n, m),
lambda ii, jj:
- tvm.sum(A[ii, k].astype('float') * B[k, jj].astype('float'), axis=k),
+ te.sum(A[ii, k].astype('float') * B[k, jj].astype('float'), axis=k),
name='C')
- BA = tvm.decl_buffer(A.shape, A.dtype, name='BA', scope='wmma.matrix_a', data_alignment=32, offset_factor=n * l)
- BB = tvm.decl_buffer(B.shape, B.dtype, name='BB', scope='wmma.matrix_b', data_alignment=32, offset_factor=l * m)
- BC = tvm.decl_buffer(C.shape, C.dtype, name='BC', scope='wmma.accumulator', data_alignment=32, offset_factor=n * m)
+ BA = tvm.tir.decl_buffer(A.shape, A.dtype, name='BA', scope='wmma.matrix_a', data_alignment=32, offset_factor=n * l)
+ BB = tvm.tir.decl_buffer(B.shape, B.dtype, name='BB', scope='wmma.matrix_b', data_alignment=32, offset_factor=l * m)
+ BC = tvm.tir.decl_buffer(C.shape, C.dtype, name='BC', scope='wmma.accumulator', data_alignment=32, offset_factor=n * m)
def intrin_func(ins, outs):
BA, BB = ins
BC, = outs
def init():
- ib = tvm.ir_builder.create()
- ib.emit(tvm.call_intrin('handle', 'tvm_fill_fragment', BC.data, n, m, l, BC.elem_offset // (n * m), 0.0))
+ ib = tvm.tir.ir_builder.create()
+ ib.emit(tvm.tir.call_intrin('handle', 'tvm_fill_fragment', BC.data, n, m, l, BC.elem_offset // (n * m), 0.0))
return ib.get()
def update():
- ib = tvm.ir_builder.create()
- ib.emit(tvm.call_intrin('handle', 'tvm_mma_sync',
+ ib = tvm.tir.ir_builder.create()
+ ib.emit(tvm.tir.call_intrin('handle', 'tvm_mma_sync',
BC.data, BC.elem_offset // (n * m),
BA.data, BA.elem_offset // (n * l),
BB.data, BB.elem_offset // (l * m),
return update(), init(), update()
- return tvm.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, B: BB, C: BC})
+ return te.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, B: BB, C: BC})
def intrin_wmma_store_matrix(shape):
n, m, l = shape
- A = tvm.placeholder((n, m), name='A', dtype='float32')
- BA = tvm.decl_buffer(A.shape, A.dtype, scope='wmma.accumulator', data_alignment=32, offset_factor=n * m)
- C = tvm.compute((n, m), lambda i, j: A[i, j], name='C')
- BC = tvm.decl_buffer(C.shape, C.dtype, scope='global', data_alignment=32, offset_factor=n * m)
+ A = te.placeholder((n, m), name='A', dtype='float32')
+ BA = tvm.tir.decl_buffer(A.shape, A.dtype, scope='wmma.accumulator', data_alignment=32, offset_factor=n * m)
+ C = te.compute((n, m), lambda i, j: A[i, j], name='C')
+ BC = tvm.tir.decl_buffer(C.shape, C.dtype, scope='global', data_alignment=32, offset_factor=n * m)
def intrin_func(ins, outs):
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
BA = ins[0]
BC = outs[0]
- ib.emit(tvm.call_intrin('handle', 'tvm_store_matrix_sync',
+ ib.emit(tvm.tir.call_intrin('handle', 'tvm_store_matrix_sync',
BA.data, n, m, l, BA.elem_offset // (n * m),
BC.access_ptr('w'), m, 'row_major'))
return ib.get()
- return tvm.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, C: BC})
+ return te.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, C: BC})
def test_tensor_core_batch_matmal():
assert (m % 8 == 0)
assert (l % 16 == 0)
nn, mm, ll = n // 32, m // 8, l // 16
- A = tvm.placeholder((batch_size, nn, ll, 32, 16), name='A', dtype='float16')
- B = tvm.placeholder((batch_size, ll, mm, 16, 8), name='B', dtype='float16')
- k1 = tvm.reduce_axis((0, ll), name='k1')
- k2 = tvm.reduce_axis((0, 16), name='k2')
- C = tvm.compute((batch_size, nn, mm, 32, 8),
+ A = te.placeholder((batch_size, nn, ll, 32, 16), name='A', dtype='float16')
+ B = te.placeholder((batch_size, ll, mm, 16, 8), name='B', dtype='float16')
+ k1 = te.reduce_axis((0, ll), name='k1')
+ k2 = te.reduce_axis((0, 16), name='k2')
+ C = te.compute((batch_size, nn, mm, 32, 8),
lambda b, i, j, ii, jj:
- tvm.sum(A[b, i, k1, ii, k2].astype('float') * B[b, k1, j, k2, jj].astype('float'), axis=[k1, k2]),
+ te.sum(A[b, i, k1, ii, k2].astype('float') * B[b, k1, j, k2, jj].astype('float'), axis=[k1, k2]),
name='Fragment_C')
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
warp_size = 32
kernel_size = 16
warp_col_tiles = 2
chunk = 4
- block_x = tvm.thread_axis('blockIdx.x')
- block_y = tvm.thread_axis('blockIdx.y')
- block_z = tvm.thread_axis('blockIdx.z')
- thread_x = tvm.thread_axis('threadIdx.x')
- thread_y = tvm.thread_axis('threadIdx.y')
- thread_z = tvm.thread_axis('threadIdx.z')
+ block_x = te.thread_axis('blockIdx.x')
+ block_y = te.thread_axis('blockIdx.y')
+ block_z = te.thread_axis('blockIdx.z')
+ thread_x = te.thread_axis('threadIdx.x')
+ thread_y = te.thread_axis('threadIdx.y')
+ thread_z = te.thread_axis('threadIdx.z')
AS = s.cache_read(A, 'shared', [C])
BS = s.cache_read(B, 'shared', [C])
assert (in_channels % block_size == 0)
assert (out_channels % block_size == 0)
- kh = tvm.reduce_axis((0, kernel_h), name='kh')
- kw = tvm.reduce_axis((0, kernel_w), name='kw')
- ic = tvm.reduce_axis((0, in_channels // block_size), name='ic')
- ii = tvm.reduce_axis((0, block_size), name='ii')
+ kh = te.reduce_axis((0, kernel_h), name='kh')
+ kw = te.reduce_axis((0, kernel_w), name='kw')
+ ic = te.reduce_axis((0, in_channels // block_size), name='ic')
+ ii = te.reduce_axis((0, block_size), name='ii')
# Algorithm
- A = tvm.placeholder(data_shape, name='A', dtype="float16")
- W = tvm.placeholder(kernel_shape, name='W', dtype="float16")
- Apad = tvm.compute(
+ A = te.placeholder(data_shape, name='A', dtype="float16")
+ W = te.placeholder(kernel_shape, name='W', dtype="float16")
+ Apad = te.compute(
(batch_size // block_size, height + 2 * pad_h, width + 2 * pad_w, in_channels // block_size, block_size,
block_size),
- lambda n, h, w, i, nn, ii: tvm.if_then_else(
- tvm.all(h >= pad_h, h - pad_h < height,
+ lambda n, h, w, i, nn, ii: tvm.tir.if_then_else(
+ tvm.tir.all(h >= pad_h, h - pad_h < height,
w >= pad_w, w - pad_w < width),
- A[n, h - pad_h, w - pad_w, i, nn, ii], tvm.const(0., "float16")),
+ A[n, h - pad_h, w - pad_w, i, nn, ii], tvm.tir.const(0., "float16")),
name='Apad')
- Conv = tvm.compute(output_shape,
- lambda n, h, w, o, nn, oo: tvm.sum(
+ Conv = te.compute(output_shape,
+ lambda n, h, w, o, nn, oo: te.sum(
Apad[n, h * stride_h + kh, w * stride_w + kw, ic, nn, ii].astype("float32") *
W[kh, kw, ic, o, ii, oo].astype("float32"),
axis=[ic, kh, kw, ii]),
name="Conv")
- s = tvm.create_schedule(Conv.op)
+ s = te.create_schedule(Conv.op)
s[Apad].compute_inline()
AS = s.cache_read(Apad, 'shared', [Conv])
WF = s.cache_read(WS, 'wmma.matrix_b', [Conv])
ConvF = s.cache_write(Conv, 'wmma.accumulator')
- block_x = tvm.thread_axis('blockIdx.x')
- block_y = tvm.thread_axis('blockIdx.y')
- block_z = tvm.thread_axis('blockIdx.z')
- thread_x = tvm.thread_axis('threadIdx.x')
- thread_y = tvm.thread_axis('threadIdx.y')
- thread_z = tvm.thread_axis('threadIdx.z')
+ block_x = te.thread_axis('blockIdx.x')
+ block_y = te.thread_axis('blockIdx.y')
+ block_z = te.thread_axis('blockIdx.z')
+ thread_x = te.thread_axis('threadIdx.x')
+ thread_y = te.thread_axis('threadIdx.y')
+ thread_z = te.thread_axis('threadIdx.z')
nc, hc, wc, oc, nnc, ooc = Conv.op.axis
block_k = s[Conv].fuse(hc, wc)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
def intrin_vadd(n):
- x = tvm.placeholder((n,), name='vx')
- y = tvm.placeholder((n,), name='vy')
- z = tvm.compute(x.shape, lambda i: x[i] + y[i], name='z')
+ x = te.placeholder((n,), name='vx')
+ y = te.placeholder((n,), name='vy')
+ z = te.compute(x.shape, lambda i: x[i] + y[i], name='z')
def intrin_func(ins, outs):
xx, yy = ins
zz = outs[0]
- return tvm.call_packed("vadd", xx, yy, zz)
- with tvm.build_config(offset_factor=16):
- return tvm.decl_tensor_intrin(z.op, intrin_func)
+ return tvm.tir.call_packed("vadd", xx, yy, zz)
+ with tvm.target.build_config(offset_factor=16):
+ return te.decl_tensor_intrin(z.op, intrin_func)
def intrin_gemv(m, n):
- w = tvm.placeholder((m, n), name='w')
- x = tvm.placeholder((n,), name='x')
- k = tvm.reduce_axis((0, n), name='k')
- z = tvm.compute((m,), lambda i:
- tvm.sum(w[i, k] * x[k], axis=k), name='z')
- Wb = tvm.decl_buffer(w.shape, w.dtype,
+ w = te.placeholder((m, n), name='w')
+ x = te.placeholder((n,), name='x')
+ k = te.reduce_axis((0, n), name='k')
+ z = te.compute((m,), lambda i:
+ te.sum(w[i, k] * x[k], axis=k), name='z')
+ Wb = tvm.tir.decl_buffer(w.shape, w.dtype,
name="W",
offset_factor=16,
- strides=[tvm.var('ldw'), 1])
+ strides=[te.var('ldw'), 1])
def intrin_func(ins, outs):
ww, xx = ins
zz = outs[0]
ww_ptr = ww.access_ptr("r")
xx_ptr = xx.access_ptr("r")
zz_ptr = zz.access_ptr("w")
- body = tvm.call_packed(
+ body = tvm.tir.call_packed(
"gemv", ww_ptr, xx_ptr, zz_ptr, n, ww.strides[0])
- reset = tvm.call_packed(
+ reset = tvm.tir.call_packed(
"fill_zero", zz_ptr, n)
- update = tvm.call_packed(
+ update = tvm.tir.call_packed(
"gemv_add", ww_ptr, xx_ptr, zz_ptr, n, ww.strides[0])
return body, reset, update
- with tvm.build_config(data_alignment=16,
+ with tvm.target.build_config(data_alignment=16,
offset_factor=16):
- return tvm.decl_tensor_intrin(z.op, intrin_func,
+ return te.decl_tensor_intrin(z.op, intrin_func,
binds={w: Wb})
def intrin_gemv_no_reset(m, n):
- w = tvm.placeholder((m, n), name='w')
- x = tvm.placeholder((n,), name='x')
- k = tvm.reduce_axis((0, n), name='k')
- z = tvm.compute((m,), lambda i:
- tvm.sum(w[i, k] * x[k], axis=k), name='z')
- Wb = tvm.decl_buffer(w.shape, w.dtype,
+ w = te.placeholder((m, n), name='w')
+ x = te.placeholder((n,), name='x')
+ k = te.reduce_axis((0, n), name='k')
+ z = te.compute((m,), lambda i:
+ te.sum(w[i, k] * x[k], axis=k), name='z')
+ Wb = tvm.tir.decl_buffer(w.shape, w.dtype,
name="W",
offset_factor=16,
- strides=[tvm.var('ldw'), 1])
+ strides=[te.var('ldw'), 1])
def intrin_func(ins, outs):
ww, xx = ins
zz = outs[0]
ww_ptr = ww.access_ptr("r")
xx_ptr = xx.access_ptr("r")
zz_ptr = zz.access_ptr("w")
- body = tvm.call_packed(
+ body = tvm.tir.call_packed(
"gemv", ww_ptr, xx_ptr, zz_ptr, n, ww.strides[0])
- update = tvm.call_packed(
+ update = tvm.tir.call_packed(
"gemv_add", ww_ptr, xx_ptr, zz_ptr, n, ww.strides[0])
return body, None, update
- with tvm.build_config(data_alignment=16,
+ with tvm.target.build_config(data_alignment=16,
offset_factor=16):
- return tvm.decl_tensor_intrin(z.op, intrin_func,
+ return te.decl_tensor_intrin(z.op, intrin_func,
binds={w: Wb})
def test_tensorize_vadd():
m = 128
- x = tvm.placeholder((m,), name='x')
- y = tvm.placeholder((m,), name='y')
- z = tvm.compute(x.shape, lambda i: x[i] + y[i], name='z')
+ x = te.placeholder((m,), name='x')
+ y = te.placeholder((m,), name='y')
+ z = te.compute(x.shape, lambda i: x[i] + y[i], name='z')
def check(factor):
- s = tvm.create_schedule(z.op)
+ s = te.create_schedule(z.op)
xo, xi = s[z].split(z.op.axis[0], factor=factor)
vadd = intrin_vadd(factor)
s[z].tensorize(xi, vadd)
s = s.normalize()
- dom_map = tvm.schedule.InferBound(s)
+ dom_map = tvm.te.schedule.InferBound(s)
finfer = tvm.get_global_func("test.op.InferTensorizeRegion")
out_dom, in_dom = finfer(s[z], dom_map)
- assert tvm.ir_pass.Equal(out_dom[z.op.axis[0]].extent, factor)
- assert tvm.ir_pass.Equal(out_dom[z.op.axis[0]].min, xo * factor)
- assert tvm.ir_pass.Equal(in_dom.items()[0][1][0].extent, factor)
+ assert tvm.tir.ir_pass.Equal(out_dom[z.op.axis[0]].extent, factor)
+ assert tvm.tir.ir_pass.Equal(out_dom[z.op.axis[0]].min, xo * factor)
+ assert tvm.tir.ir_pass.Equal(in_dom.items()[0][1][0].extent, factor)
fmatch = tvm.get_global_func("test.op.MatchTensorizeBody")
body = fmatch(s[z], out_dom, in_dom, vadd)
- assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(body[0]),
- tvm.ir_pass.CanonicalSimplify(vadd.op.body[0]))
- stmt = tvm.schedule.ScheduleOps(s, dom_map)
+ assert tvm.tir.ir_pass.Equal(tvm.tir.ir_pass.CanonicalSimplify(body[0]),
+ tvm.tir.ir_pass.CanonicalSimplify(vadd.op.body[0]))
+ stmt = tvm.te.schedule.ScheduleOps(s, dom_map)
tvm.lower(s, [x, y, z])
check(16)
n = 1024
m = n
l = n
- A = tvm.placeholder((n, l), name='A')
- B = tvm.placeholder((m, l), name='B')
- k = tvm.reduce_axis((0, l), name='k')
- C = tvm.compute((n, m), lambda i, j:
- tvm.sum(B[j, k] * A[i, k], axis=k), name='C')
+ A = te.placeholder((n, l), name='A')
+ B = te.placeholder((m, l), name='B')
+ k = te.reduce_axis((0, l), name='k')
+ C = te.compute((n, m), lambda i, j:
+ te.sum(B[j, k] * A[i, k], axis=k), name='C')
def check(factor):
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
x, y = C.op.axis
yo, yi = s[C].split(y, factor=factor)
gemv = intrin_gemv(factor, l)
s[C].tensorize(yi, gemv)
s = s.normalize()
- dom_map = tvm.schedule.InferBound(s)
+ dom_map = tvm.te.schedule.InferBound(s)
finfer = tvm.get_global_func("test.op.InferTensorizeRegion")
out_dom, in_dom = finfer(s[C], dom_map)
- assert tvm.ir_pass.Equal(out_dom[x].extent, 1)
- assert tvm.ir_pass.Equal(out_dom[y].extent, factor)
- assert tvm.ir_pass.Equal(out_dom[y].min, yo * factor)
+ assert tvm.tir.ir_pass.Equal(out_dom[x].extent, 1)
+ assert tvm.tir.ir_pass.Equal(out_dom[y].extent, factor)
+ assert tvm.tir.ir_pass.Equal(out_dom[y].min, yo * factor)
fmatch = tvm.get_global_func("test.op.MatchTensorizeBody")
body = fmatch(s[C], out_dom, in_dom, gemv)
- assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(body[0]),
- tvm.ir_pass.CanonicalSimplify(gemv.op.body[0]))
- stmt = tvm.schedule.ScheduleOps(s, dom_map)
+ assert tvm.tir.ir_pass.Equal(tvm.tir.ir_pass.CanonicalSimplify(body[0]),
+ tvm.tir.ir_pass.CanonicalSimplify(gemv.op.body[0]))
+ stmt = tvm.te.schedule.ScheduleOps(s, dom_map)
tvm.lower(s, [A, B, C])
def check_rfactor(factor, rfactor):
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
x, y = C.op.axis
rk = C.op.reduce_axis[0]
yo, yi = s[C].split(y, factor=factor)
gemv = intrin_gemv(factor, rfactor)
s[C].tensorize(yi, gemv)
s = s.normalize()
- dom_map = tvm.schedule.InferBound(s)
+ dom_map = tvm.te.schedule.InferBound(s)
finfer = tvm.get_global_func("test.op.InferTensorizeRegion")
out_dom, in_dom = finfer(s[C], dom_map)
- assert tvm.ir_pass.Equal(out_dom[x].extent, 1)
- assert tvm.ir_pass.Equal(out_dom[y].extent, factor)
- assert tvm.ir_pass.Equal(out_dom[y].min, yo * factor)
+ assert tvm.tir.ir_pass.Equal(out_dom[x].extent, 1)
+ assert tvm.tir.ir_pass.Equal(out_dom[y].extent, factor)
+ assert tvm.tir.ir_pass.Equal(out_dom[y].min, yo * factor)
fmatch = tvm.get_global_func("test.op.MatchTensorizeBody")
body = fmatch(s[C], out_dom, in_dom, gemv)
- assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(body[0]),
- tvm.ir_pass.CanonicalSimplify(gemv.op.body[0]))
- stmt = tvm.schedule.ScheduleOps(s, dom_map)
+ assert tvm.tir.ir_pass.Equal(tvm.tir.ir_pass.CanonicalSimplify(body[0]),
+ tvm.tir.ir_pass.CanonicalSimplify(gemv.op.body[0]))
+ stmt = tvm.te.schedule.ScheduleOps(s, dom_map)
tvm.lower(s, [A, B, C])
def check_rfactor_no_reset(factor, rfactor):
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
x, y = C.op.axis
rk = C.op.reduce_axis[0]
yo, yi = s[C].split(y, factor=factor)
gemv = intrin_gemv_no_reset(factor, rfactor)
s[C].tensorize(yi, gemv)
s = s.normalize()
- dom_map = tvm.schedule.InferBound(s)
+ dom_map = tvm.te.schedule.InferBound(s)
finfer = tvm.get_global_func("test.op.InferTensorizeRegion")
out_dom, in_dom = finfer(s[C], dom_map)
- assert tvm.ir_pass.Equal(out_dom[x].extent, 1)
- assert tvm.ir_pass.Equal(out_dom[y].extent, factor)
- assert tvm.ir_pass.Equal(out_dom[y].min, yo * factor)
+ assert tvm.tir.ir_pass.Equal(out_dom[x].extent, 1)
+ assert tvm.tir.ir_pass.Equal(out_dom[y].extent, factor)
+ assert tvm.tir.ir_pass.Equal(out_dom[y].min, yo * factor)
fmatch = tvm.get_global_func("test.op.MatchTensorizeBody")
body = fmatch(s[C], out_dom, in_dom, gemv)
- assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(body[0]),
- tvm.ir_pass.CanonicalSimplify(gemv.op.body[0]))
- stmt = tvm.schedule.ScheduleOps(s, dom_map)
+ assert tvm.tir.ir_pass.Equal(tvm.tir.ir_pass.CanonicalSimplify(body[0]),
+ tvm.tir.ir_pass.CanonicalSimplify(gemv.op.body[0]))
+ stmt = tvm.te.schedule.ScheduleOps(s, dom_map)
tvm.lower(s, [A, B, C])
def check_rfactor_no_reset_multi_reduction(factor, rfactor):
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
x, y = C.op.axis
rk = C.op.reduce_axis[0]
yo, yi = s[C].split(y, factor=factor)
gemv = intrin_gemv_no_reset(factor, rfactor)
s[C].tensorize(yi, gemv)
s = s.normalize()
- dom_map = tvm.schedule.InferBound(s)
+ dom_map = tvm.te.schedule.InferBound(s)
finfer = tvm.get_global_func("test.op.InferTensorizeRegion")
out_dom, in_dom = finfer(s[C], dom_map)
- assert tvm.ir_pass.Equal(out_dom[x].extent, 1)
- assert tvm.ir_pass.Equal(out_dom[y].extent, factor)
- assert tvm.ir_pass.Equal(out_dom[y].min, yo * factor)
+ assert tvm.tir.ir_pass.Equal(out_dom[x].extent, 1)
+ assert tvm.tir.ir_pass.Equal(out_dom[y].extent, factor)
+ assert tvm.tir.ir_pass.Equal(out_dom[y].min, yo * factor)
fmatch = tvm.get_global_func("test.op.MatchTensorizeBody")
body = fmatch(s[C], out_dom, in_dom, gemv)
- assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(body[0]),
- tvm.ir_pass.CanonicalSimplify(gemv.op.body[0]))
- stmt = tvm.schedule.ScheduleOps(s, dom_map)
+ assert tvm.tir.ir_pass.Equal(tvm.tir.ir_pass.CanonicalSimplify(body[0]),
+ tvm.tir.ir_pass.CanonicalSimplify(gemv.op.body[0]))
+ stmt = tvm.te.schedule.ScheduleOps(s, dom_map)
tvm.lower(s, [A, B, C])
check(16)
# This tests whether algorithm and intrinsics expressions are simplified
# as much as possible first and then checked for equality. See Issue #696
def test_tensorize_op():
- idxd = tvm.indexdiv
- idxm = tvm.indexmod
+ idxd = tvm.tir.indexdiv
+ idxm = tvm.tir.indexmod
def op_intrin():
bh = 9
bw = 9
- x = tvm.placeholder((5, 5), name='A')
- y = tvm.compute((bh, bw),
+ x = te.placeholder((5, 5), name='A')
+ y = te.compute((bh, bw),
lambda i, j: x[idxd(j,3) + idxm(i,3), idxm(j,3)+ idxd(i,3)])
def intrin_func(ins, outs):
xx, = ins
zz = outs[0]
- return tvm.call_packed("op", xx, zz)
+ return tvm.tir.call_packed("op", xx, zz)
- with tvm.build_config(offset_factor=2):
- return tvm.decl_tensor_intrin(y.op, intrin_func)
+ with tvm.target.build_config(offset_factor=2):
+ return te.decl_tensor_intrin(y.op, intrin_func)
- A = tvm.placeholder((5, 5), name='A')
- B = tvm.compute((9,9), lambda i, j: A[idxd(j,3) + idxm(i,3), idxm(j,3) + idxd(i,3)])
+ A = te.placeholder((5, 5), name='A')
+ B = te.compute((9,9), lambda i, j: A[idxd(j,3) + idxm(i,3), idxm(j,3) + idxd(i,3)])
bt = op_intrin()
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
x,y = B.op.axis
s[B].tensorize(x, bt)
# an intrinsic called "multivadd" whose definition (pattern)
# is a loop of another intrinsic called "vadd"
def intrin_multivadd(n):
- n_a = tvm.var("n_a")
- Ab = tvm.decl_buffer((n, ), tvm.float32, strides=[n_a])
+ n_a = te.var("n_a")
+ Ab = tvm.tir.decl_buffer((n, ), "float32", strides=[n_a])
- n_b = tvm.var("n_b")
- Bb = tvm.decl_buffer((n, ), tvm.float32, strides=[n_b])
+ n_b = te.var("n_b")
+ Bb = tvm.tir.decl_buffer((n, ), "float32", strides=[n_b])
- n_c = tvm.var("n_c")
- Cb = tvm.decl_buffer((n, ), tvm.float32, strides=[n_c])
+ n_c = te.var("n_c")
+ Cb = tvm.tir.decl_buffer((n, ), "float32", strides=[n_c])
- z = tvm.compute((n,), lambda i: tvm.call_extern("float32", 'vadd',
+ z = te.compute((n,), lambda i: tvm.tir.call_extern("float32", 'vadd',
Ab.access_ptr("w", offset=n_a*i),
Bb.access_ptr("r", offset=n_b*i),
Cb.access_ptr("r", offset=n_c*i)))
# replace the pattern with the multivadd call. I need to figure out
# how to pass it the right parameters.
def intrin_func(ins, outs):
- return tvm.call_packed("multivadd")
+ return tvm.tir.call_packed("multivadd")
- with tvm.build_config():
- return tvm.decl_tensor_intrin(z.op, intrin_func, name="multivadd")
+ with tvm.target.build_config():
+ return te.decl_tensor_intrin(z.op, intrin_func, name="multivadd")
def intrin_vadd(n):
dtype = 'float32'
- x = tvm.placeholder((n,), dtype=dtype, name='vx')
- y = tvm.placeholder((n,), dtype=dtype, name='vy')
- z = tvm.compute(x.shape, lambda i: x[i] + y[i], name='z')
- s = tvm.create_schedule(z.op)
+ x = te.placeholder((n,), dtype=dtype, name='vx')
+ y = te.placeholder((n,), dtype=dtype, name='vy')
+ z = te.compute(x.shape, lambda i: x[i] + y[i], name='z')
+ s = te.create_schedule(z.op)
def create_buffer(t):
- return tvm.decl_buffer(t.shape, t.dtype,
+ return tvm.tir.decl_buffer(t.shape, t.dtype,
name='W'+t.name,
offset_factor=16)
def intrin_func(ins, outs):
- ib = tvm.ir_builder.create()
- ib.emit(tvm.call_extern("float32", 'vadd',
+ ib = tvm.tir.ir_builder.create()
+ ib.emit(tvm.tir.call_extern("float32", 'vadd',
ins[0].access_ptr("r"), ins[1].access_ptr('r'),
outs[0].access_ptr('wr')))
return ib.get()
- with tvm.build_config(offset_factor=16):
- return tvm.decl_tensor_intrin(z.op, intrin_func, binds={x: create_buffer(x),
+ with tvm.target.build_config(offset_factor=16):
+ return te.decl_tensor_intrin(z.op, intrin_func, binds={x: create_buffer(x),
y: create_buffer(y),
z: create_buffer(z)})
factor = 16
dtype = 'float32'
- A = tvm.placeholder((M//factor, factor), name="A", dtype=dtype)
- B = tvm.placeholder((M//factor, factor), name="B", dtype=dtype)
+ A = te.placeholder((M//factor, factor), name="A", dtype=dtype)
+ B = te.placeholder((M//factor, factor), name="B", dtype=dtype)
vadd = intrin_vadd(factor)
- C = tvm.compute((M//factor, factor),
+ C = te.compute((M//factor, factor),
lambda i: vadd(A[i, 0:factor], B[i, 0:factor]), name='C')
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
multivadd = intrin_multivadd(64)
s[C].tensorize(C.op.axis[0], multivadd)
s = s.normalize()
- dom_map = tvm.schedule.InferBound(s)
- stmt = tvm.schedule.ScheduleOps(s, dom_map)
+ dom_map = tvm.te.schedule.InferBound(s)
+ stmt = tvm.te.schedule.ScheduleOps(s, dom_map)
# The loop that we tried to tensorize still exists in the code
# That means tensorize didn't work as expected
assert isinstance(stmt.body.body.body, tvm.tir.For)
# under the License.
import numpy as np
import tvm
+from tvm import te
from tvm.testing import check_numerical_grads
def test_check_numerical_grads():
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import topi
from tvm.contrib import util, clang
import numpy as np
def test_nearbyint():
- m = tvm.var("m",)
- A = tvm.placeholder((m,), name='A')
- A_rounded = tvm.compute((m,), lambda *i: tvm.nearbyint(A(*i)), name='A')
- s = tvm.create_schedule(A_rounded.op)
+ m = te.var("m",)
+ A = te.placeholder((m,), name='A')
+ A_rounded = te.compute((m,), lambda *i: tvm.tir.nearbyint(A(*i)), name='A')
+ s = te.create_schedule(A_rounded.op)
f = tvm.build(s, [A, A_rounded], "llvm")
ctx = tvm.cpu(0)
n = 10
# under the License.
# Prepare test library for js.
import tvm
+from tvm import te
from tvm.contrib import emscripten
import os
target = "llvm -target=asmjs-unknown-emscripten -system-lib"
if not tvm.runtime.enabled(target):
raise RuntimeError("Target %s is not enbaled" % target)
- n = tvm.var("n")
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
- s = tvm.create_schedule(B.op)
+ n = te.var("n")
+ A = te.placeholder((n,), name='A')
+ B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
+ s = te.create_schedule(B.op)
fadd1 = tvm.build(s, [A, B], target, name="add_one")
obj_path = os.path.join(base_path, "test_add_one.bc")
fadd1.save(obj_path)
"""
import tvm
+from tvm import te
import os
from tvm import rpc
from tvm.contrib import util, emscripten
if not tvm.runtime.enabled("rpc"):
return
# graph
- n = tvm.convert(1024)
- A = tvm.placeholder((n,), name='A')
- B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
- s = tvm.create_schedule(B.op)
+ n = tvm.runtime.convert(1024)
+ A = te.placeholder((n,), name='A')
+ B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
+ s = te.create_schedule(B.op)
remote = rpc.connect(proxy_host, proxy_port, key="js")
target = "llvm -target=asmjs-unknown-emscripten -system-lib"
def check_remote():
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
def test_local_gemm():
return
nn = 1024
- n = tvm.var('n')
- n = tvm.convert(nn)
+ n = te.var('n')
+ n = tvm.runtime.convert(nn)
m = n
l = n
- A = tvm.placeholder((n, l), name='A', dtype='int32')
- B = tvm.placeholder((m, l), name='B', dtype='int32')
- k = tvm.reduce_axis((0, l), name='k')
- C = tvm.compute((n, m), lambda ii, jj: tvm.sum(A[ii, k] * B[jj, k], axis=k),
+ A = te.placeholder((n, l), name='A', dtype='int32')
+ B = te.placeholder((m, l), name='B', dtype='int32')
+ k = te.reduce_axis((0, l), name='k')
+ C = te.compute((n, m), lambda ii, jj: te.sum(A[ii, k] * B[jj, k], axis=k),
name='CC')
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
s[C].opengl()
print(tvm.lower(s, [A, B, C], simple_mode=True))
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
def test_local_multi_stage():
if not tvm.runtime.enabled("llvm"):
return
- n = tvm.var("n")
- A = tvm.placeholder((n,), name='A', dtype="int32")
- B = tvm.compute((n,), lambda i: A[i] + 1, name="B")
- C = tvm.compute((n,), lambda i: B[i] * 2, name="C")
+ n = te.var("n")
+ A = te.placeholder((n,), name='A', dtype="int32")
+ B = te.compute((n,), lambda i: A[i] + 1, name="B")
+ C = te.compute((n,), lambda i: B[i] * 2, name="C")
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
s[B].opengl()
s[C].opengl()
# under the License.
import numpy as np
import tvm
+from tvm import te
from tvm import rpc
from tvm.contrib import util, emscripten
if not tvm.runtime.enabled("llvm"):
return
- n = tvm.var("n")
- A = tvm.placeholder((n,), name='A', dtype='int32')
- B = tvm.placeholder((n,), name='B', dtype='int32')
- C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C")
- s = tvm.create_schedule(C.op)
+ n = te.var("n")
+ A = te.placeholder((n,), name='A', dtype='int32')
+ B = te.placeholder((n,), name='B', dtype='int32')
+ C = te.compute(A.shape, lambda i: A[i] + B[i], name="C")
+ s = te.create_schedule(C.op)
s[C].opengl()
f = tvm.build(s, [A, B, C], "opengl", target_host="llvm", name="myadd")
import os
import numpy as np
import tvm
+from tvm import te
import topi
from tvm.contrib.pickle_memoize import memoize
from topi.util import get_const_tuple
def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, padding):
in_height = in_width = in_size
- A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A')
- W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W')
+ A = te.placeholder((batch, in_channel, in_height, in_width), name='A')
+ W = te.placeholder((num_filter, in_channel, kernel, kernel), name='W')
B = topi.nn.conv2d_nchw(A, W, stride, padding)
C = topi.nn.relu(B)
w = tvm.nd.array(w_np, ctx)
b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
- with tvm.build_config(auto_unroll_max_step=1400,
+ with tvm.target.build_config(auto_unroll_max_step=1400,
unroll_explicit=(device != "cuda")):
func1 = tvm.build(s1, [A, W, B], device, name="conv2d_%d_%d_%d_%d_%d_%d_%d" % (batch, in_channel, in_size, num_filter, kernel, stride, padding))
func2 = tvm.build(s2, [A, W, C], device, name="relu_%d_%d_%d_%d_%d_%d_%d" % (batch, in_channel, in_size, num_filter, kernel, stride, padding))
"""
import numpy as np
import tvm
+from tvm import te
import topi
from topi.util import get_const_tuple
from tvm.contrib.pickle_memoize import memoize
def verify_dense(batch, in_dim, out_dim, use_bias=True):
- A = tvm.placeholder((batch, in_dim), name='A')
- B = tvm.placeholder((out_dim, in_dim), name='B')
- C = tvm.placeholder((out_dim,), name='C')
+ A = te.placeholder((batch, in_dim), name='A')
+ B = te.placeholder((out_dim, in_dim), name='B')
+ C = te.placeholder((out_dim,), name='C')
D = topi.nn.dense(A, B, C if use_bias else None)
D = topi.nn.relu(D)
dtype = A.dtype
"""
import numpy as np
import tvm
+from tvm import te
import topi
import math
from topi.util import get_const_tuple
kw = kh
sw = sh
ph, pw = padding
- A = tvm.placeholder((n, ic, ih, iw), name='A')
+ A = te.placeholder((n, ic, ih, iw), name='A')
B = topi.nn.pool(A, kernel=[kh, kw], stride=[sh, sw], padding=padding,
pool_type=pool_type, ceil_mode=ceil_mode)
B = topi.nn.relu(B)
def verify_global_pool(n, c, h, w, pool_type):
- A = tvm.placeholder((n, c, h, w), name='A')
+ A = te.placeholder((n, c, h, w), name='A')
B = topi.nn.global_pool(A, pool_type=pool_type)
B = topi.nn.relu(B)
import os
import numpy as np
import tvm
+from tvm import te
import topi
import logging
from topi.util import get_const_tuple
def verify_softmax(m, n):
- A = tvm.placeholder((m, n), name='A')
+ A = te.placeholder((m, n), name='A')
B = topi.nn.softmax(A)
# confirm lower works
- s = tvm.create_schedule([B.op])
+ s = te.create_schedule([B.op])
tvm.lower(s, [A, B], simple_mode=True)
a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
def verify_log_softmax(m, n):
- A = tvm.placeholder((m, n), name='A')
+ A = te.placeholder((m, n), name='A')
B = topi.nn.log_softmax(A)
# confirm lower works
- s = tvm.create_schedule([B.op])
+ s = te.create_schedule([B.op])
tvm.lower(s, [A, B], simple_mode=True)
a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
b_np = topi.testing.log_softmax_python(a_np)
import numpy as np
import tvm
+from tvm import te
from tvm import rpc
from tvm.contrib import util, emscripten
return
# Build the module.
- n = tvm.var("n")
- A = tvm.placeholder((n,), name='A')
- B = tvm.placeholder((n,), name='B')
- C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C")
- s = tvm.create_schedule(C.op)
+ n = te.var("n")
+ A = te.placeholder((n,), name='A')
+ B = te.placeholder((n,), name='B')
+ C = te.compute(A.shape, lambda i: A[i] + B[i], name="C")
+ s = te.create_schedule(C.op)
s[C].opengl()
target_host = "llvm -target=asmjs-unknown-emscripten -system-lib"
f = tvm.build(s, [A, B, C], "opengl", target_host=target_host, name="myadd")
import os, shutil, SimpleHTTPServer, SocketServer
import tvm
+from tvm import te
from tvm.contrib import emscripten, util
import numpy as np
os.chdir(os.path.join(curr_path, "../../lib"))
# Create OpenGL module.
- n = tvm.var("n")
- A = tvm.placeholder((n,), name='A', dtype="float")
- B = tvm.compute((n,), lambda *i: A[i], name="B")
+ n = te.var("n")
+ A = te.placeholder((n,), name='A', dtype="float")
+ B = te.compute((n,), lambda *i: A[i], name="B")
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
s[B].opengl()
target_host = "llvm -target=asmjs-unknown-emscripten -system-lib"
Parameters
----------
- condition : tvm.Tensor
+ condition : tvm.te.Tensor
1-D tensor with boolean values.
Returns
-------
- out : tvm.Tensor
+ out : tvm.te.Tensor
Indices of non-zero elements.
"""
a = output_tensor(output_shape, "int32")
Parameters
----------
- condition : tvm.Tensor
+ condition : tvm.te.Tensor
2-D tensor with boolean values.
Returns
-------
- out : tvm.Tensor
+ out : tvm.te.Tensor
Indices of non-zero elements.
"""
a = output_tensor(output_shape, "int32")
Parameters
----------
- condition : tvm.Tensor
+ condition : tvm.te.Tensor
3-D tensor with boolean values.
Returns
-------
- out : tvm.Tensor
+ out : tvm.te.Tensor
Indices of non-zero elements.
"""
a = output_tensor(output_shape, "int32")
Parameters
----------
- condition : tvm.Tensor
+ condition : tvm.te.Tensor
4-D tensor with boolean values.
Returns
-------
- out : tvm.Tensor
+ out : tvm.te.Tensor
Indices of non-zero elements.
"""
a = output_tensor(output_shape, "int32")
Parameters
----------
- condition : tvm.Tensor
+ condition : tvm.te.Tensor
5-D tensor with boolean values.
Returns
-------
- out : tvm.Tensor
+ out : tvm.te.Tensor
Indices of non-zero elements.
"""
a = output_tensor(output_shape, "int32")
Parameters
----------
- condition : tvm.Tensor
+ condition : tvm.te.Tensor
Tensor with boolean values.
Returns
-------
- out : tvm.Tensor
+ out : tvm.te.Tensor
Indices of non-zero elements.
"""
if len(condition.shape) == 1:
"""Bitserial conv2d schedule on arm cpu"""
from __future__ import absolute_import as _abs
import tvm
+from tvm import te
from tvm import autotvm
from tvm import relay
from .. import tag
kernel_q = kernel
KH, KW, KB, CI, CO = kernel_q.shape
kvshape = (CO//VC, KH, KW, KB, VC, CI)
- return tvm.compute(kvshape, lambda co, dh, dw, b, vc, ci: \
- kernel_q[dh][dw][b][ci][co*VC+vc], name='kernel_vec')
+ return te.compute(kvshape, lambda co, dh, dw, b, vc, ci: \
+ kernel_q[dh][dw][b][ci][co*VC+vc], name='kernel_vec')
@autotvm.register_topi_compute("bitserial_conv2d_nhwc.arm_cpu")
def bitserial_conv2d_nhwc(cfg, data, kernel, stride, padding, activation_bits, weight_bits,
OW = (PAD_W - KW) // WSTR + 1
oshape = (1, OH, OW, CO)
- idxd = tvm.indexdiv
- idxm = tvm.indexmod
+ idxd = tvm.tir.indexdiv
+ idxm = tvm.tir.indexmod
# Pad input channels of weights and data when it is not a multiple of 8
if CI_packed % 8 != 0:
data_q = bitpack(data, activation_bits, pack_axis=3, bit_axis=3, pack_type='uint8')
kernel_vec = _kernel_vec_spatial_pack_nhwc(kernel, weight_bits, VC, len(kernel.shape) == 4)
- idxm = tvm.indexmod
+ idxm = tvm.tir.indexmod
if idxm(kernel_vec.shape[-1], 8) != 0 and CI_PAD != 0:
kernel_vec = pad(kernel_vec, [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, CI_PAD])
else:
data_pad = data_q
- data_vec = tvm.compute(dvshape, lambda n, h, w, vh, vw, b, ci: \
- data_pad[n][h*VH*HSTR+vh][w*VW*WSTR+vw][b][ci], name='data_vec')
- ci = tvm.reduce_axis((0, CI), name='ci')
- dh = tvm.reduce_axis((0, KH), name='dh')
- dw = tvm.reduce_axis((0, KW), name='dw')
- ib = tvm.reduce_axis((0, IB), name='ib')
- kb = tvm.reduce_axis((0, KB), name='kb')
+ data_vec = te.compute(dvshape, lambda n, h, w, vh, vw, b, ci: \
+ data_pad[n][h*VH*HSTR+vh][w*VW*WSTR+vw][b][ci], name='data_vec')
+ ci = te.reduce_axis((0, CI), name='ci')
+ dh = te.reduce_axis((0, KH), name='dh')
+ dw = te.reduce_axis((0, KW), name='dw')
+ ib = te.reduce_axis((0, IB), name='ib')
+ kb = te.reduce_axis((0, KB), name='kb')
def _bipolar_conv(n, h, w, co, vh, vw, vc):
- return tvm.sum((tvm.popcount(
+ return te.sum((tvm.tir.popcount(
kernel_vec[co, dh, dw, kb, vc, ci].astype('uint16') &
data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ib, ci].astype('uint16'))
- << (kb + ib).astype('uint16')), axis=[dh, dw, kb, ib, ci])
+ << (kb + ib).astype('uint16')), axis=[dh, dw, kb, ib, ci])
def _unipolar_conv(n, h, w, co, vh, vw, vc):
- return tvm.sum(
- ((tvm.popcount(kernel_vec[co, dh, dw, kb, vc, ci].astype('int16') &
- data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ib, ci].astype('int16')) -
- tvm.popcount(~kernel_vec[co, dh, dw, kb, vc, ci].astype('int16') &
- data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ib, ci]).astype('int16'))
+ return te.sum(
+ ((tvm.tir.popcount(kernel_vec[co, dh, dw, kb, vc, ci].astype('int16') &
+ data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ib, ci].astype('int16')) -
+ tvm.tir.popcount(~kernel_vec[co, dh, dw, kb, vc, ci].astype('int16') &
+ data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ib, ci]).astype('int16'))
<< (kb + ib).astype('int16')), axis=[dh, dw, kb, ib, ci])
if unipolar:
- conv_vec = tvm.compute(ovshape, _unipolar_conv, name='conv_vec', tag='unipolar')
+ conv_vec = te.compute(ovshape, _unipolar_conv, name='conv_vec', tag='unipolar')
else:
- conv_vec = tvm.compute(ovshape, _bipolar_conv, name='conv_vec', tag='bipolar')
+ conv_vec = te.compute(ovshape, _bipolar_conv, name='conv_vec', tag='bipolar')
- conv = tvm.compute(oshape,
- lambda n, h, w, co:
- conv_vec[n,
- idxd(h, VH), idxd(w, VW), idxd(co, VC),
- idxm(h, VH), idxm(w, VW), idxm(co, VC)].astype(out_dtype),
- name='conv', tag='spatial_bitserial_conv_nhwc')
+ conv = te.compute(oshape,
+ lambda n, h, w, co:
+ conv_vec[n,
+ idxd(h, VH), idxd(w, VW), idxd(co, VC),
+ idxm(h, VH), idxm(w, VW), idxm(co, VC)].astype(out_dtype),
+ name='conv', tag='spatial_bitserial_conv_nhwc')
return conv
def _intrin_popcount(m, k_i, w_b, x_b, unipolar):
pack_dtype = 'uint8'
- w = tvm.placeholder((w_b, m, k_i), dtype=pack_dtype, name='w')
- x = tvm.placeholder((x_b, k_i,), dtype=pack_dtype, name='x')
- k = tvm.reduce_axis((0, k_i), name='k')
- bw = tvm.reduce_axis((0, w_b), name='bw')
- bx = tvm.reduce_axis((0, x_b), name='bx')
+ w = te.placeholder((w_b, m, k_i), dtype=pack_dtype, name='w')
+ x = te.placeholder((x_b, k_i,), dtype=pack_dtype, name='x')
+ k = te.reduce_axis((0, k_i), name='k')
+ bw = te.reduce_axis((0, w_b), name='bw')
+ bx = te.reduce_axis((0, x_b), name='bx')
if unipolar:
dtype = 'int16'
- z = tvm.compute((m,), lambda i:
- tvm.sum((tvm.popcount(w[bw, i, k].astype(dtype) & x[bx, k].astype(dtype)) -
- tvm.popcount(~w[bw, i, k].astype(dtype) & x[bx, k].astype(dtype)))
- << (bw+bx).astype(dtype), axis=[bw, bx, k]), name='z')
+ z = te.compute(
+ (m,), lambda i:
+ te.sum((tvm.tir.popcount(w[bw, i, k].astype(dtype) & x[bx, k].astype(dtype)) -
+ tvm.tir.popcount(~w[bw, i, k].astype(dtype) & x[bx, k].astype(dtype)))
+ << (bw+bx).astype(dtype), axis=[bw, bx, k]), name='z')
else:
dtype = 'uint16'
- z = tvm.compute((m,), lambda i:
- tvm.sum(tvm.popcount(w[bw, i, k].astype(dtype) & x[bx, k].astype(dtype))
- << (bw+bx).astype(dtype), axis=[bw, bx, k]), name='z')
- Wb = tvm.decl_buffer(w.shape, w.dtype,
- name="W",
- offset_factor=k_i,
- strides=[tvm.var('ldw'), tvm.var('ldw'), 1]) # stride can be inferred
- Xb = tvm.decl_buffer(x.shape, x.dtype,
- name="X",
- offset_factor=k_i,
- strides=[tvm.var('ldw'), 1])
- Zb = tvm.decl_buffer(z.shape, z.dtype,
- name="Z",
- offset_factor=1,
- strides=[1])
+ z = te.compute((m,), lambda i:
+ te.sum(tvm.tir.popcount(w[bw, i, k].astype(dtype) & x[bx, k].astype(dtype))
+ << (bw+bx).astype(dtype), axis=[bw, bx, k]), name='z')
+ Wb = tvm.tir.decl_buffer(w.shape, w.dtype,
+ name="W",
+ offset_factor=k_i,
+ strides=[te.var('ldw'), te.var('ldw'), 1]) # stride can be inferred
+ Xb = tvm.tir.decl_buffer(x.shape, x.dtype,
+ name="X",
+ offset_factor=k_i,
+ strides=[te.var('ldw'), 1])
+ Zb = tvm.tir.decl_buffer(z.shape, z.dtype,
+ name="Z",
+ offset_factor=1,
+ strides=[1])
def _intrin_func(ins, outs):
ww, xx = ins
zz = outs[0]
- args_1 = tvm.const(1, 'uint32')
- args_2 = tvm.const(2, 'uint32')
+ args_1 = tvm.tir.const(1, 'uint32')
+ args_2 = tvm.tir.const(2, 'uint32')
if unipolar:
vpadd = "llvm.arm.neon.vpadd.v8i8"
return_dtype = 'uint16x8'
def _instr(index):
- irb = tvm.ir_builder.create()
+ irb = tvm.tir.ir_builder.create()
if index == 1: # reduce reset
- irb.emit(zz.vstore(0, tvm.const(0, return_dtype)))
+ irb.emit(zz.vstore(0, tvm.tir.const(0, return_dtype)))
return irb.get()
# body and reduce update
cnts8 = [None] * 8
w_ = ww.vload([bw, i, 0], 'uint8x16').astype(full_dtype)
x_ = xx.vload([bx, 0], 'uint8x16').astype(full_dtype)
if unipolar:
- cnts = tvm.popcount(w_ & x_) - tvm.popcount(~w_ & x_)
+ cnts = tvm.tir.popcount(w_ & x_) - tvm.tir.popcount(~w_ & x_)
else:
- cnts = tvm.popcount(w_ & x_)
- upper_half = tvm.call_pure_intrin(half_dtype, 'vectorhigh', cnts)
- lower_half = tvm.call_pure_intrin(half_dtype, 'vectorlow', cnts)
+ cnts = tvm.tir.popcount(w_ & x_)
+ upper_half = tvm.tir.call_pure_intrin(half_dtype, 'vectorhigh', cnts)
+ lower_half = tvm.tir.call_pure_intrin(half_dtype, 'vectorlow', cnts)
cnts8[i] = upper_half + lower_half
for i in range(m//2):
- cnts4[i] = tvm.call_llvm_intrin(half_dtype, vpadd,
- args_1, cnts8[i*2], cnts8[i*2+1])
+ cnts4[i] = tvm.tir.call_llvm_intrin(half_dtype, vpadd,
+ args_1, cnts8[i*2], cnts8[i*2+1])
for i in range(m//4):
- cnts2[i] = tvm.call_llvm_intrin(half_dtype, vpadd,
- args_1, cnts4[i*2], cnts4[i*2+1])
- cnts = tvm.call_pure_intrin(full_dtype, 'vectorcombine', cnts2[0], cnts2[1])
- shifted_cnts = cnts << tvm.const(bw+bx, pack_dtype)
- out = tvm.call_llvm_intrin(return_dtype, vpadalu,
- args_2, zz.vload(0, return_dtype), shifted_cnts)
+ cnts2[i] = tvm.tir.call_llvm_intrin(half_dtype, vpadd,
+ args_1, cnts4[i*2], cnts4[i*2+1])
+ cnts = tvm.tir.call_pure_intrin(
+ full_dtype, 'vectorcombine', cnts2[0], cnts2[1])
+ shifted_cnts = cnts << tvm.tir.const(bw+bx, pack_dtype)
+ out = tvm.tir.call_llvm_intrin(
+ return_dtype, vpadalu,
+ args_2, zz.vload(0, return_dtype), shifted_cnts)
else: # ki == 8
for i in range(m):
w_ = ww.vload([bw, i, 0], 'uint8x8').astype(half_dtype)
x_ = xx.vload([bx, 0], 'uint8x8').astype(half_dtype)
if unipolar:
- cnts8[i] = tvm.popcount(w_ & x_) - tvm.popcount(~w_ & x_)
+ cnts8[i] = tvm.tir.popcount(w_ & x_) - tvm.tir.popcount(~w_ & x_)
else:
- cnts8[i] = tvm.popcount(w_ & x_)
+ cnts8[i] = tvm.tir.popcount(w_ & x_)
for i in range(m//2):
- cnts4[i] = tvm.call_llvm_intrin(half_dtype, vpadd,
- args_1, cnts8[i*2], cnts8[i*2+1])
+ cnts4[i] = tvm.tir.call_llvm_intrin(half_dtype, vpadd,
+ args_1, cnts8[i*2], cnts8[i*2+1])
for i in range(m//4):
- cnts2[i] = tvm.call_llvm_intrin(half_dtype, vpadd,
- args_1, cnts4[i*2], cnts4[i*2+1])
- cnts = tvm.call_pure_intrin(full_dtype, 'vectorcombine', cnts2[0], cnts2[1])
- shifted_cnts = cnts << tvm.const(bw+bx, pack_dtype)
- out = tvm.call_llvm_intrin(return_dtype, vpadalu,
- args_2, zz.vload(0, return_dtype), shifted_cnts)
+ cnts2[i] = tvm.tir.call_llvm_intrin(half_dtype, vpadd,
+ args_1, cnts4[i*2], cnts4[i*2+1])
+ cnts = tvm.tir.call_pure_intrin(
+ full_dtype, 'vectorcombine', cnts2[0], cnts2[1])
+ shifted_cnts = cnts << tvm.tir.const(bw+bx, pack_dtype)
+ out = tvm.tir.call_llvm_intrin(
+ return_dtype, vpadalu,
+ args_2, zz.vload(0, return_dtype), shifted_cnts)
irb.emit(zz.vstore(0, out))
return irb.get()
# body, reset, update
return _instr(0), _instr(1), _instr(2)
- with tvm.build_config(offset_factor=1, partition_const_loop=True):
- return tvm.decl_tensor_intrin(z.op, _intrin_func, binds={w: Wb, x:Xb, z:Zb})
+ with tvm.target.build_config(offset_factor=1, partition_const_loop=True):
+ return te.decl_tensor_intrin(z.op, _intrin_func, binds={w: Wb, x:Xb, z:Zb})
# ARM specific schedule that using custom microkernel
def _schedule_spatial_conv2d_nhwc(cfg, s, data_pad, data_vec, kernel_vec,
@autotvm.register_topi_schedule("bitserial_conv2d_nhwc.arm_cpu")
def schedule_bitserial_conv2d_nhwc(cfg, outs):
"""Arm cpu schedule for bitserial conv2d"""
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
scheduled_ops = []
def traverse(op):
if op not in s.outputs:
s[op].compute_inline()
for tensor in op.input_tensors:
- if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops:
+ if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops:
traverse(tensor.op)
if 'spatial_bitserial_conv_nhwc' in op.tag:
data_q = data_vec.op.input_tensors[0]
data = data_q.op.input_tensors[0]
data_pad = None
- if isinstance(data_q.op, tvm.tensor.ComputeOp) and "pad" in data_q.op.tag:
+ if isinstance(data_q.op, te.tensor.ComputeOp) and "pad" in data_q.op.tag:
data_pad = data_q
data_q = data
data = data.op.input_tensors[0]
"""Schedule for bitserial dense operator."""
from __future__ import absolute_import as _abs
import tvm
+from tvm import te
from tvm import autotvm
from topi.util import get_const_tuple
from .. import tag
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
2-D with shape [batch, in_dim]
- weight : tvm.Tensor
+ weight : tvm.te.Tensor
2-D with shape [out_dim, in_dim]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
2-D with shape [batch, out_dim]
"""
data_packed = bitpack(data, data_bits, pack_axis=1, bit_axis=1, pack_type=pack_dtype)
wvshape = (out_dim//VY, in_dim//VK, WB, VY, VK)
oshape = (batch, out_dim)
- k = tvm.reduce_axis((0, in_dim), name='k')
- db = tvm.reduce_axis((0, DB), name='db')
- wb = tvm.reduce_axis((0, WB), name='wb')
+ k = te.reduce_axis((0, in_dim), name='k')
+ db = te.reduce_axis((0, DB), name='db')
+ wb = te.reduce_axis((0, WB), name='wb')
# Tile data and weights
- weight_vec = tvm.compute(wvshape, lambda yo, ko, wb, vy, vk:
- weight_packed[yo*VY+vy][wb][ko*VK+vk], name='weight_vec')
- matmul_unipolar = tvm.compute(oshape, lambda x, y: tvm.sum(
- (tvm.popcount(weight_vec[y//VY, k//VK, wb, y%VY, k%VK].astype(out_dtype) &
- data_packed[x, db, k].astype(out_dtype)) -
- tvm.popcount(~weight_vec[y//VY, k//VK, wb, y%VY, k%VK].astype(out_dtype) &
- data_packed[x, db, k].astype(out_dtype)))
+ weight_vec = te.compute(wvshape, lambda yo, ko, wb, vy, vk:
+ weight_packed[yo*VY+vy][wb][ko*VK+vk], name='weight_vec')
+ matmul_unipolar = te.compute(oshape, lambda x, y: te.sum(
+ (tvm.tir.popcount(weight_vec[y//VY, k//VK, wb, y%VY, k%VK].astype(out_dtype) &
+ data_packed[x, db, k].astype(out_dtype)) -
+ tvm.tir.popcount(~weight_vec[y//VY, k//VK, wb, y%VY, k%VK].astype(out_dtype) &
+ data_packed[x, db, k].astype(out_dtype)))
<< (wb+db).astype(out_dtype), axis=[wb, db, k]), tag='bitserial_dense_unipolar')
- matmul = tvm.compute(oshape, lambda x, y: tvm.sum(
- tvm.popcount(weight_vec[y//VY, k//VK, wb, y%VY, k%VK].astype(out_dtype) &
- data_packed[x, db, k].astype(out_dtype))
+ matmul = te.compute(oshape, lambda x, y: te.sum(
+ tvm.tir.popcount(weight_vec[y//VY, k//VK, wb, y%VY, k%VK].astype(out_dtype) &
+ data_packed[x, db, k].astype(out_dtype))
<< (wb+db).astype(out_dtype), axis=[wb, db, k]), tag='bitserial_dense')
cfg.add_flop(batch * out_dim * in_dim * binary_op_multiplier(pack_dtype))
s: Schedule
The computation schedule for bitserial_dense.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _schedule(cfg, s, data_vec, weight_vec, output, unipolar):
if op not in s.outputs:
s[op].compute_inline()
for tensor in op.input_tensors:
- if isinstance(tensor.op, tvm.tensor.ComputeOp):
+ if isinstance(tensor.op, tvm.te.ComputeOp):
traverse(tensor.op)
elif op.tag == 'bitserial_dense' or 'bitserial_dense_unipolar':
from __future__ import absolute_import as _abs
import tvm
+from tvm import te
from tvm import autotvm
import tvm.contrib.nnpack
from ..nn.util import get_const_int, get_pad_tuple
from ..nn.winograd_util import winograd_transform_matrices
from .conv2d_spatial_pack import conv2d_spatial_pack_nchw, \
- conv2d_spatial_pack_nhwc, \
- schedule_conv2d_spatial_pack_nchw, \
- schedule_conv2d_spatial_pack_nhwc
+ conv2d_spatial_pack_nhwc, \
+ schedule_conv2d_spatial_pack_nchw, \
+ schedule_conv2d_spatial_pack_nhwc
@autotvm.register_topi_compute("conv2d_nchw_spatial_pack.arm_cpu")
@autotvm.register_topi_schedule("conv2d_nchw_spatial_pack.arm_cpu")
def schedule_conv2d_nchw_spatial_pack(cfg, outs):
"""Create schedule for conv2d_nchw"""
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
# schedule conv2d
kernel = kernel_vec.op.input_tensors[0]
else:
kernel = kernel_vec
- if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag:
+ if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag:
s[kernel].compute_inline()
schedule_conv2d_spatial_pack_nchw(cfg, s, data_vec, kernel_vec,
@autotvm.register_topi_schedule("conv2d_nhwc_spatial_pack.arm_cpu")
def schedule_conv2d_nhwc_spatial_pack(cfg, outs):
"""Create schedule for conv2d_nhwc"""
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if 'spatial_conv_output_NHWC' in op.tag:
@autotvm.register_topi_schedule("conv2d_nchw_winograd.arm_cpu")
def schedule_conv2d_nchw_winograd(cfg, outs):
"""Create schedule for conv2d_nchw_winograd"""
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if 'winograd_conv2d_output' in op.tag:
assert KH == 3 and KW == 3 and HSTR == 1 and WSTR == 1
data_pad = nn.pad(data, (0, 0, pt, pl), (0, 0, pb, pr), name="data_pad")
- idxd = tvm.indexdiv
- idxm = tvm.indexmod
+ idxd = tvm.tir.indexdiv
+ idxm = tvm.tir.indexmod
r = KW
m = tile_size
VK = cfg['tile_k'].size[-1]
# pack input tile
- input_tile = tvm.compute((C, idxd(P, VP), alpha, alpha, VP),
- lambda c, b, eps, nu, bb:
- data_pad[idxd(b*VP + bb, nH*nW), c,
- idxm(idxd(b*VP + bb, nW), nH) * m + eps,
- idxm(b*VP + bb, nW) * m + nu],
- name='d')
+ input_tile = te.compute((C, idxd(P, VP), alpha, alpha, VP),
+ lambda c, b, eps, nu, bb:
+ data_pad[idxd(b*VP + bb, nH*nW), c,
+ idxm(idxd(b*VP + bb, nW), nH) * m + eps,
+ idxm(b*VP + bb, nW) * m + nu],
+ name='d')
# transform kernel
if pre_computed:
U = kernel
else:
- r_kh = tvm.reduce_axis((0, KH), 'r_kh')
- r_kw = tvm.reduce_axis((0, KW), 'r_kw')
- U = tvm.compute((alpha, alpha, idxd(K, VK), C, VK), lambda eps, nu, k, c, kk:
- tvm.sum(kernel[k * VK + kk][c][r_kh][r_kw].astype(out_dtype) *
- G[eps][r_kh] * G[nu][r_kw], axis=[r_kh, r_kw]), name='U')
+ r_kh = te.reduce_axis((0, KH), 'r_kh')
+ r_kw = te.reduce_axis((0, KW), 'r_kw')
+ U = te.compute((alpha, alpha, idxd(K, VK), C, VK), lambda eps, nu, k, c, kk:
+ te.sum(kernel[k * VK + kk][c][r_kh][r_kw].astype(out_dtype) *
+ G[eps][r_kh] * G[nu][r_kw], axis=[r_kh, r_kw]), name='U')
# transform image
- r_eps = tvm.reduce_axis((0, alpha), 'r_eps')
- r_nu = tvm.reduce_axis((0, alpha), 'r_nu')
- V = tvm.compute((alpha, alpha, idxd(P, VP), C, VP), lambda eps, nu, b, c, bb:
- tvm.sum(input_tile[c][b][r_eps][r_nu][bb].astype(out_dtype) *
- B[r_eps][eps] * B[r_nu][nu], axis=[r_eps, r_nu]), name='V')
+ r_eps = te.reduce_axis((0, alpha), 'r_eps')
+ r_nu = te.reduce_axis((0, alpha), 'r_nu')
+ V = te.compute((alpha, alpha, idxd(P, VP), C, VP), lambda eps, nu, b, c, bb:
+ te.sum(input_tile[c][b][r_eps][r_nu][bb].astype(out_dtype) *
+ B[r_eps][eps] * B[r_nu][nu], axis=[r_eps, r_nu]), name='V')
# batch gemm
- c = tvm.reduce_axis((0, C), name='c')
- M = tvm.compute((alpha, alpha, K, P), lambda eps, nu, k, b:
- tvm.sum(U[eps][nu][idxd(k, VK)][c][idxm(k, VK)] *
- V[eps][nu][idxd(b, VP)][c][idxm(b, VP)], axis=c), name='M')
+ c = te.reduce_axis((0, C), name='c')
+ M = te.compute((alpha, alpha, K, P), lambda eps, nu, k, b:
+ te.sum(U[eps][nu][idxd(k, VK)][c][idxm(k, VK)] *
+ V[eps][nu][idxd(b, VP)][c][idxm(b, VP)], axis=c), name='M')
# inverse transform
- r_eps = tvm.reduce_axis((0, alpha), 'r_eps')
- r_nu = tvm.reduce_axis((0, alpha), 'r_nu')
- Y = tvm.compute((K, P, m, m), lambda k, b, vh, vw:
- tvm.sum(M[r_eps][r_nu][k][b] * A[r_eps][vh] * A[r_nu][vw],
- axis=[r_eps, r_nu]), name='Y')
+ r_eps = te.reduce_axis((0, alpha), 'r_eps')
+ r_nu = te.reduce_axis((0, alpha), 'r_nu')
+ Y = te.compute((K, P, m, m), lambda k, b, vh, vw:
+ te.sum(M[r_eps][r_nu][k][b] * A[r_eps][vh] * A[r_nu][vw],
+ axis=[r_eps, r_nu]), name='Y')
# unpack output
- output = tvm.compute((N, K, H, W), lambda n, k, h, w:
- Y[k][n * nH * nW + idxd(h, m) * nW + idxd(w, m),
- idxm(h, m), idxm(w, m)],
- name='output', tag='winograd_conv2d_output')
+ output = te.compute((N, K, H, W), lambda n, k, h, w:
+ Y[k][n * nH * nW + idxd(h, m) * nW + idxd(w, m),
+ idxm(h, m), idxm(w, m)],
+ name='output', tag='winograd_conv2d_output')
# we have to manually assign effective GFLOP for winograd
cfg.add_flop(2 * N * K * H * W * KH * KW * C)
s[d].compute_inline()
# transform kernel
- if isinstance(U.op, tvm.tensor.ComputeOp):
+ if isinstance(U.op, tvm.te.ComputeOp):
kernel, G = U.op.input_tensors
s[G].compute_inline()
eps, nu, k, c, kk, = s[U].op.axis
s[U].vectorize(kk)
s[U].parallel(k)
- if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag:
+ if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag:
s[kernel].compute_inline()
# transform image
@autotvm.register_topi_schedule("conv2d_nchw_winograd_nnpack.arm_cpu")
def schedule_conv2d_nchw_winograd_nnpack(cfg, outs):
"""Create schedule for conv2d_nchw_winograd_nnpack"""
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if 'winograd_nnpack_conv2d_output' in op.tag:
cfg.define_knob('winograd_nnpack_algorithm', [convolution_algorithm])
assert N == 1
- with tvm.tag_scope("winograd_nnpack_conv2d_weight_transform"):
+ with tvm.te.tag_scope("winograd_nnpack_conv2d_weight_transform"):
transformed_kernel = tvm.contrib.nnpack.convolution_inference_weight_transform(
kernel, algorithm=cfg['winograd_nnpack_algorithm'].val)
if autotvm.GLOBAL_SCOPE.in_tuning:
- transformed_kernel = tvm.compute(transformed_kernel.shape, lambda *args: 0.0)
+ transformed_kernel = te.compute(transformed_kernel.shape, lambda *args: 0.0)
- with tvm.tag_scope("winograd_nnpack_conv2d_output"):
+ with tvm.te.tag_scope("winograd_nnpack_conv2d_output"):
output = tvm.contrib.nnpack.convolution_inference_without_weight_transform(
data, transformed_kernel,
bias=None,
(X, TK) = output.op.input_tensors[:2]
# transform kernel
- assert isinstance(TK.op, (tvm.tensor.ComputeOp, tvm.tensor.ExternOp, tvm.tensor.PlaceholderOp))
- if autotvm.GLOBAL_SCOPE.in_tuning and isinstance(TK.op, tvm.tensor.ComputeOp):
+ assert isinstance(TK.op, (te.tensor.ComputeOp, te.tensor.ExternOp, te.tensor.PlaceholderOp))
+ if autotvm.GLOBAL_SCOPE.in_tuning and isinstance(TK.op, te.tensor.ComputeOp):
# kernel transformation will be pre-computed during compilation, so we skip
# this part to make tuning records correct
s[TK].pragma(s[TK].op.axis[0], 'debug_skip_region')
W = (IW + pl + pr - 3) // WSTR + 1
assert N == 1
- with tvm.tag_scope("winograd_nnpack_conv2d_output"):
+ with tvm.te.tag_scope("winograd_nnpack_conv2d_output"):
output = tvm.contrib.nnpack.convolution_inference_without_weight_transform(
data=data,
transformed_kernel=transformed_kernel,
@autotvm.register_topi_schedule("conv2d_nchw_winograd_nnpack_without_weight_transform.arm_cpu")
def schedule_conv2d_nchw_winograd_nnpack_without_weight_transform(cfg, outs):
"""TOPI schedule callback"""
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if 'winograd_nnpack_conv2d_output' in op.tag:
import logging
import tvm
+from tvm import te
from tvm import relay
from tvm import autotvm
data, kernel = tinfos
out_dtype = out_type.dtype
- idxd = tvm.indexdiv
+ idxd = tvm.tir.indexdiv
if topi_tmpl == "conv2d_nchw_spatial_pack.arm_cpu":
assert data_layout == "NCHW" and kernel_layout == "OIHW"
new_attrs['kernel_layout'] = 'OIHW%do' % VC
new_data = data
- new_kernel = tvm.placeholder((idxd(CO, VC), CI, KH, KW, VC), dtype=kernel.dtype)
+ new_kernel = te.placeholder((idxd(CO, VC), CI, KH, KW, VC), dtype=kernel.dtype)
new_workload = autotvm.task.args_to_workload(
[new_data, new_kernel, strides, padding, dilation, out_dtype],
"conv2d_nchw_spatial_pack.arm_cpu")
new_attrs['kernel_layout'] = 'OHWI%do' % VC
new_data = data
- new_kernel = tvm.placeholder((idxd(CO, VC), KH, KW, CI, VC), dtype=kernel.dtype)
+ new_kernel = te.placeholder((idxd(CO, VC), KH, KW, CI, VC), dtype=kernel.dtype)
new_workload = autotvm.task.args_to_workload(
[new_data, new_kernel, strides, padding, dilation, out_dtype],
"conv2d_nhwc_spatial_pack.arm_cpu")
new_attrs['tile_size'] = tile_size
new_data = data
- new_kernel = tvm.placeholder((KH + tile_size - 1,
- KW + tile_size -1,
- idxd(CO, VC), CI, VC),
- kernel.dtype)
+ new_kernel = te.placeholder((KH + tile_size - 1,
+ KW + tile_size -1,
+ idxd(CO, VC), CI, VC),
+ kernel.dtype)
new_workload = autotvm.task.args_to_workload(
[new_data, new_kernel, strides, padding, dilation, out_dtype],
'conv2d_nchw_winograd.arm_cpu')
out_dtype=weight_dtype)
new_data = data
- new_kernel = tvm.placeholder((CO, CI, 8, 8), "float32")
+ new_kernel = te.placeholder((CO, CI, 8, 8), "float32")
new_workload = autotvm.task.args_to_workload(
[new_data, new_kernel, None, strides, padding, dilation, out_dtype],
# Store the same config for the altered operator (workload)
new_data = data
- new_kernel = tvm.placeholder((idxd(CO, VC), CI, KH, KW, VC), dtype=kernel.dtype)
+ new_kernel = te.placeholder((idxd(CO, VC), CI, KH, KW, VC), dtype=kernel.dtype)
new_workload = autotvm.task.args_to_workload(
[new_data, new_kernel, strides, padding, dilation, out_dtype],
"depthwise_conv2d_nchw_spatial_pack.arm_cpu")
# under the License.
# pylint: disable=invalid-name,unused-variable,unused-argument,no-member
"""Conv2D int8 schedule on ARM"""
-
-import tvm
+from tvm import te
from tvm import autotvm
from .. import tag
from ..util import get_const_tuple
# If no config was set, we can fallback to NCHW config.
if cfg.is_fallback:
- _get_default_config(cfg, tvm.placeholder((n, in_channel, ih, iw), dtype=data.dtype),
- tvm.placeholder((num_filter, in_channel, kh, kw), dtype=kernel.dtype),
+ _get_default_config(cfg, te.placeholder((n, in_channel, ih, iw), dtype=data.dtype),
+ te.placeholder((num_filter, in_channel, kh, kw), dtype=kernel.dtype),
strides, padding, out_dtype)
return nn.conv2d_NCHWc_int8_compute(data,
kernel,
@autotvm.register_topi_schedule("conv2d_NCHWc_int8.arm_cpu")
def schedule_conv2d_NCHWc_int8(cfg, outs):
"""Create schedule for tensors"""
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
scheduled_ops = []
def traverse(op):
if op not in s.outputs:
s[op].compute_inline()
for tensor in op.input_tensors:
- if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops:
+ if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops:
traverse(tensor.op)
if 'conv2d_NCHWc_int8' in op.tag:
kernel_vec = conv_out.op.input_tensors[1]
data_vec = conv_out.op.input_tensors[0]
data = data_vec.op.input_tensors[0] \
- if isinstance(data_vec.op, tvm.tensor.ComputeOp) and "pad" not in data_vec.op.tag \
+ if isinstance(data_vec.op, te.tensor.ComputeOp) and "pad" not in data_vec.op.tag \
else data_vec
- if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag:
+ if isinstance(data.op, te.tensor.ComputeOp) and "pad" in data.op.tag:
data_pad = data
data = data_pad.op.input_tensors[0]
"""Conv2D spatial pack implementation for ARM CPU"""
from __future__ import absolute_import as _abs
import tvm
+from tvm import te
from tvm import autotvm
from .. import nn
from ..util import get_const_tuple
if dilation_h != 1 or dilation_w != 1:
# undilate input data
dvshape = (N, OH // VH, OW // VW, CI, KH, KW, VH, VW)
- data_vec = tvm.compute(dvshape, lambda n, h, w, ci, kh, kw, vh, vw:
- data_pad[n][ci][(h*VH+vh)*HSTR+kh*dilation_h]
- [(w*VW+vw)*WSTR+kw*dilation_w],
- name='data_vec_undilated')
+ data_vec = te.compute(dvshape, lambda n, h, w, ci, kh, kw, vh, vw:
+ data_pad[n][ci][(h*VH+vh)*HSTR+kh*dilation_h]
+ [(w*VW+vw)*WSTR+kw*dilation_w],
+ name='data_vec_undilated')
else:
dvshape = (N, OH // VH, OW // VW, CI, VH*HSTR + KH-1, VW*WSTR + KW-1)
- data_vec = tvm.compute(dvshape, lambda n, h, w, ci, vh, vw:
- data_pad[n][ci][h*VH*HSTR+vh][w*VW*WSTR+vw],
- name='data_vec')
+ data_vec = te.compute(dvshape, lambda n, h, w, ci, vh, vw:
+ data_pad[n][ci][h*VH*HSTR+vh][w*VW*WSTR+vw],
+ name='data_vec')
if pre_packed:
kernel_vec = kernel
else:
- kernel_vec = tvm.compute(kvshape, lambda co, ci, kh, kw, vc:
- kernel[co*VC+vc][ci][kh][kw],
- name='kernel_vec')
+ kernel_vec = te.compute(kvshape, lambda co, ci, kh, kw, vc:
+ kernel[co*VC+vc][ci][kh][kw],
+ name='kernel_vec')
- ci = tvm.reduce_axis((0, CI), name='ci')
- kh = tvm.reduce_axis((0, KH), name='kh')
- kw = tvm.reduce_axis((0, KW), name='kw')
+ ci = te.reduce_axis((0, CI), name='ci')
+ kh = te.reduce_axis((0, KH), name='kh')
+ kw = te.reduce_axis((0, KW), name='kw')
if dilation_h != 1 or dilation_w != 1:
- conv = tvm.compute(ovshape, lambda n, co, h, w, vh, vw, vc: \
- tvm.sum(data_vec[n, h, w, ci, kh, kw, vh, vw].astype(out_dtype) *
- kernel_vec[co, ci, kh, kw, vc].astype(out_dtype),
- axis=[ci, kh, kw]), name='conv')
+ conv = te.compute(ovshape, lambda n, co, h, w, vh, vw, vc: \
+ te.sum(data_vec[n, h, w, ci, kh, kw, vh, vw].astype(out_dtype) *
+ kernel_vec[co, ci, kh, kw, vc].astype(out_dtype),
+ axis=[ci, kh, kw]), name='conv')
else:
- conv = tvm.compute(ovshape, lambda n, co, h, w, vh, vw, vc: \
- tvm.sum(data_vec[n, h, w, ci, vh*HSTR+kh, vw*WSTR+kw].astype(out_dtype) *
- kernel_vec[co, ci, kh, kw, vc].astype(out_dtype),
- axis=[ci, kh, kw]), name='conv')
-
- idxdiv = tvm.indexdiv
- idxmod = tvm.indexmod
-
- output = tvm.compute(oshape, lambda n, co, h, w:
- conv[n,
- idxdiv(co, VC), idxdiv(h, VH), idxdiv(w, VW),
- idxmod(h, VH), idxmod(w, VW), idxmod(co, VC)],
- name='output_unpack', tag='spatial_conv2d_output')
+ conv = te.compute(ovshape, lambda n, co, h, w, vh, vw, vc: \
+ te.sum(data_vec[n, h, w, ci, vh*HSTR+kh, vw*WSTR+kw].astype(out_dtype) *
+ kernel_vec[co, ci, kh, kw, vc].astype(out_dtype),
+ axis=[ci, kh, kw]), name='conv')
+
+ idxdiv = tvm.tir.indexdiv
+ idxmod = tvm.tir.indexmod
+
+ output = te.compute(oshape, lambda n, co, h, w:
+ conv[n,
+ idxdiv(co, VC), idxdiv(h, VH), idxdiv(w, VW),
+ idxmod(h, VH), idxmod(w, VW), idxmod(co, VC)],
+ name='output_unpack', tag='spatial_conv2d_output')
return output
def schedule_conv2d_spatial_pack_nchw(cfg, s, data_vec, kernel_vec,
dilated_kernel_w = (KW - 1) * dilation_w + 1
pad_top, pad_left, pad_down, pad_right = \
- get_pad_tuple(padding, (dilated_kernel_h, dilated_kernel_w))
+ get_pad_tuple(padding, (dilated_kernel_h, dilated_kernel_w))
HSTR, WSTR = strides if isinstance(strides, (tuple, list)) else (strides, strides)
OH = (IH + pad_top + pad_down - dilated_kernel_h) // HSTR + 1
if dilation_h != 1 or dilation_w != 1:
# undilate input data
dvshape = (N, OHO, OWO, KH, KW, IC, OHI, OWI)
- data_vec = tvm.compute(dvshape, lambda n, oho, owo, kh, kw, ic, ohi, owi:
- data_pad[n][(oho*OHI+ohi)*HSTR+kh*dilation_h]
- [(owo*OWI+owi)*WSTR+kw*dilation_w][ic],
- name='data_vec_undilated')
+ data_vec = te.compute(dvshape, lambda n, oho, owo, kh, kw, ic, ohi, owi:
+ data_pad[n][(oho*OHI+ohi)*HSTR+kh*dilation_h]
+ [(owo*OWI+owi)*WSTR+kw*dilation_w][ic],
+ name='data_vec_undilated')
else:
dvshape = (N, OHO, OWO, KH + (OHI-1)*HSTR, KW + (OWI-1)*WSTR, IC)
- data_vec = tvm.compute(dvshape, lambda n, oho, owo, ohi, owi, ic:
- data_pad[n][oho*OHI*HSTR+ohi][owo*OWI*WSTR+owi][ic],
- name='data_vec')
- kernel_vec = tvm.compute(kvshape, lambda oco, kh, kw, ic, oci: \
- kernel[kh][kw][ic][oco*OCI+oci],
- name='kernel_vec')
+ data_vec = te.compute(dvshape, lambda n, oho, owo, ohi, owi, ic:
+ data_pad[n][oho*OHI*HSTR+ohi][owo*OWI*WSTR+owi][ic],
+ name='data_vec')
+ kernel_vec = te.compute(kvshape, lambda oco, kh, kw, ic, oci: \
+ kernel[kh][kw][ic][oco*OCI+oci],
+ name='kernel_vec')
- ic = tvm.reduce_axis((0, IC), name='ic')
- kh = tvm.reduce_axis((0, KH), name='kh')
- kw = tvm.reduce_axis((0, KW), name='kw')
+ ic = te.reduce_axis((0, IC), name='ic')
+ kh = te.reduce_axis((0, KH), name='kh')
+ kw = te.reduce_axis((0, KW), name='kw')
if dilation_h != 1 or dilation_w != 1:
- conv = tvm.compute(ovshape, lambda n, oho, owo, oco, ohi, owi, oci: \
- tvm.sum(data_vec[n, oho, owo, kh, kw, ohi, owi, ic].astype(out_dtype) *
- kernel_vec[oco, kh, kw, ic, oci].astype(out_dtype),
- axis=[ic, kh, kw]), name='conv')
+ conv = te.compute(ovshape, lambda n, oho, owo, oco, ohi, owi, oci: \
+ te.sum(data_vec[n, oho, owo, kh, kw, ohi, owi, ic].astype(out_dtype) *
+ kernel_vec[oco, kh, kw, ic, oci].astype(out_dtype),
+ axis=[ic, kh, kw]), name='conv')
else:
- conv = tvm.compute(ovshape, lambda n, oho, owo, oco, ohi, owi, oci: \
- tvm.sum(data_vec[n, oho, owo, ohi*HSTR+kh, owi*WSTR+kw, ic].astype(out_dtype) *
- kernel_vec[oco, kh, kw, ic, oci].astype(out_dtype),
- axis=[ic, kh, kw]), name='conv')
-
- idiv = tvm.indexdiv
- imod = tvm.indexmod
- output = tvm.compute(oshape, lambda n, oho, owo, oc:
- conv[n][idiv(oho, OHI)][idiv(owo, OWI)][idiv(oc, OCI)]\
- [imod(oho, OHI)][imod(owo, OWI)][imod(oc, OCI)],
- name='output_unpack', tag='spatial_conv_output_NHWC')
+ conv = te.compute(
+ ovshape, lambda n, oho, owo, oco, ohi, owi, oci: \
+ te.sum(data_vec[n, oho, owo, ohi*HSTR+kh, owi*WSTR+kw, ic].astype(out_dtype) *
+ kernel_vec[oco, kh, kw, ic, oci].astype(out_dtype),
+ axis=[ic, kh, kw]), name='conv')
+
+ idiv = tvm.tir.indexdiv
+ imod = tvm.tir.indexmod
+ output = te.compute(oshape, lambda n, oho, owo, oc:
+ conv[n][idiv(oho, OHI)][idiv(owo, OWI)][idiv(oc, OCI)]\
+ [imod(oho, OHI)][imod(owo, OWI)][imod(oc, OCI)],
+ name='output_unpack', tag='spatial_conv_output_NHWC')
return output
def schedule_conv2d_spatial_pack_nhwc(cfg, s, op, output):
from __future__ import absolute_import as _abs
import tvm
+from tvm import te
from tvm import autotvm
from ..nn import dilate, pad, get_pad_tuple
Parameters
----------
- Input : tvm.Tensor
+ Input : tvm.te.Tensor
4-D with shape [batch, in_channel, in_height, in_width]
- Filter : tvm.Tensor
+ Filter : tvm.te.Tensor
4-D with shape [in_channel, num_filter, filter_height, filter_width]
strides : tuple of two ints
Returns
-------
- Output : tvm.Tensor
+ Output : tvm.te.Tensor
4-D with shape [batch, out_channel, out_height, out_width]
"""
return _decl_spatial_pack(cfg, Input, Filter, strides, padding, "NCHW", out_dtype, 2)
ovshape = (N, CO // VC, OH // VH, OW // VW, VH, VW, VC)
oshape = (N, CO, OH, OW)
- data_vec = tvm.compute(dvshape, lambda n, h, w, ci, vh, vw:
- data_pad[n][ci][h*VH + vh][w*VW + vw],
- name='data_vec')
+ data_vec = te.compute(dvshape, lambda n, h, w, ci, vh, vw:
+ data_pad[n][ci][h*VH + vh][w*VW + vw],
+ name='data_vec')
- kernel_vec = tvm.compute(kvshape, lambda co, ci, kh, kw, vc:
- kernel[ci][co*VC+vc][kh][kw],
- name='kernel_vec_conv2d_transpose')
+ kernel_vec = te.compute(kvshape, lambda co, ci, kh, kw, vc:
+ kernel[ci][co*VC+vc][kh][kw],
+ name='kernel_vec_conv2d_transpose')
- ci = tvm.reduce_axis((0, CI), name='ci')
- kh = tvm.reduce_axis((0, KH), name='kh')
- kw = tvm.reduce_axis((0, KW), name='kw')
+ ci = te.reduce_axis((0, CI), name='ci')
+ kh = te.reduce_axis((0, KH), name='kh')
+ kw = te.reduce_axis((0, KW), name='kw')
- conv = tvm.compute(ovshape, lambda n, co, h, w, vh, vw, vc: \
- tvm.sum(data_vec[n, h, w, ci, vh + kh, vw + kw].astype(out_dtype) *
- kernel_vec[co, ci, KH - 1 - kh, KW - 1 - kw, vc].astype(out_dtype),
- axis=[ci, kh, kw]), name='conv')
+ conv = te.compute(ovshape, lambda n, co, h, w, vh, vw, vc: \
+ te.sum(data_vec[n, h, w, ci, vh + kh, vw + kw].astype(out_dtype) *
+ kernel_vec[co, ci, KH - 1 - kh, KW - 1 - kw, vc].astype(out_dtype),
+ axis=[ci, kh, kw]), name='conv')
- idxdiv = tvm.indexdiv
- idxmod = tvm.indexmod
+ idxdiv = tvm.tir.indexdiv
+ idxmod = tvm.tir.indexmod
- output = tvm.compute(oshape, lambda n, co, h, w:
- conv[n,
- idxdiv(co, VC), idxdiv(h, VH), idxdiv(w, VW),
- idxmod(h, VH), idxmod(w, VW), idxmod(co, VC)],
- name='output_unpack', tag='spatial_conv2d_transpose_output')
+ output = te.compute(oshape, lambda n, co, h, w:
+ conv[n,
+ idxdiv(co, VC), idxdiv(h, VH), idxdiv(w, VW),
+ idxmod(h, VH), idxmod(w, VW), idxmod(co, VC)],
+ name='output_unpack', tag='spatial_conv2d_transpose_output')
return output
@autotvm.register_topi_schedule("conv2d_transpose_nchw.arm_cpu")
def schedule_conv2d_transpose_nchw(cfg, outs):
"""Schedule conv2d transpose for arm cpu"""
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if 'spatial_conv2d_transpose_output' in op.tag:
kernel = kernel_vec.op.input_tensors[0]
else:
kernel = kernel_vec
- if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag:
+ if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag:
s[kernel].compute_inline()
schedule_conv2d_spatial_pack_nchw(cfg, s, data_vec, kernel_vec,
"""Depthwise convolution schedule for ARM CPU"""
import tvm
+from tvm import te
from tvm import autotvm
from .. import nn
s: Schedule
The computation schedule for depthwise_conv2d nchw.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _schedule(cfg, s, data, data_pad, kernel, output):
A, B, C = data, kernel, output
kernel = op.input_tensors[1]
data = op.input_tensors[0]
data_pad = None
- if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag:
+ if isinstance(data.op, tvm.te.ComputeOp) and "pad" in data.op.tag:
data_pad = data
data = data_pad.op.input_tensors[0]
_schedule(cfg, s, data, data_pad, kernel, output)
cfg: ConfigEntity
The config for this template
- data : tvm.Tensor
+ data : tvm.te.Tensor
4-D with shape [batch, in_channel, in_height, in_width]
- kernel : tvm.Tensor
+ kernel : tvm.te.Tensor
4-D with shape [num_filter, multiplier, filter_height, filter_width] or
pre-packed 5-D with shape [num_filter_chunk, multiplier, filter_height,
filter_width, num_filter_block]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
4-D with shape [batch, out_channel, out_height, out_width]
"""
@autotvm.register_topi_schedule("depthwise_conv2d_nchw_spatial_pack.arm_cpu")
def schedule_depthwise_conv2d_nchw_spatial_pack(cfg, outs):
"""Create the schedule for depthwise_conv2d_nchw_spatial_pack"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if op.tag == 'spatial_depthwise_conv2d_nchw_output':
kernel = kernel_vec.op.input_tensors[0]
else:
kernel = kernel_vec
- if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag:
+ if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag:
s[kernel].compute_inline()
_schedule_spatial_pack(cfg, s, data_vec, kernel_vec, conv, output, outs[0])
if dilation_h != 1 or dilation_w != 1:
# undilate input data
dvshape = (N, OH // VH, OW // VW, C, KH, KW, VH, VW)
- data_vec = tvm.compute(dvshape, lambda n, h, w, c, kh, kw, vh, vw:
- data_pad[n][c][(h * VH + vh) * HSTR + kh * dilation_h]
- [(w*VW+vw)*WSTR+kw*dilation_w],
- name='data_vec_undilated')
+ data_vec = te.compute(dvshape, lambda n, h, w, c, kh, kw, vh, vw:
+ data_pad[n][c][(h * VH + vh) * HSTR + kh * dilation_h]
+ [(w*VW+vw)*WSTR+kw*dilation_w],
+ name='data_vec_undilated')
else:
dvshape = (N, OH // VH, OW // VW, C, VH*HSTR + KH-1, VW*WSTR + KW-1)
- data_vec = tvm.compute(dvshape, lambda n, h, w, c, vh, vw:
- data_pad[n][c][h * VH * HSTR + vh][w * VW * WSTR + vw],
- name='data_vec')
+ data_vec = te.compute(dvshape, lambda n, h, w, c, vh, vw:
+ data_pad[n][c][h * VH * HSTR + vh][w * VW * WSTR + vw],
+ name='data_vec')
if pre_packed:
kernel_vec = kernel
else:
- kernel_vec = tvm.compute(kvshape, lambda co, m, kh, kw, vc:
- kernel[co*VC+vc][m][kh][kw],
- name='kernel_vec')
+ kernel_vec = te.compute(kvshape, lambda co, m, kh, kw, vc:
+ kernel[co*VC+vc][m][kh][kw],
+ name='kernel_vec')
- kh = tvm.reduce_axis((0, KH), name='kh')
- kw = tvm.reduce_axis((0, KW), name='kw')
+ kh = te.reduce_axis((0, KH), name='kh')
+ kw = te.reduce_axis((0, KW), name='kw')
- idxdiv = tvm.indexdiv
- idxmod = tvm.indexmod
+ idxdiv = tvm.tir.indexdiv
+ idxmod = tvm.tir.indexmod
if dilation_h != 1 or dilation_w != 1:
- conv = tvm.compute(
+ conv = te.compute(
ovshape, lambda n, co, h, w, vh, vw, vc: \
- tvm.sum(data_vec[n, h, w, idxdiv(co * VC + vc, M), kh, kw, vh, vw]
- .astype(out_dtype) *
- kernel_vec[idxdiv(co, M), idxmod(co, M), kh, kw, vc].astype(out_dtype),
- axis=[kh, kw]), name='depthwise_conv')
+ te.sum(data_vec[n, h, w, idxdiv(co * VC + vc, M), kh, kw, vh, vw]
+ .astype(out_dtype) *
+ kernel_vec[idxdiv(co, M), idxmod(co, M), kh, kw, vc].astype(out_dtype),
+ axis=[kh, kw]), name='depthwise_conv')
else:
- conv = tvm.compute(ovshape, lambda n, co, h, w, vh, vw, vc: \
- tvm.sum(data_vec[n, h, w, idxdiv((co * VC + vc), M), vh * HSTR + kh,
- vw * WSTR + kw].astype(out_dtype) *
- kernel_vec[idxdiv(co, M),
- idxmod(co, M),
- kh, kw, vc].astype(out_dtype),
- axis=[kh, kw]), name='depthwise_conv')
-
- output = tvm.compute(oshape, lambda n, co, h, w:
- conv[n,
- idxdiv(co, VC), idxdiv(h, VH), idxdiv(w, VW),
- idxmod(h, VH), idxmod(w, VW), idxmod(co, VC)],
- name='output_unpack', tag='spatial_depthwise_conv2d_nchw_output')
+ conv = te.compute(ovshape, lambda n, co, h, w, vh, vw, vc: \
+ te.sum(data_vec[n, h, w, idxdiv((co * VC + vc), M), vh * HSTR + kh,
+ vw * WSTR + kw].astype(out_dtype) *
+ kernel_vec[idxdiv(co, M),
+ idxmod(co, M),
+ kh, kw, vc].astype(out_dtype),
+ axis=[kh, kw]), name='depthwise_conv')
+
+ output = te.compute(oshape, lambda n, co, h, w:
+ conv[n,
+ idxdiv(co, VC), idxdiv(h, VH), idxdiv(w, VW),
+ idxmod(h, VH), idxmod(w, VW), idxmod(co, VC)],
+ name='output_unpack', tag='spatial_depthwise_conv2d_nchw_output')
return output
def _schedule_spatial_pack(cfg, s, data_vec, kernel_vec,
data_pad = data_vec.op.input_tensors[0]
if data_pad.op.name == "data_pad":
- assert isinstance(data_pad.op, tvm.tensor.ComputeOp)
+ assert isinstance(data_pad.op, tvm.te.ComputeOp)
has_padding = True
else:
- assert isinstance(data_pad.op, tvm.tensor.PlaceholderOp)
+ assert isinstance(data_pad.op, tvm.te.PlaceholderOp)
has_padding = False
cfg.define_knob('data_pad_inline', [0, 1, 2, 3, 4])
# pylint: disable=invalid-name, unused-variable
"""Schedule for pooling operators"""
import tvm
+from tvm import te
from ..util import is_empty_shape
def schedule_injective_from_existing(sch, out):
sch: Schedule
The computation schedule for the op.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
x = outs[0]
if list(s[x].op.axis):
# do not vectorize for broadcast
(io, ii) = s[x].split(list(s[x].op.axis)[-1], 8)
s[x].vectorize(ii)
- tvm.schedule.AutoInlineInjective(s)
+ tvm.te.schedule.AutoInlineInjective(s)
if not is_empty_shape(x.shape):
schedule_injective_from_existing(s, x)
sch: Schedule
The computation schedule for the op.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
x = outs[0]
- tvm.schedule.AutoInlineInjective(s)
+ tvm.te.schedule.AutoInlineInjective(s)
if len(s[x].op.axis) >= 4:
fused = s[x].fuse(s[x].op.axis[0], s[x].op.axis[1], s[x].op.axis[2])
s[x].parallel(fused)
"""Conv2D int8 schedule on ARM"""
import tvm
+from tvm import te
def dot_int8_int8_int32(int32_lanes, dtype='uint'):
"""
"""
num_int8_elements = 4 # 4 int8 elements in int32
- data = tvm.placeholder((num_int8_elements,), dtype='%s8' % dtype, name='data')
- kernel = tvm.placeholder((int32_lanes, num_int8_elements), dtype='%s8' % dtype, name='kernel')
+ data = te.placeholder((num_int8_elements,), dtype='%s8' % dtype, name='data')
+ kernel = te.placeholder((int32_lanes, num_int8_elements), dtype='%s8' % dtype, name='kernel')
- k = tvm.reduce_axis((0, num_int8_elements), name='k')
- C = tvm.compute((int32_lanes,),
- lambda i: tvm.sum(data[k].astype('%s32' % dtype) *
- kernel[i, k].astype('%s32' % dtype),
- axis=k), name="C")
+ k = te.reduce_axis((0, num_int8_elements), name='k')
+ C = te.compute((int32_lanes,),
+ lambda i: te.sum(data[k].astype('%s32' % dtype) *
+ kernel[i, k].astype('%s32' % dtype),
+ axis=k), name="C")
- a_buffer = tvm.decl_buffer(data.shape, dtype='%s8' % dtype, name="a_buffer",
- offset_factor=1,
- strides=[1])
- b_buffer = tvm.decl_buffer(kernel.shape, dtype='%s8' % dtype, name="b_buffer",
- offset_factor=1,
- strides=[tvm.var('s'), 1])
+ a_buffer = tvm.tir.decl_buffer(data.shape, dtype='%s8' % dtype, name="a_buffer",
+ offset_factor=1,
+ strides=[1])
+ b_buffer = tvm.tir.decl_buffer(kernel.shape, dtype='%s8' % dtype, name="b_buffer",
+ offset_factor=1,
+ strides=[te.var('s'), 1])
def _intrin_func(ins, outs):
def _instr(index):
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
if index == 1:
- ib.emit(outs[0].vstore(0, tvm.const(0, '%s32x%d' % (dtype, int32_lanes))))
+ ib.emit(outs[0].vstore(0, tvm.tir.const(0, '%s32x%d' % (dtype, int32_lanes))))
return ib.get()
dtype_a = '%s8x%d' % (dtype, num_int8_elements)
dtype_c = '%s32x%d' % (dtype, int32_lanes)
a_int8 = ins[0].vload([0], dtype_a)
- re_int32 = tvm.call_pure_intrin('%s32' % dtype, 'reinterpret', a_int8)
+ re_int32 = tvm.tir.call_pure_intrin('%s32' % dtype, 'reinterpret', a_int8)
# broadcast a
vec_ai32 = re_int32.astype(dtype_c)
- vec_a = tvm.call_pure_intrin(dtype_b, 'reinterpret', vec_ai32)
+ vec_a = tvm.tir.call_pure_intrin(dtype_b, 'reinterpret', vec_ai32)
vec_b = ins[1].vload([0, 0], dtype_b)
vec_c = outs[0].vload([0], dtype_c)
inst = 'udot' if dtype == 'uint' else 'sdot'
inst = 'llvm.aarch64.neon.%s.v%di32.v%di8' % (
inst, int32_lanes, int32_lanes * num_int8_elements)
- vdot = tvm.call_llvm_intrin(dtype_c,
- inst,
- tvm.const(2, 'uint32'),
- vec_c, vec_a, vec_b)
+ vdot = tvm.tir.call_llvm_intrin(dtype_c,
+ inst,
+ tvm.tir.const(2, 'uint32'),
+ vec_c, vec_a, vec_b)
ib.emit(outs[0].vstore(0, vdot))
return ib.get()
# body, reset, update
return _instr(0), _instr(1), _instr(2)
- with tvm.build_config(offset_factor=1, partition_const_loop=True):
- return tvm.decl_tensor_intrin(C.op, _intrin_func, binds={data:a_buffer, kernel:b_buffer})
+ with tvm.target.build_config(offset_factor=1, partition_const_loop=True):
+ return te.decl_tensor_intrin(C.op, _intrin_func, binds={data:a_buffer, kernel:b_buffer})
"""conv2d schedule on ARM Mali (Bifrost) GPU"""
import tvm
+from tvm import te
from tvm import relay
from tvm import autotvm
cfg: ConfigEntity
The config for this template
- data : tvm.Tensor
+ data : tvm.te.Tensor
4-D with shape [batch, in_channel, in_height, in_width]
- kernel : tvm.Tensor
+ kernel : tvm.te.Tensor
4-D with shape [num_filter, in_channel, filter_height, filter_width] or
pre-packed 5-D with shape [num_filter_chunk, in_channel, filter_height,
filter_width, num_filter_block]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
4-D with shape [batch, out_channel, out_height, out_width]
"""
return conv2d_spatial_pack_nchw(cfg, data, kernel, strides, padding,
s: Schedule
The computation schedule for conv2d
"""
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
# schedule conv2d
kernel = kernel_vec.op.input_tensors[0]
else:
kernel = kernel_vec
- if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag:
+ if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag:
s[kernel].compute_inline()
_schedule_spatial_pack(cfg, s, output, conv, data_vec, kernel_vec)
BW, TW, VW = cfg["tile_ow"].size
# schedule padding
- if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag:
+ if isinstance(data.op, tvm.te.ComputeOp) and "pad" in data.op.tag:
data_pad = data
s[data_pad].compute_inline()
# schedule data packing
- if isinstance(data_vec.op, tvm.tensor.ComputeOp) and data_vec.op.name == 'data_vec_undilated':
+ if isinstance(data_vec.op, te.tensor.ComputeOp) and data_vec.op.name == 'data_vec_undilated':
_, h, w, ci, _, _, vh, vw = s[data_vec].op.axis
else:
_, h, w, ci, vh, vw = s[data_vec].op.axis
if vw.dom.extent.value < max_unroll:
s[data_vec].unroll(vw)
- if isinstance(kernel_vec.op, tvm.tensor.ComputeOp) and kernel_vec.name == 'kernel_vec':
+ if isinstance(kernel_vec.op, tvm.te.ComputeOp) and kernel_vec.name == 'kernel_vec':
if autotvm.GLOBAL_SCOPE.in_tuning:
# kernel packing will be pre-computed during compilation, so we skip
# this part to make tuning records correct
fused = s[kernel_vec].fuse(co, ci, kh, kw, vc)
fused, vec = s[kernel_vec].split(fused, VC)
bb, tt = s[kernel_vec].split(fused, max_threads)
- s[kernel_vec].bind(bb, tvm.thread_axis("blockIdx.x"))
- s[kernel_vec].bind(tt, tvm.thread_axis("threadIdx.x"))
+ s[kernel_vec].bind(bb, te.thread_axis("blockIdx.x"))
+ s[kernel_vec].bind(tt, te.thread_axis("threadIdx.x"))
if VC in vec_size:
s[kernel_vec].vectorize(vec)
@autotvm.register_topi_schedule("conv2d_nchw_winograd.bifrost")
def schedule_conv2d_nchw_winograd(cfg, outs):
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if 'winograd_conv2d_output' in op.tag:
Parameters
----------
- kernel : tvm.Tensor
+ kernel : tvm.te.Tensor
The kernel to transform
tile_size : int
Returns
-------
- U : tvm.Tensor
+ U : tvm.te.Tensor
Transformed kernel
"""
# Padded Kernel [K_round, C, KH, KW]
# Pad the number of kernels to multiple of ALIGN
- padded_kernel = tvm.compute((K_round, C, KH, KW),
- lambda k, c, h, w:
- tvm.if_then_else(k < K,
- kernel[k][c][h][w],
- tvm.const(0, out_dtype)),
- name='padded_kernel')
+ padded_kernel = te.compute((K_round, C, KH, KW),
+ lambda k, c, h, w:
+ tvm.tir.if_then_else(k < K,
+ kernel[k][c][h][w],
+ tvm.tir.const(0, out_dtype)),
+ name='padded_kernel')
# U [alpha, alpha, K_round, C]
# Perform the kernel transform
- r_kh = tvm.reduce_axis((0, KH), 'r_kh')
- r_kw = tvm.reduce_axis((0, KW), 'r_kw')
- U = tvm.compute((alpha, alpha, K_round, C),
- lambda eps, nu, k, c:
- tvm.sum(padded_kernel[k][c][r_kh][r_kw] * G[eps][r_kh] * G[nu][r_kw],
- axis=[r_kh, r_kw]),
- name='U')
+ r_kh = te.reduce_axis((0, KH), 'r_kh')
+ r_kw = te.reduce_axis((0, KW), 'r_kw')
+ U = te.compute((alpha, alpha, K_round, C),
+ lambda eps, nu, k, c:
+ te.sum(padded_kernel[k][c][r_kh][r_kw] * G[eps][r_kh] * G[nu][r_kw],
+ axis=[r_kh, r_kw]),
+ name='U')
return U
cfg.define_knob("data_transform_wgy", [1, 2, 4, 8, 16, 32, 64])
# Pack input tile
- input_tile = tvm.compute((N, C, H + 2, W + 2),
- lambda n, c, h, w:
- data_pad[n][c][h][w],
- name='d')
+ input_tile = te.compute((N, C, H + 2, W + 2),
+ lambda n, c, h, w:
+ data_pad[n][c][h][w],
+ name='d')
if pre_computed:
U = kernel
# V [alpha * alpha, C, P_round)
# Perform the image transform
- r_eps = tvm.reduce_axis((0, alpha), 'r_eps')
- r_nu = tvm.reduce_axis((0, alpha), 'r_nu')
- V = tvm.compute((alpha * alpha, C, P_round),
- lambda epsnu, c, b:
- tvm.sum(input_tile[b // (nH*nW)][c][b // nW % nH * m + r_eps][b % nW * m +r_nu]\
- * B[r_eps][epsnu // alpha] * B[r_nu][epsnu % alpha],
- axis=[r_eps, r_nu]),
- name='V')
+ r_eps = te.reduce_axis((0, alpha), 'r_eps')
+ r_nu = te.reduce_axis((0, alpha), 'r_nu')
+ V = te.compute((alpha * alpha, C, P_round),
+ lambda epsnu, c, b:
+ te.sum(input_tile[b // (nH*nW)][c][b // nW % nH * m + r_eps][b % nW * m +r_nu]\
+ * B[r_eps][epsnu // alpha] * B[r_nu][epsnu % alpha],
+ axis=[r_eps, r_nu]),
+ name='V')
# Winograd GEMM is a wrapper around batched GEMM to convert U to a 3D Tensor
_, M = decl_winograd_gemm(cfg, U, V)
# Y [K, P, m, m]
# Winograd output transform
- r_eps = tvm.reduce_axis((0, alpha), 'r_eps')
- r_nu = tvm.reduce_axis((0, alpha), 'r_nu')
- Y = tvm.compute((K, P, m, m), lambda k, b, vh, vw:
- tvm.sum(M[r_eps * alpha + r_nu][k][b] * A[r_eps][vh] * A[r_nu][vw],
- axis=[r_eps, r_nu]), name='Y')
+ r_eps = te.reduce_axis((0, alpha), 'r_eps')
+ r_nu = te.reduce_axis((0, alpha), 'r_nu')
+ Y = te.compute((K, P, m, m), lambda k, b, vh, vw:
+ te.sum(M[r_eps * alpha + r_nu][k][b] * A[r_eps][vh] * A[r_nu][vw],
+ axis=[r_eps, r_nu]), name='Y')
# Output [N, K, H, W]
# Unpack back to NCHW format
# The last term ensures alignment is not lost to bound inference
- output = tvm.compute((N, K, H, W), lambda n, k, h, w:
- Y[k][n * nH * nW + (h//m) * nW + w//m][h % m][w % m]
- + tvm.const(0, out_dtype) * M[(alpha*alpha)-1][K_round-1][P_round-1],
- name='output', tag='winograd_conv2d_output')
+ output = te.compute((N, K, H, W), lambda n, k, h, w:
+ Y[k][n * nH * nW + (h//m) * nW + w//m][h % m][w % m]
+ + tvm.tir.const(0, out_dtype) * M[(alpha*alpha)-1][K_round-1][P_round-1],
+ name='output', tag='winograd_conv2d_output')
return output
d, B = s[V].op.input_tensors
data_pad = s[d].op.input_tensors[0]
- if isinstance(U.op, tvm.tensor.ComputeOp):
+ if isinstance(U.op, tvm.te.ComputeOp):
padded_kernel, G = s[U].op.input_tensors
kernel = s[padded_kernel].op.input_tensors[0]
s[G].compute_inline()
yo, xo, yi, xi = tile_and_bind(s, U, k, c, 1, 4)
# Dilation
- if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag:
+ if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag:
s[kernel].compute_inline()
# Pad data
data, kernel = tinfos
out_dtype = out_type.dtype
- idxd = tvm.indexdiv
+ idxd = tvm.tir.indexdiv
if topi_tmpl == "conv2d_nchw_spatial_pack.bifrost":
assert data_layout == "NCHW" and kernel_layout == "OIHW"
new_attrs['kernel_layout'] = 'OIHW%do' % VC
new_data = data
- new_kernel = tvm.placeholder((idxd(CO, VC), CI, KH, KW, VC), dtype=kernel.dtype)
+ new_kernel = te.placeholder((idxd(CO, VC), CI, KH, KW, VC), dtype=kernel.dtype)
new_workload = autotvm.task.args_to_workload(
[new_data, new_kernel, strides, padding, dilation, out_dtype],
"conv2d_nchw_spatial_pack.bifrost")
new_attrs['tile_size'] = tile_size
new_data = data
- new_kernel = tvm.placeholder(
+ new_kernel = te.placeholder(
(KH + tile_size - 1, KW + tile_size -1, CO, CI), kernel.dtype)
new_workload = autotvm.task.args_to_workload(
[new_data, new_kernel, strides, padding, dilation, out_dtype],
# under the License.
# pylint: disable=invalid-name,unused-variable
"""dense schedule on ARM Mali Biforst GPU"""
-
-from __future__ import absolute_import as _abs
-
-import tvm
+from tvm import te
from tvm import autotvm
from .. import nn
s: Schedule
The computation schedule for dense.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if op.tag == 'dense':
by, ty, yi = cfg['tile_y'].apply(s, output, y)
bx, tx, xi = cfg['tile_x'].apply(s, output, x)
- s[output].bind(by, tvm.thread_axis('blockIdx.y'))
- s[output].bind(bx, tvm.thread_axis('blockIdx.x'))
- s[output].bind(ty, tvm.thread_axis('threadIdx.y'))
- s[output].bind(tx, tvm.thread_axis('threadIdx.x'))
+ s[output].bind(by, te.thread_axis('blockIdx.y'))
+ s[output].bind(bx, te.thread_axis('blockIdx.x'))
+ s[output].bind(ty, te.thread_axis('threadIdx.y'))
+ s[output].bind(tx, te.thread_axis('threadIdx.x'))
if cfg['tile_y'].size[-1] < max_unroll:
s[output].unroll(yi)
axis = axis or s[tensor].op.axis
fused = s[tensor].fuse(*axis)
bx, tx = s[tensor].split(fused, num_thread)
- s[tensor].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[tensor].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[tensor].bind(bx, te.thread_axis("blockIdx.x"))
+ s[tensor].bind(tx, te.thread_axis("threadIdx.x"))
return bx, tx
from __future__ import absolute_import as _abs
import tvm
+from tvm import te
from .. import util
from .. import tag
s: Schedule
The computation schedule for depthwise_conv2d nchw.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _schedule(pad_data, kernel, conv):
raw_data = s[pad_data].op.input_tensors[0]
zo, zi = s[tensor].split(z, z_factor)
yo, yi = s[tensor].split(y, y_factor)
xo, xi = s[tensor].split(x, x_factor)
- s[tensor].bind(zo, tvm.thread_axis("blockIdx.z"))
- s[tensor].bind(zi, tvm.thread_axis("threadIdx.z"))
- s[tensor].bind(yo, tvm.thread_axis("blockIdx.y"))
- s[tensor].bind(yi, tvm.thread_axis("threadIdx.y"))
- s[tensor].bind(xo, tvm.thread_axis("blockIdx.x"))
- s[tensor].bind(xi, tvm.thread_axis("threadIdx.x"))
+ s[tensor].bind(zo, te.thread_axis("blockIdx.z"))
+ s[tensor].bind(zi, te.thread_axis("threadIdx.z"))
+ s[tensor].bind(yo, te.thread_axis("blockIdx.y"))
+ s[tensor].bind(yi, te.thread_axis("threadIdx.y"))
+ s[tensor].bind(xo, te.thread_axis("blockIdx.x"))
+ s[tensor].bind(xi, te.thread_axis("threadIdx.x"))
return zo, zi, yo, yi, xo, xi
# set tunable parameters
if op.tag == 'depthwise_conv2d_nchw':
pad_data = op.input_tensors[0]
kernel = op.input_tensors[1]
- if isinstance(kernel.op, tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag:
+ if isinstance(kernel.op, tvm.te.ComputeOp) and 'dilate' in kernel.op.tag:
s[kernel].compute_inline()
conv = op.output(0)
_schedule(pad_data, kernel, conv)
# under the License.
# pylint: disable=invalid-name,unused-variable,unused-argument
"""GEMM schedules for Mali Bifrost"""
-
-import tvm
-
from .transforms import tile_and_bind, tile_and_bind3d, interleave_transpose, \
transpose_interleave
from .. import util
cfg : Config
Schedule configuration
- A : tvm.Tensor
+ A : tvm.te.Tensor
2D Tensor, shape [n, k]
- B : tvm.Tensor
+ B : tvm.te.Tensor
2D Tensor, shape [k, m]
Returns
-------
- C : tvm.Tensor
+ C : tvm.te.Tensor
2D Tensor, shape [n, m]
"""
if unroll_gemm == 1:
# No unrolling case must have the same set of tensors to keep scheduling consistent
# Create identity tensors to take the place of A_unrolled, B_unrolled and R
- A_unrolled = tvm.compute((n, k_size), lambda i, j: A[i, j], name="A_unrolled")
- B_unrolled = tvm.compute((k_size, m), lambda i, j: B[i, j], name="B_unrolled")
+ A_unrolled = te.compute((n, k_size), lambda i, j: A[i, j], name="A_unrolled")
+ B_unrolled = te.compute((k_size, m), lambda i, j: B[i, j], name="B_unrolled")
# Declare standard GEMM
- k = tvm.reduce_axis((0, A.shape[1]), name='k')
- C = tvm.compute((n, m), lambda i, j:
- tvm.sum(A_unrolled[i, k] * B_unrolled[k, j], axis=k), name='C')
+ k = te.reduce_axis((0, A.shape[1]), name='k')
+ C = te.compute((n, m), lambda i, j:
+ te.sum(A_unrolled[i, k] * B_unrolled[k, j], axis=k), name='C')
- R = tvm.compute((n, m), lambda i, j: C[i, j], name="R")
+ R = te.compute((n, m), lambda i, j: C[i, j], name="R")
else:
unrolled_k_size = k_size // unroll_gemm
# Unroll the two input matrices along the shared k axis
- A_unrolled = tvm.compute((unroll_gemm, n, unrolled_k_size), lambda b, i, j:
- A[i][unrolled_k_size * b + j], name='A_unrolled')
+ A_unrolled = te.compute((unroll_gemm, n, unrolled_k_size), lambda b, i, j:
+ A[i][unrolled_k_size * b + j], name='A_unrolled')
- B_unrolled = tvm.compute((unroll_gemm, unrolled_k_size, m), lambda b, i, j:
- B[unrolled_k_size * b + i][j], name='B_unrolled')
+ B_unrolled = te.compute((unroll_gemm, unrolled_k_size, m), lambda b, i, j:
+ B[unrolled_k_size * b + i][j], name='B_unrolled')
# Declare a batched GEMM
- k = tvm.reduce_axis((0, unrolled_k_size), name='k')
- C = tvm.compute((unroll_gemm, n, m), lambda b, i, j:
- tvm.sum(A_unrolled[b][i][k] * B_unrolled[b][k][j], axis=k), name='C')
+ k = te.reduce_axis((0, unrolled_k_size), name='k')
+ C = te.compute((unroll_gemm, n, m), lambda b, i, j:
+ te.sum(A_unrolled[b][i][k] * B_unrolled[b][k][j], axis=k), name='C')
# Then declare a reduction to reduce the sub matrices
- k = tvm.reduce_axis((0, unroll_gemm), name='k')
- R = tvm.compute((n, m), lambda i, j:
- tvm.sum(C[k][i][j], axis=k), name='R')
+ k = te.reduce_axis((0, unroll_gemm), name='k')
+ R = te.compute((n, m), lambda i, j:
+ te.sum(C[k][i][j], axis=k), name='R')
return R
cfg : Config
Schedule configuration
- A : tvm.Tensor
+ A : tvm.te.Tensor
3D Tensor, shape [b, n, k]
- B : tvm.Tensor
+ B : tvm.te.Tensor
3D Tensor, shape [b, k, m]
Returns
-------
- C : tvm.Tensor
+ C : tvm.te.Tensor
3D Tensor, shape [b, n, m]
"""
b_size = util.get_const_int(A.shape[0])
# Declare a batched GEMM
- k = tvm.reduce_axis((0, k_size), name='k')
- C = tvm.compute((b_size, n, m), lambda b, i, j:
- tvm.sum(A[b][i][k] * B[b][k][j], axis=k), name='C')
+ k = te.reduce_axis((0, k_size), name='k')
+ C = te.compute((b_size, n, m), lambda b, i, j:
+ te.sum(A[b][i][k] * B[b][k][j], axis=k), name='C')
return C
cfg : Config
Schedule configuration
- A : tvm.Tensor
+ A : tvm.te.Tensor
4D Tensor, shape [a, a, n, k]
- B : tvm.Tensor
+ B : tvm.te.Tensor
4D Tensor, shape [a * a, k, m]
Returns
n = util.get_const_int(A.shape[2])
k = util.get_const_int(A.shape[3])
- A_3D = tvm.compute((alpha * alpha, n, k), lambda b, i, j:
- A[b // alpha][b % alpha][i][j], name='A_3D')
+ A_3D = te.compute((alpha * alpha, n, k), lambda b, i, j:
+ A[b // alpha][b % alpha][i][j], name='A_3D')
C = decl_batched_gemm(cfg, A_3D, B)
return A_3D, C
cfg : Config
Schedule configuration
- s : tvm.schedule.Schedule
+ s : tvm.te.schedule.Schedule
Operator schedule
- A : tvm.Tensor
+ A : tvm.te.Tensor
2D/3D Tensor, shape [n, k]/[b, n, k]
- B : tvm.Tensor
+ B : tvm.te.Tensor
2D/3D Tensor, shape [k, m]/[b, k, m]
- C : tvm.Tensor
+ C : tvm.te.Tensor
2D/3D Tensor, shape [n, m]/[b, n, m]
batched : bool
cfg : Config
Schedule configuration
- s : tvm.schedule.Schedule
+ s : tvm.te.schedule.Schedule
Operator schedule
- A : tvm.Tensor
+ A : tvm.te.Tensor
2D/3D Tensor, shape [n, k]/[b, n, k]
- B : tvm.Tensor
+ B : tvm.te.Tensor
2D/3D Tensor, shape [k, m]/[b, k, m]
- C : tvm.Tensor
+ C : tvm.te.Tensor
2D/3D Tensor, shape [n, m]/[b, n, m]
- R : tvm.Tensor
+ R : tvm.te.Tensor
2D Tensor, shape [n, m]
"""
Parameters
----------
- R : tvm.Tensor
+ R : tvm.te.Tensor
Reduced tensor, final stage of GEMM
Returns
-------
- A_unrolled : tvm.Tensor
+ A_unrolled : tvm.te.Tensor
Matrix A unrolled along k
- B_unrolled: tvm.Tensor
+ B_unrolled: tvm.te.Tensor
Matrix B unrolled along k
- C : tvm.Tensor
+ C : tvm.te.Tensor
Result of batched GEMM
- R : tvm.Tensor
+ R : tvm.te.Tensor
Reduction of C, result of unrollable GEMM
"""
from __future__ import absolute_import as _abs
import tvm
+from tvm import te
def fuse_and_bind(s, tensor, axis=None, num_thread=None):
"""Fuse all the axis and bind to GPU threads"""
fused = s[tensor].fuse(*axis)
max_threads = tvm.target.Target.current(allow_none=False).max_num_threads
bx, tx = s[tensor].split(fused, num_thread or max_threads)
- s[tensor].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[tensor].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[tensor].bind(bx, te.thread_axis("blockIdx.x"))
+ s[tensor].bind(tx, te.thread_axis("threadIdx.x"))
return bx, tx
def tile_and_bind(s, tensor, y, x, y_factor, x_factor=None):
"""Tile and bind to GPU threads"""
x_factor = x_factor or y_factor
yo, xo, yi, xi = s[tensor].tile(y, x, y_factor, x_factor)
- s[tensor].bind(xo, tvm.thread_axis("blockIdx.x"))
- s[tensor].bind(xi, tvm.thread_axis("threadIdx.x"))
- s[tensor].bind(yo, tvm.thread_axis("blockIdx.y"))
- s[tensor].bind(yi, tvm.thread_axis("threadIdx.y"))
+ s[tensor].bind(xo, te.thread_axis("blockIdx.x"))
+ s[tensor].bind(xi, te.thread_axis("threadIdx.x"))
+ s[tensor].bind(yo, te.thread_axis("blockIdx.y"))
+ s[tensor].bind(yi, te.thread_axis("threadIdx.y"))
return yo, xo, yi, xi
def tile_and_bind3d(s, tensor, z, y, x, z_factor=2, y_factor=None, x_factor=None):
zo, zi = s[tensor].split(z, z_factor)
yo, yi = s[tensor].split(y, y_factor)
xo, xi = s[tensor].split(x, x_factor)
- s[tensor].bind(zo, tvm.thread_axis("blockIdx.z"))
- s[tensor].bind(zi, tvm.thread_axis("threadIdx.z"))
- s[tensor].bind(yo, tvm.thread_axis("blockIdx.y"))
- s[tensor].bind(yi, tvm.thread_axis("threadIdx.y"))
- s[tensor].bind(xo, tvm.thread_axis("blockIdx.x"))
- s[tensor].bind(xi, tvm.thread_axis("threadIdx.x"))
+ s[tensor].bind(zo, te.thread_axis("blockIdx.z"))
+ s[tensor].bind(zi, te.thread_axis("threadIdx.z"))
+ s[tensor].bind(yo, te.thread_axis("blockIdx.y"))
+ s[tensor].bind(yi, te.thread_axis("threadIdx.y"))
+ s[tensor].bind(xo, te.thread_axis("blockIdx.x"))
+ s[tensor].bind(xi, te.thread_axis("threadIdx.x"))
return zo, yo, xo, zi, yi, xi
def pack_tensor(s, tensor, factor, readers):
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
The input data
shape : list or tuple
Returns
-------
- ret : tvm.Tensor
+ ret : tvm.te.Tensor
"""
return _cpp.broadcast_to(data, shape)
Parameters
----------
- lhs : tvm.Tensor or Expr
+ lhs : tvm.te.Tensor or Expr
The left operand
- rhs : tvm.Tensor or Expr
+ rhs : tvm.te.Tensor or Expr
The right operand
Returns
-------
- ret : tvm.Tensor or Expr
+ ret : tvm.te.Tensor or Expr
Returns Expr if both operands are Expr.
Otherwise returns Tensor.
"""
Parameters
----------
- lhs : tvm.Tensor or Expr
+ lhs : tvm.te.Tensor or Expr
The left operand
- rhs : tvm.Tensor or Expr
+ rhs : tvm.te.Tensor or Expr
The right operand
Returns
-------
- ret : tvm.Tensor or Expr
+ ret : tvm.te.Tensor or Expr
Returns Expr if both operands are Expr.
Otherwise returns Tensor.
"""
Parameters
----------
- lhs : tvm.Tensor or Expr
+ lhs : tvm.te.Tensor or Expr
The left operand
- rhs : tvm.Tensor or Expr
+ rhs : tvm.te.Tensor or Expr
The right operand
Returns
-------
- ret : tvm.Tensor or Expr
+ ret : tvm.te.Tensor or Expr
Returns Expr if both operands are Expr.
Otherwise returns Tensor.
"""
Parameters
----------
- lhs : tvm.Tensor or Expr
+ lhs : tvm.te.Tensor or Expr
The left operand
- rhs : tvm.Tensor or Expr
+ rhs : tvm.te.Tensor or Expr
The right operand
Returns
-------
- ret : tvm.Tensor or Expr
+ ret : tvm.te.Tensor or Expr
Returns Expr if both operands are Expr.
Otherwise returns Tensor.
"""
Parameters
----------
- lhs : tvm.Tensor or Expr
+ lhs : tvm.te.Tensor or Expr
The left operand
- rhs : tvm.Tensor or Expr
+ rhs : tvm.te.Tensor or Expr
The right operand
Returns
-------
- ret : tvm.Tensor or Expr
+ ret : tvm.te.Tensor or Expr
Returns Expr if both operands are Expr.
Otherwise returns Tensor.
"""
Parameters
----------
- lhs : tvm.Tensor or Expr
+ lhs : tvm.te.Tensor or Expr
The left operand
- rhs : tvm.Tensor or Expr
+ rhs : tvm.te.Tensor or Expr
The right operand
Returns
-------
- ret : tvm.Tensor or Expr
+ ret : tvm.te.Tensor or Expr
Returns Expr if both operands are Expr.
Otherwise returns Tensor.
"""
Parameters
----------
- lhs : tvm.Tensor or Expr
+ lhs : tvm.te.Tensor or Expr
The left operand
- rhs : tvm.Tensor or Expr
+ rhs : tvm.te.Tensor or Expr
The right operand
Returns
-------
- ret : tvm.Tensor or Expr
+ ret : tvm.te.Tensor or Expr
Returns Expr if both operands are Expr.
Otherwise returns Tensor.
"""
Parameters
----------
- lhs : tvm.Tensor or Expr
+ lhs : tvm.te.Tensor or Expr
The left operand
- rhs : tvm.Tensor or Expr
+ rhs : tvm.te.Tensor or Expr
The right operand
Returns
-------
- ret : tvm.Tensor or Expr
+ ret : tvm.te.Tensor or Expr
Returns Expr if both operands are Expr.
Otherwise returns Tensor.
"""
Parameters
----------
- lhs : tvm.Tensor or Expr
+ lhs : tvm.te.Tensor or Expr
The left operand
- rhs : tvm.Tensor or Expr
+ rhs : tvm.te.Tensor or Expr
The right operand
Returns
-------
- ret : tvm.Tensor or Expr
+ ret : tvm.te.Tensor or Expr
Returns Expr if both operands are Expr.
Otherwise returns Tensor.
"""
Parameters
----------
- lhs : tvm.Tensor or Expr
+ lhs : tvm.te.Tensor or Expr
The left operand
- rhs : tvm.Tensor or Expr
+ rhs : tvm.te.Tensor or Expr
The right operand
Returns
-------
- ret : tvm.Tensor or Expr
+ ret : tvm.te.Tensor or Expr
Returns Expr if both operands are Expr.
Otherwise returns Tensor.
"""
Parameters
----------
- lhs : tvm.Tensor or Expr
+ lhs : tvm.te.Tensor or Expr
The left operand
- rhs : tvm.Tensor or Expr
+ rhs : tvm.te.Tensor or Expr
The right operand
Returns
-------
- ret : tvm.Tensor or Expr
+ ret : tvm.te.Tensor or Expr
Returns Expr if both operands are Expr.
Otherwise returns Tensor.
"""
Parameters
----------
- lhs : tvm.Tensor or Expr
+ lhs : tvm.te.Tensor or Expr
The left operand
- rhs : tvm.Tensor or Expr
+ rhs : tvm.te.Tensor or Expr
The right operand
Returns
-------
- ret : tvm.Tensor or Expr
+ ret : tvm.te.Tensor or Expr
Returns Expr if both operands are Expr.
Otherwise returns Tensor.
"""
Parameters
----------
- lhs : tvm.Tensor or Expr
+ lhs : tvm.te.Tensor or Expr
The left operand
- rhs : tvm.Tensor or Expr
+ rhs : tvm.te.Tensor or Expr
The right operand
Returns
-------
- ret : tvm.Tensor or Expr
+ ret : tvm.te.Tensor or Expr
Returns Expr if both operands are Expr.
Otherwise returns Tensor.
"""
Parameters
----------
- lhs : tvm.Tensor or Expr
+ lhs : tvm.te.Tensor or Expr
The left operand
- rhs : tvm.Tensor or Expr
+ rhs : tvm.te.Tensor or Expr
The right operand
Returns
-------
- ret : tvm.Tensor or Expr
+ ret : tvm.te.Tensor or Expr
Returns Expr if both operands are Expr.
Otherwise returns Tensor.
"""
Parameters
----------
- lhs : tvm.Tensor or Expr
+ lhs : tvm.te.Tensor or Expr
The left operand
- rhs : tvm.Tensor or Expr
+ rhs : tvm.te.Tensor or Expr
The right operand
Returns
-------
- ret : tvm.Tensor or Expr
+ ret : tvm.te.Tensor or Expr
Returns Expr if both operands are Expr.
Otherwise returns Tensor.
"""
Parameters
----------
- lhs : tvm.Tensor or Expr
+ lhs : tvm.te.Tensor or Expr
The left operand
- rhs : tvm.Tensor or Expr
+ rhs : tvm.te.Tensor or Expr
The right operand
Returns
-------
- ret : tvm.Tensor or Expr
+ ret : tvm.te.Tensor or Expr
Returns Expr if both operands are Expr.
Otherwise returns Tensor.
"""
Parameters
----------
- lhs : tvm.Tensor or Expr
+ lhs : tvm.te.Tensor or Expr
The left operand
- rhs : tvm.Tensor or Expr
+ rhs : tvm.te.Tensor or Expr
The right operand
Returns
-------
- ret : tvm.Tensor or Expr
+ ret : tvm.te.Tensor or Expr
Returns Expr if both operands are Expr.
Otherwise returns Tensor.
"""
Parameters
----------
- lhs : tvm.Tensor or Expr
+ lhs : tvm.te.Tensor or Expr
The left operand
- rhs : tvm.Tensor or Expr
+ rhs : tvm.te.Tensor or Expr
The right operand
Returns
-------
- ret : tvm.Tensor or Expr
+ ret : tvm.te.Tensor or Expr
Returns Expr if both operands are Expr.
Otherwise returns Tensor.
"""
Parameters
----------
- lhs : tvm.Tensor or Expr
+ lhs : tvm.te.Tensor or Expr
The left operand
- rhs : tvm.Tensor or Expr
+ rhs : tvm.te.Tensor or Expr
The right operand
Returns
-------
- ret : tvm.Tensor or Expr
+ ret : tvm.te.Tensor or Expr
Returns Expr if both operands are Expr.
Otherwise returns Tensor.
"""
Parameters
----------
- lhs : tvm.Tensor or Expr
+ lhs : tvm.te.Tensor or Expr
The left operand
- rhs : tvm.Tensor or Expr
+ rhs : tvm.te.Tensor or Expr
The right operand
Returns
-------
- ret : tvm.Tensor or Expr
+ ret : tvm.te.Tensor or Expr
Returns Expr if both operands are Expr.
Otherwise returns Tensor.
"""
Parameters
----------
- lhs : tvm.Tensor or Expr
+ lhs : tvm.te.Tensor or Expr
The left operand
- rhs : tvm.Tensor or Expr
+ rhs : tvm.te.Tensor or Expr
The right operand
Returns
-------
- ret : tvm.Tensor or Expr
+ ret : tvm.te.Tensor or Expr
Returns Expr if both operands are Expr.
Otherwise returns Tensor.
"""
Parameters
----------
- lhs : tvm.Tensor or Expr
+ lhs : tvm.te.Tensor or Expr
The left operand
- rhs : tvm.Tensor or Expr
+ rhs : tvm.te.Tensor or Expr
The right operand
Returns
-------
- ret : tvm.Tensor or Expr
+ ret : tvm.te.Tensor or Expr
Returns Expr if both operands are Expr.
Otherwise returns Tensor.
"""
Parameters
----------
- lhs : tvm.Tensor or Expr
+ lhs : tvm.te.Tensor or Expr
The left operand
- rhs : tvm.Tensor or Expr
+ rhs : tvm.te.Tensor or Expr
The right operand
Returns
-------
- ret : tvm.Tensor or Expr
+ ret : tvm.te.Tensor or Expr
Returns Expr if both operands are Expr.
Otherwise returns Tensor.
"""
Parameters
----------
- data : tvm.Tensor or Expr
+ data : tvm.te.Tensor or Expr
Returns
-------
- ret : tvm.Tensor or Expr
+ ret : tvm.te.Tensor or Expr
Returns Expr if the operand are Expr.
Otherwise returns Tensor.
"""
Parameters
----------
- data : tvm.Tensor or Expr
+ data : tvm.te.Tensor or Expr
Returns
-------
- ret : tvm.Tensor or Expr
+ ret : tvm.te.Tensor or Expr
Returns Expr if the operand are Expr.
Otherwise returns Tensor.
"""
# under the License.
# pylint: disable=invalid-name,too-many-locals,unused-variable
"""cuda batch_matmul operators"""
-from __future__ import absolute_import as _abs
-import tvm
+from tvm import te
from tvm.contrib import cublas
from ..util import traverse_inline, get_const_tuple, get_max_power2_factor
s: Schedule
The computation schedule for the op.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _schedule(op):
C = op.output(0)
x_nthreads = min(x_bn, 8)
ty, yi = s[C].split(y, nparts=y_nthreads)
tx, xi = s[C].split(x, nparts=x_nthreads)
- thread_x = tvm.thread_axis((0, x_nthreads), "threadIdx.x")
- thread_y = tvm.thread_axis((0, y_nthreads), "threadIdx.y")
+ thread_x = te.thread_axis((0, x_nthreads), "threadIdx.x")
+ thread_y = te.thread_axis((0, y_nthreads), "threadIdx.y")
s[C].reorder(b, by, bx, ty, tx, yi, xi)
- s[C].bind(b, tvm.thread_axis("blockIdx.z"))
- s[C].bind(by, tvm.thread_axis("blockIdx.y"))
- s[C].bind(bx, tvm.thread_axis("blockIdx.x"))
+ s[C].bind(b, te.thread_axis("blockIdx.z"))
+ s[C].bind(by, te.thread_axis("blockIdx.y"))
+ s[C].bind(bx, te.thread_axis("blockIdx.x"))
s[C].bind(ty, thread_y)
s[C].bind(tx, thread_x)
s[C].pragma(yi, "auto_unroll_max_step", 16)
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
3-D with shape [batch, M, K]
- y : tvm.Tensor
+ y : tvm.te.Tensor
3-D with shape [batch, N, K]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
3-D with shape [batch, M, N]
"""
return cublas.batch_matmul(x, y, False, True)
# pylint: disable=invalid-name, unused-argument
"""Compute definition for conv1d with cuda backend"""
import tvm
+from tvm import te
from tvm import autotvm
from .. import nn
s : Schedule
The computation schedule for conv1d.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if op.tag == 'conv1d_ncw':
##### space definition end #####
if isinstance(kernel.op,
- tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag:
+ tvm.te.ComputeOp) and 'dilate' in kernel.op.tag:
s[kernel].compute_inline()
if conv.op in s.outputs:
bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x)
s[output].reorder(bn, bf, bx, vn, vf, vx, tn, tf, tx, ni, fi, xi)
- s[output].bind(bn, tvm.thread_axis("blockIdx.z"))
- s[output].bind(bf, tvm.thread_axis("blockIdx.y"))
- s[output].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[output].bind(vn, tvm.thread_axis("vthread"))
- s[output].bind(vf, tvm.thread_axis("vthread"))
- s[output].bind(vx, tvm.thread_axis("vthread"))
-
- s[output].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[output].bind(bn, te.thread_axis("blockIdx.z"))
+ s[output].bind(bf, te.thread_axis("blockIdx.y"))
+ s[output].bind(bx, te.thread_axis("blockIdx.x"))
+ s[output].bind(vn, te.thread_axis("vthread"))
+ s[output].bind(vf, te.thread_axis("vthread"))
+ s[output].bind(vx, te.thread_axis("vthread"))
+
+ s[output].bind(tx, te.thread_axis("threadIdx.x"))
s[OL].compute_at(s[output], tx)
# number of threads
n_tz = cfg["tile_n"].size[2] * cfg["tile_f"].size[2]
fused = s[load].fuse(f, x)
tz, fused = s[load].split(fused, nparts=n_tz)
tx, fused = s[load].split(fused, nparts=n_tx)
- s[load].bind(tz, tvm.thread_axis("threadIdx.y"))
- s[load].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[load].bind(tz, te.thread_axis("threadIdx.y"))
+ s[load].bind(tx, te.thread_axis("threadIdx.x"))
s[output].pragma(kernel_scope, 'auto_unroll_max_step',
cfg['auto_unroll_max_step'].val)
s : Schedule
The computation schedule for conv1d.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if op.tag == 'conv1d_nwc':
##### space definition end #####
if isinstance(kernel.op,
- tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag:
+ tvm.te.ComputeOp) and 'dilate' in kernel.op.tag:
s[kernel].compute_inline()
if conv.op in s.outputs:
bf, vf, tf, fi = cfg["tile_f"].apply(s, output, f)
s[output].reorder(bn, bx, bf, vn, vx, vf, tn, tx, tf, ni, xi, fi)
- s[output].bind(bn, tvm.thread_axis("blockIdx.z"))
- s[output].bind(bx, tvm.thread_axis("blockIdx.y"))
- s[output].bind(bf, tvm.thread_axis("blockIdx.x"))
- s[output].bind(vn, tvm.thread_axis("vthread"))
- s[output].bind(vx, tvm.thread_axis("vthread"))
- s[output].bind(vf, tvm.thread_axis("vthread"))
-
- s[output].bind(tf, tvm.thread_axis("threadIdx.x"))
+ s[output].bind(bn, te.thread_axis("blockIdx.z"))
+ s[output].bind(bx, te.thread_axis("blockIdx.y"))
+ s[output].bind(bf, te.thread_axis("blockIdx.x"))
+ s[output].bind(vn, te.thread_axis("vthread"))
+ s[output].bind(vx, te.thread_axis("vthread"))
+ s[output].bind(vf, te.thread_axis("vthread"))
+
+ s[output].bind(tf, te.thread_axis("threadIdx.x"))
s[OL].compute_at(s[output], tf)
# number of threads
n_tz = cfg["tile_n"].size[2] * cfg["tile_x"].size[2]
fused = s[load].fuse(x, f)
tz, fused = s[load].split(fused, nparts=n_tz)
tx, fused = s[load].split(fused, nparts=n_tx)
- s[load].bind(tz, tvm.thread_axis("threadIdx.y"))
- s[load].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[load].bind(tz, te.thread_axis("threadIdx.y"))
+ s[load].bind(tx, te.thread_axis("threadIdx.x"))
s[output].pragma(kernel_scope, 'auto_unroll_max_step',
cfg['auto_unroll_max_step'].val)
"""Conv1d transpose template for cuda backend"""
import tvm
+from tvm import te
from tvm import autotvm
from .. import nn
from ..util import get_const_tuple, traverse_inline
----------
cfg: ConfigEntity
The config for this template
- Input : tvm.Tensor
+ Input : tvm.te.Tensor
3-D with shape [batch, in_channel, inp_width]
- Filter : tvm.Tensor
+ Filter : tvm.te.Tensor
3-D with shape [in_channel, num_filter, kernel_size]
stride : tuple of one int
The spatial stride along width
Returns
-------
- Output : tvm.Tensor
+ Output : tvm.te.Tensor
u 3-D with shape [batch, out_channel, out_width]
"""
if isinstance(stride, (tuple, list)):
pad_left = kernel_size - 1 - pad_left
pad_right = kernel_size - 1 - pad_right
dilated_width = stride * (inp_width - 1) + 1
- data = tvm.compute(
+ data = te.compute(
(batch, inp_channels, pad_left + dilated_width + pad_right),
- lambda n, c, x: tvm.if_then_else(
- tvm.all(x >= pad_left,
- x < pad_left + dilated_width,
- tvm.indexmod(x - pad_left, stride).equal(0)),
- data[n, c, tvm.indexdiv(x - pad_left, stride)],
- tvm.const(0., "float32")),
+ lambda n, c, x: tvm.tir.if_then_else(
+ tvm.tir.all(x >= pad_left,
+ x < pad_left + dilated_width,
+ tvm.tir.indexmod(x - pad_left, stride).equal(0)),
+ data[n, c, tvm.tir.indexdiv(x - pad_left, stride)],
+ tvm.tir.const(0., "float32")),
name='data_pad')
- dc = tvm.reduce_axis((0, inp_channels), name='dc')
- dw = tvm.reduce_axis((0, kernel_size), name='dw')
- data_out = tvm.compute(
+ dc = te.reduce_axis((0, inp_channels), name='dc')
+ dw = te.reduce_axis((0, kernel_size), name='dw')
+ data_out = te.compute(
(batch, out_channels, out_width),
- lambda b, c, w: tvm.sum(
+ lambda b, c, w: te.sum(
data[b, dc, w + dw].astype(out_dtype) *
kernel[dc, c, kernel_size - 1 - dw].astype(out_dtype),
axis=[dc, dw]), tag="conv1d_transpose_ncw")
s: Schedule
The computation schedule for conv1d transpose.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if op.tag == 'conv1d_transpose_ncw':
##### space definition end #####
- if isinstance(kernel.op, tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag:
+ if isinstance(kernel.op, tvm.te.ComputeOp) and 'dilate' in kernel.op.tag:
s[kernel].compute_inline()
if conv.op in s.outputs:
bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x)
s[output].reorder(bn, bf, bx, vn, vf, vx, tn, tf, tx, ni, fi, xi)
- s[output].bind(bn, tvm.thread_axis("blockIdx.z"))
- s[output].bind(bf, tvm.thread_axis("blockIdx.y"))
- s[output].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[output].bind(vn, tvm.thread_axis("vthread"))
- s[output].bind(vf, tvm.thread_axis("vthread"))
- s[output].bind(vx, tvm.thread_axis("vthread"))
-
- s[output].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[output].bind(bn, te.thread_axis("blockIdx.z"))
+ s[output].bind(bf, te.thread_axis("blockIdx.y"))
+ s[output].bind(bx, te.thread_axis("blockIdx.x"))
+ s[output].bind(vn, te.thread_axis("vthread"))
+ s[output].bind(vf, te.thread_axis("vthread"))
+ s[output].bind(vx, te.thread_axis("vthread"))
+
+ s[output].bind(tx, te.thread_axis("threadIdx.x"))
s[OL].compute_at(s[output], tx)
# number of threads
n_tz = cfg["tile_n"].size[2] * cfg["tile_f"].size[2]
fused = s[load].fuse(f, x)
tz, fused = s[load].split(fused, nparts=n_tz)
tx, fused = s[load].split(fused, nparts=n_tx)
- s[load].bind(tz, tvm.thread_axis("threadIdx.y"))
- s[load].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[load].bind(tz, te.thread_axis("threadIdx.y"))
+ s[load].bind(tx, te.thread_axis("threadIdx.x"))
s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val)
s[output].pragma(kernel_scope, 'unroll_explicit', cfg['unroll_explicit'].val)
# under the License.
# pylint: disable=invalid-name, unused-argument
"""Compute definition for conv2d with cuda backend"""
-import tvm
+from tvm import te
from tvm import autotvm
from tvm.contrib import cudnn
@autotvm.register_topi_schedule("conv2d_nchw.cuda")
def schedule_conv2d_nchw(cfg, outs):
"""Create the schedule for conv2d_nchw"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if op.tag == 'conv2d_nchw':
#
# @autotvm.register_topi_schedule("conv2d_nhwc.cuda")
# def schedule_conv2d_nhwc(cfg, outs):
-# outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
-# s = tvm.create_schedule([x.op for x in outs])
+# outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+# s = te.create_schedule([x.op for x in outs])
#
# def _callback(op):
# if op.tag == 'conv2d_nhwc':
import logging
import tvm
+from tvm import te
from tvm import relay
from tvm import autotvm
ic_block_factor = oc_block_factor = 4
# Store the same config for the altered operator (workload)
- new_data = tvm.placeholder((N, CI // ic_block_factor, H, W, ic_block_factor),
- dtype=data.dtype)
- new_kernel = tvm.placeholder((CO // oc_block_factor, CI // ic_block_factor, KH, KW, \
- oc_block_factor, ic_block_factor), dtype=kernel.dtype)
+ new_data = te.placeholder((N, CI // ic_block_factor, H, W, ic_block_factor),
+ dtype=data.dtype)
+ new_kernel = te.placeholder((CO // oc_block_factor, CI // ic_block_factor, KH, KW, \
+ oc_block_factor, ic_block_factor), dtype=kernel.dtype)
new_workload = autotvm.task.args_to_workload(
[new_data, new_kernel, strides, padding, dilation, new_layout, out_dtype],
"conv2d_NCHWc_int8.cuda")
# Store the same config for the altered operator (workload)
new_data = data
- new_weight = tvm.placeholder((KH + tile_size - 1, KW + tile_size - 1, CI, CO),
- dtype=kernel.dtype)
+ new_weight = te.placeholder((KH + tile_size - 1, KW + tile_size - 1, CI, CO),
+ dtype=kernel.dtype)
new_workload = autotvm.task.args_to_workload(
[new_data, new_weight, strides, padding, dilation, out_dtype],
"conv2d_nchw_winograd_without_weight_transform.cuda")
ic_block_factor = oc_block_factor = 4
# Store the same config for the altered operator (workload)
- new_data = tvm.placeholder((N, CI // ic_block_factor, H, W, ic_block_factor),
- dtype=data.dtype)
- new_kernel = tvm.placeholder((CO // oc_block_factor, CI // ic_block_factor // groups,
- KH, KW, oc_block_factor, ic_block_factor),
- dtype=kernel.dtype)
+ new_data = te.placeholder((N, CI // ic_block_factor, H, W, ic_block_factor),
+ dtype=data.dtype)
+ new_kernel = te.placeholder((CO // oc_block_factor, CI // ic_block_factor // groups,
+ KH, KW, oc_block_factor, ic_block_factor),
+ dtype=kernel.dtype)
new_workload = autotvm.task.args_to_workload(
[new_data, new_kernel, strides, padding, dilation, groups, out_dtype],
"group_conv2d_NCHWc_int8.cuda")
# pylint: disable=invalid-name
"""The templates for cuda conv2d operators"""
import tvm
+from tvm import te
from tvm import autotvm
from ..util import get_const_tuple
pad_data, kernel = s[conv].op.input_tensors
s[pad_data].compute_inline()
- if isinstance(kernel.op, tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag:
+ if isinstance(kernel.op, tvm.te.ComputeOp) and 'dilate' in kernel.op.tag:
s[kernel].compute_inline()
if conv.op in s.outputs:
bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x)
bf = s[output].fuse(n, bf)
- s[output].bind(bf, tvm.thread_axis("blockIdx.z"))
- s[output].bind(by, tvm.thread_axis("blockIdx.y"))
- s[output].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[output].bind(vf, tvm.thread_axis("vthread"))
- s[output].bind(vy, tvm.thread_axis("vthread"))
- s[output].bind(vx, tvm.thread_axis("vthread"))
- s[output].bind(tf, tvm.thread_axis("threadIdx.z"))
- s[output].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[output].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[output].bind(bf, te.thread_axis("blockIdx.z"))
+ s[output].bind(by, te.thread_axis("blockIdx.y"))
+ s[output].bind(bx, te.thread_axis("blockIdx.x"))
+ s[output].bind(vf, te.thread_axis("vthread"))
+ s[output].bind(vy, te.thread_axis("vthread"))
+ s[output].bind(vx, te.thread_axis("vthread"))
+ s[output].bind(tf, te.thread_axis("threadIdx.z"))
+ s[output].bind(ty, te.thread_axis("threadIdx.y"))
+ s[output].bind(tx, te.thread_axis("threadIdx.x"))
s[output].reorder(bf, by, bx, vf, vy, vx, tf, ty, tx, fi, yi, xi)
s[OL].compute_at(s[output], tx)
tz, fused = s[load].split(fused, nparts=cfg["tile_f"].size[2])
ty, fused = s[load].split(fused, nparts=cfg["tile_y"].size[2])
tx, fused = s[load].split(fused, nparts=cfg["tile_x"].size[2])
- s[load].bind(tz, tvm.thread_axis("threadIdx.z"))
- s[load].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[load].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[load].bind(tz, te.thread_axis("threadIdx.z"))
+ s[load].bind(ty, te.thread_axis("threadIdx.y"))
+ s[load].bind(tx, te.thread_axis("threadIdx.x"))
# unroll
s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val)
# pylint: disable=invalid-name, too-many-locals, too-many-statements, unused-argument
"""Schedule for conv2d_hwcn with auto fusion"""
import tvm
+from tvm import te
from tvm import autotvm
from tvm.autotvm.task.space import SplitEntity
s: Schedule
The computation schedule for conv2d_hwcn.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- sch = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ sch = te.create_schedule([x.op for x in outs])
def schedule(Apad, W, B):
"""Schedule conv2d_hwcn"""
sch[Apad].compute_inline()
bx, txz, tx, ni = cfg['tile_ni'].apply(sch, Out, ni)
sch[Out].reorder(bz, by, bx, tyz, txz, ty, tx, fi, ni)
- sch[Out].bind(bz, tvm.thread_axis('blockIdx.z'))
- sch[Out].bind(by, tvm.thread_axis('blockIdx.y'))
- sch[Out].bind(bx, tvm.thread_axis('blockIdx.x'))
- sch[Out].bind(tyz, tvm.thread_axis('vthread'))
- sch[Out].bind(txz, tvm.thread_axis('vthread'))
- sch[Out].bind(ty, tvm.thread_axis('threadIdx.y'))
- sch[Out].bind(tx, tvm.thread_axis('threadIdx.x'))
+ sch[Out].bind(bz, te.thread_axis('blockIdx.z'))
+ sch[Out].bind(by, te.thread_axis('blockIdx.y'))
+ sch[Out].bind(bx, te.thread_axis('blockIdx.x'))
+ sch[Out].bind(tyz, te.thread_axis('vthread'))
+ sch[Out].bind(txz, te.thread_axis('vthread'))
+ sch[Out].bind(ty, te.thread_axis('threadIdx.y'))
+ sch[Out].bind(tx, te.thread_axis('threadIdx.x'))
# Schedule BL local write
sch[BL].compute_at(sch[Out], tx)
tx, ni = sch[AA].split(ni, nparts=cfg['tile_ni'].size[2])
_, ni = sch[AA].split(ni, factor=4)
sch[AA].reorder(ty, tx, yi, xi, ci, ni)
- sch[AA].bind(ty, tvm.thread_axis('threadIdx.y'))
- sch[AA].bind(tx, tvm.thread_axis('threadIdx.x'))
+ sch[AA].bind(ty, te.thread_axis('threadIdx.y'))
+ sch[AA].bind(tx, te.thread_axis('threadIdx.x'))
sch[AA].vectorize(ni)
# Schedule for W's shared memory load
yi, xi, ci, fi = sch[WW].op.axis
tx, fi = sch[WW].split(fi, nparts=cfg['tile_ni'].size[2])
_, fi = sch[WW].split(fi, factor=4)
sch[WW].reorder(ty, tx, yi, xi, ci, fi)
- sch[WW].bind(ty, tvm.thread_axis('threadIdx.y'))
- sch[WW].bind(tx, tvm.thread_axis('threadIdx.x'))
+ sch[WW].bind(ty, te.thread_axis('threadIdx.y'))
+ sch[WW].bind(tx, te.thread_axis('threadIdx.x'))
sch[WW].vectorize(fi)
scheduled_ops = []
if operator not in sch.outputs:
sch[operator].compute_inline()
for tensor in operator.input_tensors:
- if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops:
+ if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops:
traverse(tensor.op)
elif operator.tag == 'conv2d_hwcn':
Apad = operator.input_tensors[0]
W = operator.input_tensors[1]
- if isinstance(W.op, tvm.tensor.ComputeOp) and 'dilate' in W.op.tag:
+ if isinstance(W.op, tvm.te.ComputeOp) and 'dilate' in W.op.tag:
sch[W].compute_inline()
B = operator.output(0)
schedule(Apad, W, B)
# pylint: disable=invalid-name
"""Int8 conv2d in NCHWc layout"""
import tvm
+from tvm import te
from tvm import autotvm
from .injective import schedule_injective_from_existing
cfg: ConfigEntity
The config for this template
- data : tvm.Tensor
+ data : tvm.te.Tensor
4-D with shape [batch, in_channel, in_height, in_width] or
5-D with shape [batch, in_channel_chunk, in_height, in_width, in_channel_block]
- kernel : tvm.Tensor
+ kernel : tvm.te.Tensor
4-D with shape [num_filter, in_channel, filter_height, filter_width] or
6-D with shape [num_filter_chunk, in_channel_chunk, filter_height,
filter_width, num_filter_block, in_channel_block]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
5-D with shape [batch, out_channel_chunk, out_height, out_width, out_channel_block]
"""
assert layout in ["NCHW", "NCHW4c"]
assert channels % ic_block_factor == 0, \
"Number of input channels should be multiple of {}".format(
ic_block_factor)
- packed_data = tvm.compute((batch, channels // ic_block_factor, height, width,
- ic_block_factor),
- lambda n, c, h, w, vc: data[n, c*ic_block_factor + vc, h, w],
- name="packed_data")
+ packed_data = te.compute((batch, channels // ic_block_factor, height, width,
+ ic_block_factor),
+ lambda n, c, h, w, vc: data[n, c*ic_block_factor + vc, h, w],
+ name="packed_data")
out_channels, in_channels, kernel_h, kernel_w = get_const_tuple(
kernel.shape)
assert out_channels % 4 == 0, \
"Number of output channels should be multiple of {}".format(
oc_block_factor)
- packed_kernel = tvm.compute(
+ packed_kernel = te.compute(
(out_channels // oc_block_factor, in_channels // ic_block_factor, kernel_h, kernel_w,
oc_block_factor, ic_block_factor),
lambda oc_chunk, ic_chunk, kh, kw, oc_block, ic_block:
oshape = (batch, oc_chunk, out_height, out_width, oc_block)
- icc = tvm.reduce_axis((0, ic_chunk), name='ic_chunk')
- icb = tvm.reduce_axis((0, ic_block), name='ic_block')
- kh = tvm.reduce_axis((0, kernel_h), name='kh')
- kw = tvm.reduce_axis((0, kernel_w), name='kw')
+ icc = te.reduce_axis((0, ic_chunk), name='ic_chunk')
+ icb = te.reduce_axis((0, ic_block), name='ic_block')
+ kh = te.reduce_axis((0, kernel_h), name='kh')
+ kw = te.reduce_axis((0, kernel_w), name='kw')
- conv = tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block:
- tvm.sum(pad_data[n, icc, oh*stride_h+kh*dilation_h, \
- ow*stride_w+kw*dilation_w, icb]
- .astype('int32') *
- packed_kernel[oc_chunk, icc,
- kh, kw, oc_block, icb]
- .astype('int32'),
- axis=[icc, kh, kw, icb]))
+ conv = te.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block:
+ te.sum(pad_data[n, icc, oh*stride_h+kh*dilation_h, \
+ ow*stride_w+kw*dilation_w, icb]
+ .astype('int32') *
+ packed_kernel[oc_chunk, icc,
+ kh, kw, oc_block, icb]
+ .astype('int32'),
+ axis=[icc, kh, kw, icb]))
- output = tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block:
- conv[n, oc_chunk, oh, ow, oc_block].astype(out_dtype),
- tag="conv2d_NCHWc_int8")
+ output = te.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block:
+ conv[n, oc_chunk, oh, ow, oc_block].astype(out_dtype),
+ tag="conv2d_NCHWc_int8")
# num flop
num_flop = batch * oc_chunk * oc_block * out_height * out_width * \
@autotvm.register_topi_schedule("conv2d_NCHWc_int8.cuda")
def schedule_conv2d_NCHWc_int8(cfg, outs):
"""Schedule conv2d int8 NCHWc template"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if op.tag == 'conv2d_NCHWc_int8':
conv = output.op.input_tensors[0]
packed_data, packed_kernel = conv.op.input_tensors
- if isinstance(packed_data.op, tvm.tensor.ComputeOp) and "pad" in packed_data.op.tag:
+ if isinstance(packed_data.op, tvm.te.ComputeOp) and "pad" in packed_data.op.tag:
pad_data = packed_data
packed_data = pad_data.op.input_tensors[0]
else:
s[packed_data].pragma(s[packed_data].op.axis[0], "debug_skip_region")
s[packed_kernel].pragma(s[packed_kernel].op.axis[0], "debug_skip_region")
else:
- if isinstance(packed_kernel.op, tvm.tensor.ComputeOp) and\
- packed_kernel.name == 'packed_kernel':
+ if isinstance(packed_kernel.op, tvm.te.ComputeOp) and\
+ packed_kernel.name == 'packed_kernel':
# data and kernel are not pre-computed, schedule layout transform here
schedule_injective_from_existing(s, packed_data)
schedule_injective_from_existing(s, packed_kernel)
bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x)
s[output].reorder(bn, bf, by, bx, vn, vf, vy, vx, tn, tf, ty, tx, ni, fi, yi, xi)
- s[output].bind(bn, tvm.thread_axis("blockIdx.z"))
- s[output].bind(bf, tvm.thread_axis("blockIdx.y"))
- s[output].bind(s[output].fuse(by, bx), tvm.thread_axis("blockIdx.x"))
- s[output].bind(vn, tvm.thread_axis("vthread"))
- s[output].bind(vf, tvm.thread_axis("vthread"))
- s[output].bind(vy, tvm.thread_axis("vthread"))
- s[output].bind(vx, tvm.thread_axis("vthread"))
+ s[output].bind(bn, te.thread_axis("blockIdx.z"))
+ s[output].bind(bf, te.thread_axis("blockIdx.y"))
+ s[output].bind(s[output].fuse(by, bx), te.thread_axis("blockIdx.x"))
+ s[output].bind(vn, te.thread_axis("vthread"))
+ s[output].bind(vf, te.thread_axis("vthread"))
+ s[output].bind(vy, te.thread_axis("vthread"))
+ s[output].bind(vx, te.thread_axis("vthread"))
cfg.define_knob("fuse_yx", [0, 1]) # fuse ty,tx or tn,tf
if cfg["fuse_yx"].val:
- s[output].bind(tn, tvm.thread_axis("threadIdx.z"))
- s[output].bind(tf, tvm.thread_axis("threadIdx.y"))
+ s[output].bind(tn, te.thread_axis("threadIdx.z"))
+ s[output].bind(tf, te.thread_axis("threadIdx.y"))
tyx = s[output].fuse(ty, tx)
- s[output].bind(tyx, tvm.thread_axis("threadIdx.x"))
+ s[output].bind(tyx, te.thread_axis("threadIdx.x"))
s[conv].compute_at(s[output], tyx)
# number of threads
n_ty = cfg["tile_f"].size[2]
n_tx = cfg["tile_y"].size[2] * cfg["tile_x"].size[2]
else:
- s[output].bind(s[output].fuse(tn, tf), tvm.thread_axis("threadIdx.z"))
- s[output].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[output].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[output].bind(s[output].fuse(tn, tf), te.thread_axis("threadIdx.z"))
+ s[output].bind(ty, te.thread_axis("threadIdx.y"))
+ s[output].bind(tx, te.thread_axis("threadIdx.x"))
s[conv].compute_at(s[output], tx)
# number of threads
fused, tx = s[load].split(fused, factor=n_tx)
fused, ty = s[load].split(fused, factor=n_ty)
fused, tz = s[load].split(fused, factor=n_tz)
- s[load].bind(tz, tvm.thread_axis("threadIdx.z"))
- s[load].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[load].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[load].bind(tz, te.thread_axis("threadIdx.z"))
+ s[load].bind(ty, te.thread_axis("threadIdx.y"))
+ s[load].bind(tx, te.thread_axis("threadIdx.x"))
# double buffer
cfg.define_knob('AA_double_buffer', [0, 1])
"""Conv2d transpose template for cuda backend"""
import tvm
+from tvm import te
from tvm import autotvm
from tvm.autotvm.task.space import SplitEntity, OtherOptionEntity
from .. import nn
----------
cfg: ConfigEntity
The config for this template
- Input : tvm.Tensor
+ Input : tvm.te.Tensor
4-D with shape [batch, in_channel, in_height, in_width]
- Filter : tvm.Tensor
+ Filter : tvm.te.Tensor
4-D with shape [in_channel, num_filter, filter_height, filter_width]
strides : tuple of two ints
The spatial stride along height and width
Returns
-------
- Output : tvm.Tensor
+ Output : tvm.te.Tensor
4-D with shape [batch, out_channel, out_height, out_width]
"""
batch, inp_channels, inp_height, inp_width = get_const_tuple(data.shape)
padding, (kernel_height, kernel_width))
out_width = (inp_width - 1) * stride_width + \
- kernel_width - pad_left - pad_right
+ kernel_width - pad_left - pad_right
pad_left = kernel_width - 1 - pad_left
pad_right = kernel_width - 1 - pad_right
dilated_width = stride_width * (inp_width - 1) + 1
out_height = (inp_height - 1) * stride_height + \
- kernel_height - pad_top - pad_bottom
+ kernel_height - pad_top - pad_bottom
pad_top = kernel_height - 1 - pad_top
pad_bottom = kernel_height - 1 - pad_bottom
dilated_height = stride_height * (inp_height - 1) + 1
# compute pad
- data = tvm.compute(
+ data = te.compute(
(batch, inp_channels,
pad_top + dilated_height + pad_bottom,
pad_left + dilated_width + pad_right),
- lambda n, c, y, x: tvm.if_then_else(
- tvm.all(x >= pad_left,
- x < pad_left + dilated_width,
- tvm.indexmod(x - pad_left, stride_width).equal(0),
- y >= pad_top,
- y < pad_top + dilated_height,
- tvm.indexmod(y - pad_top, stride_height).equal(0)),
+ lambda n, c, y, x: tvm.tir.if_then_else(
+ tvm.tir.all(x >= pad_left,
+ x < pad_left + dilated_width,
+ tvm.tir.indexmod(x - pad_left, stride_width).equal(0),
+ y >= pad_top,
+ y < pad_top + dilated_height,
+ tvm.tir.indexmod(y - pad_top, stride_height).equal(0)),
data[n, c,
- tvm.indexdiv(y - pad_top, stride_height),
- tvm.indexdiv(x - pad_left, stride_width)],
- tvm.const(0., "float32")),
+ tvm.tir.indexdiv(y - pad_top, stride_height),
+ tvm.tir.indexdiv(x - pad_left, stride_width)],
+ tvm.tir.const(0., "float32")),
name='data_pad')
# compute transposed conv
- dc = tvm.reduce_axis((0, inp_channels), name='dc')
- dh = tvm.reduce_axis((0, kernel_height), name='dh')
- dw = tvm.reduce_axis((0, kernel_width), name='dw')
- data_out = tvm.compute(
+ dc = te.reduce_axis((0, inp_channels), name='dc')
+ dh = te.reduce_axis((0, kernel_height), name='dh')
+ dw = te.reduce_axis((0, kernel_width), name='dw')
+ data_out = te.compute(
(batch, out_channels, out_height, out_width),
- lambda b, c, h, w: tvm.sum(
+ lambda b, c, h, w: te.sum(
data[b, dc, h + dh, w + dw].astype(out_dtype) *
kernel[dc,
c,
s: Schedule
The computation schedule for conv2d transpose.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _fallback_schedule(N, F, Y, X):
# pylint: disable=unused-argument
##### space definition end #####
- if isinstance(kernel.op, tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag:
+ if isinstance(kernel.op, tvm.te.ComputeOp) and 'dilate' in kernel.op.tag:
s[kernel].compute_inline()
if conv.op in s.outputs:
bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x)
s[output].reorder(bn, bf, by, bx, vn, vf, vy, vx, tn, tf, ty, tx, ni, fi, yi, xi)
- s[output].bind(bn, tvm.thread_axis("blockIdx.z"))
- s[output].bind(bf, tvm.thread_axis("blockIdx.y"))
- s[output].bind(s[output].fuse(by, bx), tvm.thread_axis("blockIdx.x"))
- s[output].bind(vn, tvm.thread_axis("vthread"))
- s[output].bind(vf, tvm.thread_axis("vthread"))
- s[output].bind(vy, tvm.thread_axis("vthread"))
- s[output].bind(vx, tvm.thread_axis("vthread"))
+ s[output].bind(bn, te.thread_axis("blockIdx.z"))
+ s[output].bind(bf, te.thread_axis("blockIdx.y"))
+ s[output].bind(s[output].fuse(by, bx), te.thread_axis("blockIdx.x"))
+ s[output].bind(vn, te.thread_axis("vthread"))
+ s[output].bind(vf, te.thread_axis("vthread"))
+ s[output].bind(vy, te.thread_axis("vthread"))
+ s[output].bind(vx, te.thread_axis("vthread"))
cfg.define_knob("fuse_yx", [0, 1]) # fuse ty,tx or tn,tf
if cfg["fuse_yx"].val:
- s[output].bind(tn, tvm.thread_axis("threadIdx.z"))
- s[output].bind(tf, tvm.thread_axis("threadIdx.y"))
+ s[output].bind(tn, te.thread_axis("threadIdx.z"))
+ s[output].bind(tf, te.thread_axis("threadIdx.y"))
tyx = s[output].fuse(ty, tx)
- s[output].bind(s[output].fuse(ty, tx), tvm.thread_axis("threadIdx.x"))
+ s[output].bind(s[output].fuse(ty, tx), te.thread_axis("threadIdx.x"))
s[OL].compute_at(s[output], tyx)
# number of threads
n_ty = cfg["tile_f"].size[2]
n_tx = cfg["tile_y"].size[2] * cfg["tile_x"].size[2]
else:
- s[output].bind(s[output].fuse(tn, tf), tvm.thread_axis("threadIdx.z"))
- s[output].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[output].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[output].bind(s[output].fuse(tn, tf), te.thread_axis("threadIdx.z"))
+ s[output].bind(ty, te.thread_axis("threadIdx.y"))
+ s[output].bind(tx, te.thread_axis("threadIdx.x"))
s[OL].compute_at(s[output], tx)
# number of threads
tz, fused = s[load].split(fused, nparts=n_tz)
ty, fused = s[load].split(fused, nparts=n_ty)
tx, fused = s[load].split(fused, nparts=n_tx)
- s[load].bind(tz, tvm.thread_axis("threadIdx.z"))
- s[load].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[load].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[load].bind(tz, te.thread_axis("threadIdx.z"))
+ s[load].bind(ty, te.thread_axis("threadIdx.y"))
+ s[load].bind(tx, te.thread_axis("threadIdx.x"))
s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val)
s[output].pragma(kernel_scope, 'unroll_explicit', cfg['unroll_explicit'].val)
import logging
import tvm
+from tvm import te
from tvm import autotvm
from .. import nn
# transform kernel
if not pre_computed:
- r_kh = tvm.reduce_axis((0, KH), name='r_kh')
- r_kw = tvm.reduce_axis((0, KW), name='r_kw')
- kernel_pack = tvm.compute((alpha, alpha, CI, CO), lambda eps, nu, ci, co:
- tvm.sum(kernel[co][ci][r_kh][r_kw] *
- G[eps][r_kh] * G[nu][r_kw],
- axis=[r_kh, r_kw]), name='kernel_pack')
+ r_kh = te.reduce_axis((0, KH), name='r_kh')
+ r_kw = te.reduce_axis((0, KW), name='r_kw')
+ kernel_pack = te.compute((alpha, alpha, CI, CO), lambda eps, nu, ci, co:
+ te.sum(kernel[co][ci][r_kh][r_kw] *
+ G[eps][r_kh] * G[nu][r_kw],
+ axis=[r_kh, r_kw]), name='kernel_pack')
else:
kernel_pack = kernel
- idxdiv = tvm.indexdiv
- idxmod = tvm.indexmod
+ idxdiv = tvm.tir.indexdiv
+ idxmod = tvm.tir.indexmod
# pack input tile
- input_tile = tvm.compute((CI, P, alpha, alpha), lambda c, p, eps, nu:
- data_pad[idxdiv(p, (nH * nW))][c][idxmod(idxdiv(p, nW), nH) * m + eps]
- [idxmod(p, nW) * m + nu], name='d')
+ input_tile = te.compute((CI, P, alpha, alpha), lambda c, p, eps, nu:
+ data_pad[idxdiv(p, (nH * nW))][c][idxmod(idxdiv(p, nW), nH) * m + eps]
+ [idxmod(p, nW) * m + nu], name='d')
# transform data
- r_a = tvm.reduce_axis((0, alpha), 'r_a')
- r_b = tvm.reduce_axis((0, alpha), 'r_a')
- data_pack = tvm.compute((alpha, alpha, CI, P), lambda eps, nu, ci, p:
- tvm.sum(input_tile[ci][p][r_a][r_b] * B[r_a][eps] * B[r_b][nu],
- axis=[r_a, r_b]), name='data_pack')
+ r_a = te.reduce_axis((0, alpha), 'r_a')
+ r_b = te.reduce_axis((0, alpha), 'r_a')
+ data_pack = te.compute((alpha, alpha, CI, P), lambda eps, nu, ci, p:
+ te.sum(input_tile[ci][p][r_a][r_b] * B[r_a][eps] * B[r_b][nu],
+ axis=[r_a, r_b]), name='data_pack')
# do batch gemm
- ci = tvm.reduce_axis((0, CI), name='ci')
- bgemm = tvm.compute((alpha, alpha, CO, P), lambda eps, nu, co, p:
- tvm.sum(kernel_pack[eps][nu][ci][co] *
- data_pack[eps][nu][ci][p],
- axis=[ci]), name='bgemm')
+ ci = te.reduce_axis((0, CI), name='ci')
+ bgemm = te.compute((alpha, alpha, CO, P), lambda eps, nu, co, p:
+ te.sum(kernel_pack[eps][nu][ci][co] *
+ data_pack[eps][nu][ci][p],
+ axis=[ci]), name='bgemm')
# inverse transform
- r_a = tvm.reduce_axis((0, alpha), 'r_a')
- r_b = tvm.reduce_axis((0, alpha), 'r_a')
- inverse = tvm.compute((CO, P, m, m), lambda co, p, vh, vw:
- tvm.sum(bgemm[r_a][r_b][co][p] * A[r_a][vh] * A[r_b][vw],
- axis=[r_a, r_b]), name='inverse')
+ r_a = te.reduce_axis((0, alpha), 'r_a')
+ r_b = te.reduce_axis((0, alpha), 'r_a')
+ inverse = te.compute((CO, P, m, m), lambda co, p, vh, vw:
+ te.sum(bgemm[r_a][r_b][co][p] * A[r_a][vh] * A[r_b][vw],
+ axis=[r_a, r_b]), name='inverse')
# output
- output = tvm.compute((N, CO, H, W), lambda n, co, h, w:
- inverse[co,
- n * nH * nW + idxdiv(h, m) * nW + idxdiv(w, m),
- idxmod(h, m),
- idxmod(w, m)],
- name='output', tag='conv2d_nchw_winograd')
+ output = te.compute((N, CO, H, W), lambda n, co, h, w:
+ inverse[co,
+ n * nH * nW + idxdiv(h, m) * nW + idxdiv(w, m),
+ idxmod(h, m),
+ idxmod(w, m)],
+ name='output', tag='conv2d_nchw_winograd')
cfg.add_flop(2 * N * CO * H * W * CI * KH * KW)
return output
fused = s[data_pack].fuse(c, p)
bb, tt = s[data_pack].split(fused, 128)
s[data_pack].reorder(bb, tt, pi, eps, nu)
- s[data_pack].bind(bb, tvm.thread_axis("blockIdx.x"))
- s[data_pack].bind(tt, tvm.thread_axis("threadIdx.x"))
+ s[data_pack].bind(bb, te.thread_axis("blockIdx.x"))
+ s[data_pack].bind(tt, te.thread_axis("threadIdx.x"))
s[data_l].compute_at(s[data_pack], pi)
s[input_tile].compute_at(s[data_pack], pi)
fused = s[kernel_pack].fuse(ci, co)
bb, tt = s[kernel_pack].split(fused, 128)
s[kernel_pack].reorder(bb, tt, eps, nu, r_a, r_b)
- s[kernel_pack].bind(bb, tvm.thread_axis("blockIdx.x"))
- s[kernel_pack].bind(tt, tvm.thread_axis("threadIdx.x"))
+ s[kernel_pack].bind(bb, te.thread_axis("blockIdx.x"))
+ s[kernel_pack].bind(tt, te.thread_axis("threadIdx.x"))
else:
kernel = kernel_pack
- if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag:
+ if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag:
s[kernel].compute_inline()
##### space definition begin #####
bz, vz, tz, zi = cfg["tile_b"].apply(s, C, b)
by, vy, ty, yi = cfg["tile_y"].apply(s, C, y)
bx, vx, tx, xi = cfg["tile_x"].apply(s, C, x)
- s[C].bind(bz, tvm.thread_axis("blockIdx.z"))
- s[C].bind(by, tvm.thread_axis("blockIdx.y"))
- s[C].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[C].bind(vz, tvm.thread_axis("vthread"))
- s[C].bind(vy, tvm.thread_axis("vthread"))
- s[C].bind(vx, tvm.thread_axis("vthread"))
- s[C].bind(tz, tvm.thread_axis("threadIdx.z"))
- s[C].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[C].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[C].bind(bz, te.thread_axis("blockIdx.z"))
+ s[C].bind(by, te.thread_axis("blockIdx.y"))
+ s[C].bind(bx, te.thread_axis("blockIdx.x"))
+ s[C].bind(vz, te.thread_axis("vthread"))
+ s[C].bind(vy, te.thread_axis("vthread"))
+ s[C].bind(vx, te.thread_axis("vthread"))
+ s[C].bind(tz, te.thread_axis("threadIdx.z"))
+ s[C].bind(ty, te.thread_axis("threadIdx.y"))
+ s[C].bind(tx, te.thread_axis("threadIdx.x"))
s[C].reorder(bgemm_scope, bz, by, bx, vz, vy, vx, tz, ty, tx, zi, yi, xi)
# tile reduction axes
fused, tx = s[load].split(fused, cfg["tile_x"].size[2])
fused, ty = s[load].split(fused, cfg["tile_y"].size[2])
fused, tz = s[load].split(fused, cfg["tile_b"].size[2])
- s[load].bind(tz, tvm.thread_axis("threadIdx.z"))
- s[load].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[load].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[load].bind(tz, te.thread_axis("threadIdx.z"))
+ s[load].bind(ty, te.thread_axis("threadIdx.y"))
+ s[load].bind(tx, te.thread_axis("threadIdx.x"))
s[C].pragma(bgemm_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val)
s[C].pragma(bgemm_scope, 'unroll_explicit', cfg['unroll_explicit'].val)
fused = s[output].fuse(n, co, ho, wo)
bb, tt = s[output].split(fused, 128)
- s[output].bind(bb, tvm.thread_axis("blockIdx.x"))
- s[output].bind(tt, tvm.thread_axis("threadIdx.x"))
+ s[output].bind(bb, te.thread_axis("blockIdx.x"))
+ s[output].bind(tt, te.thread_axis("threadIdx.x"))
if OL is not None:
s[OL].compute_at(s[output], tt)
@autotvm.register_topi_schedule("conv2d_nchw_winograd.cuda")
def schedule_conv2d_nchw_winograd(cfg, outs):
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if 'conv2d_nchw_winograd' in op.tag:
@autotvm.register_topi_schedule("conv2d_nchw_winograd_without_weight_transform.cuda")
def schedule_conv2d_nchw_winograd_without_weight_transform(cfg, outs):
"""TOPI schedule callback"""
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if 'conv2d_nchw_winograd' in op.tag:
# under the License.
# pylint: disable=invalid-name, unused-argument
"""Compute definition for conv3d with cuda backend"""
-import tvm
+from tvm import te
from tvm import autotvm
from tvm.contrib import cudnn
cfg: ConfigEntity
The config for this template
- data : tvm.Tensor
+ data : tvm.te.Tensor
5-D with shape [batch, in_channel, in_depth, in_height, in_width]
- kernel : tvm.Tensor
+ kernel : tvm.te.Tensor
5-D with shape [num_filter, in_channel, filter_depth, filter_height, filter_width]
strides : int or a list/tuple of three ints
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
5-D with shape [batch, out_channel, out_depth, out_height, out_width]
"""
return nn.conv3d_ncdhw(data, kernel, strides, padding, dilation, out_dtype)
s: Schedule
The computation schedule for conv2d.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if op.tag == 'conv3d_ncdhw':
Parameters
----------
- Input : tvm.Tensor
+ Input : tvm.te.Tensor
5-D with shape [batch, in_depth, in_height, in_width, in_channel]
- Filter : tvm.Tensor
+ Filter : tvm.te.Tensor
5-D with shape [filter_depth, filter_height, filter_width, in_channel, num_filter]
stride : int or a list/tuple of three ints
Returns
-------
- Output : tvm.Tensor
+ Output : tvm.te.Tensor
5-D with shape [batch, out_depth, out_height, out_width, out_channel]
"""
return nn.conv3d_ndhwc(data, kernel, strides, padding, dilation, out_dtype)
s: Schedule
The computation schedule for conv2d.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if op.tag == 'conv3d_ndhwc':
cfg: ConfigEntity
The config for this template
- data : tvm.Tensor
+ data : tvm.te.Tensor
5-D with shape [batch, in_channel, in_depth, in_height, in_width]
- kernel : tvm.Tensor
+ kernel : tvm.te.Tensor
5-D with shape [num_filter, in_channel, filter_depth, filter_height, filter_width]
strides : int or a list/tuple of three ints
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
5-D with shape [batch, out_channel, out_depth, out_height, out_width]
"""
if layout == 'NCDHW':
# pylint: disable=invalid-name
"""The templates for cuda conv3d operators"""
import tvm
+from tvm import te
from tvm import autotvm
from ..util import get_const_tuple
pad_data, kernel = s[conv].op.input_tensors
s[pad_data].compute_inline()
- if isinstance(kernel.op, tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag:
+ if isinstance(kernel.op, tvm.te.ComputeOp) and 'dilate' in kernel.op.tag:
s[kernel].compute_inline()
if conv.op in s.outputs:
bf = s[output].fuse(n, bf)
s[output].reorder(bf, bd, by, bx, vf, vd, vy, vx, tf, td, ty, tx, fi, di, yi, xi)
- s[output].bind(bf, tvm.thread_axis("blockIdx.z"))
- s[output].bind(s[output].fuse(bd, by), tvm.thread_axis("blockIdx.y"))
- s[output].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[output].bind(vf, tvm.thread_axis("vthread"))
- s[output].bind(vd, tvm.thread_axis("vthread"))
- s[output].bind(vy, tvm.thread_axis("vthread"))
- s[output].bind(vx, tvm.thread_axis("vthread"))
- s[output].bind(s[output].fuse(td, tf), tvm.thread_axis("threadIdx.z"))
- s[output].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[output].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[output].bind(bf, te.thread_axis("blockIdx.z"))
+ s[output].bind(s[output].fuse(bd, by), te.thread_axis("blockIdx.y"))
+ s[output].bind(bx, te.thread_axis("blockIdx.x"))
+ s[output].bind(vf, te.thread_axis("vthread"))
+ s[output].bind(vd, te.thread_axis("vthread"))
+ s[output].bind(vy, te.thread_axis("vthread"))
+ s[output].bind(vx, te.thread_axis("vthread"))
+ s[output].bind(s[output].fuse(td, tf), te.thread_axis("threadIdx.z"))
+ s[output].bind(ty, te.thread_axis("threadIdx.y"))
+ s[output].bind(tx, te.thread_axis("threadIdx.x"))
s[OL].compute_at(s[output], tx)
# tile reduction axes
td, fused = s[load].split(fused, nparts=cfg["tile_d"].size[2])
ty, fused = s[load].split(fused, nparts=cfg["tile_y"].size[2])
tx, fused = s[load].split(fused, nparts=cfg["tile_x"].size[2])
- s[load].bind(tz, tvm.thread_axis("threadIdx.z"))
- s[load].bind(s[load].fuse(td, ty), tvm.thread_axis("threadIdx.y"))
- s[load].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[load].bind(tz, te.thread_axis("threadIdx.z"))
+ s[load].bind(s[load].fuse(td, ty), te.thread_axis("threadIdx.y"))
+ s[load].bind(tx, te.thread_axis("threadIdx.x"))
# unroll
s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val)
# pylint: disable=invalid-name,unused-argument
"""Schedule template of deformable conv2d with cuda backend"""
import tvm
+from tvm import te
from tvm import autotvm
from .. import nn
from ..util import traverse_inline
s: Schedule
The computation schedule for conv2d.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if op.tag == 'deformable_conv2d_nchw':
data_deform, kernel = s[conv].op.input_tensors
s[data_deform].compute_inline()
- if isinstance(kernel.op, tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag:
+ if isinstance(kernel.op, tvm.te.ComputeOp) and 'dilate' in kernel.op.tag:
s[kernel].compute_inline()
if conv.op in s.outputs:
bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x)
bf = s[output].fuse(n, bf)
- s[output].bind(bf, tvm.thread_axis("blockIdx.z"))
- s[output].bind(by, tvm.thread_axis("blockIdx.y"))
- s[output].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[output].bind(vf, tvm.thread_axis("vthread"))
- s[output].bind(vy, tvm.thread_axis("vthread"))
- s[output].bind(vx, tvm.thread_axis("vthread"))
- s[output].bind(tf, tvm.thread_axis("threadIdx.z"))
- s[output].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[output].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[output].bind(bf, te.thread_axis("blockIdx.z"))
+ s[output].bind(by, te.thread_axis("blockIdx.y"))
+ s[output].bind(bx, te.thread_axis("blockIdx.x"))
+ s[output].bind(vf, te.thread_axis("vthread"))
+ s[output].bind(vy, te.thread_axis("vthread"))
+ s[output].bind(vx, te.thread_axis("vthread"))
+ s[output].bind(tf, te.thread_axis("threadIdx.z"))
+ s[output].bind(ty, te.thread_axis("threadIdx.y"))
+ s[output].bind(tx, te.thread_axis("threadIdx.x"))
s[output].reorder(bf, by, bx, vf, vy, vx, tf, ty, tx, fi, yi, xi)
s[OL].compute_at(s[output], tx)
tz, fused = s[load].split(fused, nparts=cfg["tile_f"].size[2])
ty, fused = s[load].split(fused, nparts=cfg["tile_y"].size[2])
tx, fused = s[load].split(fused, nparts=cfg["tile_x"].size[2])
- s[load].bind(tz, tvm.thread_axis("threadIdx.z"))
- s[load].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[load].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[load].bind(tz, te.thread_axis("threadIdx.z"))
+ s[load].bind(ty, te.thread_axis("threadIdx.y"))
+ s[load].bind(tx, te.thread_axis("threadIdx.x"))
# unroll
s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val)
# under the License.
# pylint: disable=invalid-name, unused-argument
"""Schedule for dense operator"""
-from __future__ import absolute_import as _abs
import logging
-import tvm
+from tvm import te
import tvm.autotvm as autotvm
from tvm.autotvm.task.space import SplitEntity
from tvm.contrib import cublas
matmul = cublas.matmul(data, weight, False, True)
cfg.add_flop(batch * in_dim * out_dim * 2)
if bias is not None:
- matmul = tvm.compute((batch, out_dim),
- lambda i, j: matmul[i, j] + bias[j],
- tag=tag.BROADCAST)
+ matmul = te.compute((batch, out_dim),
+ lambda i, j: matmul[i, j] + bias[j],
+ tag=tag.BROADCAST)
return matmul
@autotvm.register_topi_schedule("dense_small_batch.cuda")
def schedule_dense_small_batch(cfg, outs):
"""Schedule float32/64 dense with small batch size"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if op.tag == 'dense':
else:
Out = s.outputs[0].output(0)
s[C].compute_at(s[Out], s[Out].op.axis[1])
- s[Out].bind(s[Out].op.axis[0], tvm.thread_axis("blockIdx.y"))
- s[Out].bind(s[Out].op.axis[1], tvm.thread_axis("blockIdx.x"))
+ s[Out].bind(s[Out].op.axis[0], te.thread_axis("blockIdx.y"))
+ s[Out].bind(s[Out].op.axis[1], te.thread_axis("blockIdx.x"))
tx = s[C].op.reduce_axis[0]
- thread_x = tvm.thread_axis("threadIdx.x")
+ thread_x = te.thread_axis("threadIdx.x")
s[C].bind(tx, thread_x)
s[CF].compute_at(s[C], tx)
s[C].set_store_predicate(thread_x.var.equal(0))
@autotvm.register_topi_schedule("dense_large_batch.cuda")
def schedule_dense_large_batch(cfg, outs):
"""Schedule float32/64 dense with large batch size"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if op.tag == 'dense':
s[CC].compute_at(s[C], tx)
# Binding
- s[C].bind(by, tvm.thread_axis("blockIdx.y"))
- s[C].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[C].bind(tyz, tvm.thread_axis("vthread"))
- s[C].bind(txz, tvm.thread_axis("vthread"))
- s[C].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[C].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[C].bind(by, te.thread_axis("blockIdx.y"))
+ s[C].bind(bx, te.thread_axis("blockIdx.x"))
+ s[C].bind(tyz, te.thread_axis("vthread"))
+ s[C].bind(txz, te.thread_axis("vthread"))
+ s[C].bind(ty, te.thread_axis("threadIdx.y"))
+ s[C].bind(tx, te.thread_axis("threadIdx.x"))
# Split reduction
yo, xo = CC.op.axis
ty, _ = s[AA].split(s[AA].op.axis[0], nparts=num_thread_x)
_, xi = s[AA].split(s[AA].op.axis[1], factor=num_thread_x * 4)
tx, xi = s[AA].split(xi, nparts=num_thread_x)
- s[AA].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[AA].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[AA].bind(ty, te.thread_axis("threadIdx.y"))
+ s[AA].bind(tx, te.thread_axis("threadIdx.x"))
s[AA].double_buffer()
# Schedule for B' shared memory load
ty, _ = s[BB].split(s[BB].op.axis[0], nparts=num_thread_y)
_, xi = s[BB].split(s[BB].op.axis[1], factor=num_thread_y * 4)
tx, xi = s[BB].split(xi, nparts=num_thread_y)
- s[BB].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[BB].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[BB].bind(ty, te.thread_axis("threadIdx.y"))
+ s[BB].bind(tx, te.thread_axis("threadIdx.x"))
s[BB].double_buffer()
batch, in_dim = get_const_tuple(data.shape)
out_dim, _ = get_const_tuple(weight.shape)
- k = tvm.reduce_axis((0, in_dim), name='k')
+ k = te.reduce_axis((0, in_dim), name='k')
- matmul = tvm.compute((batch, out_dim),
- lambda i, j: tvm.sum(data[i, k].astype(out_dtype) *
- weight[j, k].astype(out_dtype), axis=[k]),
- tag="dense_int8")
+ matmul = te.compute((batch, out_dim),
+ lambda i, j: te.sum(data[i, k].astype(out_dtype) *
+ weight[j, k].astype(out_dtype), axis=[k]),
+ tag="dense_int8")
cfg.add_flop(batch * in_dim * out_dim * 2)
if bias is not None:
- matmul = tvm.compute((batch, out_dim),
- lambda i, j: matmul[i, j] + bias[j].astype(out_dtype),
- tag=tag.BROADCAST)
+ matmul = te.compute((batch, out_dim),
+ lambda i, j: matmul[i, j] + bias[j].astype(out_dtype),
+ tag=tag.BROADCAST)
cfg.add_flop(batch * out_dim)
return matmul
@autotvm.register_topi_schedule("dense_int8.cuda")
def schedule_dense_int8(cfg, outs):
"""Dense schedule for int8 on CUDA"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if "dense_int8" in op.tag:
bx, vx, tx, xi = cfg['tile_x'].apply(s, output, x)
s[output].reorder(by, bx, vy, vx, ty, tx, yi, xi)
- s[output].bind(by, tvm.thread_axis('blockIdx.y'))
- s[output].bind(bx, tvm.thread_axis('blockIdx.x'))
- s[output].bind(vy, tvm.thread_axis('vthread'))
- s[output].bind(vx, tvm.thread_axis('vthread'))
- s[output].bind(ty, tvm.thread_axis('threadIdx.y'))
- s[output].bind(tx, tvm.thread_axis('threadIdx.x'))
+ s[output].bind(by, te.thread_axis('blockIdx.y'))
+ s[output].bind(bx, te.thread_axis('blockIdx.x'))
+ s[output].bind(vy, te.thread_axis('vthread'))
+ s[output].bind(vx, te.thread_axis('vthread'))
+ s[output].bind(ty, te.thread_axis('threadIdx.y'))
+ s[output].bind(tx, te.thread_axis('threadIdx.x'))
n_ty = cfg['tile_y'].size[2]
n_tx = cfg['tile_x'].size[2]
fused, tx = s[load].split(fused, factor=n_tx)
fused, ty = s[load].split(fused, factor=n_ty)
- s[load].bind(tx, tvm.thread_axis('threadIdx.x'))
- s[load].bind(ty, tvm.thread_axis('threadIdx.y'))
+ s[load].bind(tx, te.thread_axis('threadIdx.x'))
+ s[load].bind(ty, te.thread_axis('threadIdx.y'))
s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val)
s[output].pragma(kernel_scope, 'unroll_explicit', False)
# pylint: disable=invalid-name, unused-argument
"""Schedule for depthwise_conv2d with auto fusion"""
import tvm
+from tvm import te
from tvm import autotvm
from ..util import traverse_inline
from .. import tag
s: Schedule
The computation schedule for depthwise_conv2d nchw.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if op.tag == 'depthwise_conv2d_nchw':
##### space definition end #####
s[pad_data].compute_inline()
- if isinstance(kernel.op, tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag:
+ if isinstance(kernel.op, tvm.te.ComputeOp) and 'dilate' in kernel.op.tag:
s[kernel].compute_inline()
if conv.op in s.outputs:
kernel_scope, n = s[output].split(n, nparts=1)
bf = s[output].fuse(n, bf)
- s[output].bind(bf, tvm.thread_axis("blockIdx.z"))
- s[output].bind(by, tvm.thread_axis("blockIdx.y"))
- s[output].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[output].bind(vf, tvm.thread_axis("vthread"))
- s[output].bind(vy, tvm.thread_axis("vthread"))
- s[output].bind(vx, tvm.thread_axis("vthread"))
- s[output].bind(tf, tvm.thread_axis("threadIdx.z"))
- s[output].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[output].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[output].bind(bf, te.thread_axis("blockIdx.z"))
+ s[output].bind(by, te.thread_axis("blockIdx.y"))
+ s[output].bind(bx, te.thread_axis("blockIdx.x"))
+ s[output].bind(vf, te.thread_axis("vthread"))
+ s[output].bind(vy, te.thread_axis("vthread"))
+ s[output].bind(vx, te.thread_axis("vthread"))
+ s[output].bind(tf, te.thread_axis("threadIdx.z"))
+ s[output].bind(ty, te.thread_axis("threadIdx.y"))
+ s[output].bind(tx, te.thread_axis("threadIdx.x"))
s[output].reorder(bf, by, bx, vf, vy, vx, tf, ty, tx, fi, yi, xi)
s[OL].compute_at(s[output], tx)
fused, tx = s[load].split(fused, cfg["tile_x"].size[2])
fused, ty = s[load].split(fused, cfg["tile_y"].size[2])
fused, tz = s[load].split(fused, cfg["tile_f"].size[2])
- s[load].bind(tz, tvm.thread_axis("threadIdx.z"))
- s[load].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[load].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[load].bind(tz, te.thread_axis("threadIdx.z"))
+ s[load].bind(ty, te.thread_axis("threadIdx.y"))
+ s[load].bind(tx, te.thread_axis("threadIdx.x"))
s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val)
s[output].pragma(kernel_scope, 'unroll_explicit', cfg['unroll_explicit'].val)
s: Schedule
The computation schedule for depthwise_conv2d nhwc.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _schedule(temp, Filter, DepthwiseConv2d):
s[temp].compute_inline()
Output = outs[0].op.output(0)
s[DepthwiseConv2d].set_scope("local")
- block_x = tvm.thread_axis("blockIdx.x")
- thread_x = tvm.thread_axis("threadIdx.x")
+ block_x = te.thread_axis("blockIdx.x")
+ thread_x = te.thread_axis("threadIdx.x")
b, h, w, c = s[Output].op.axis
# num_thread here could be 728, it is larger than cuda.max_num_threads
- num_thread = tvm.ir_pass.Simplify(temp.shape[3]).value
+ num_thread = tvm.tir.ir_pass.Simplify(temp.shape[3]).value
target = tvm.target.Target.current()
if target and (target.target_name not in ["cuda", "nvptx"]):
num_thread = target.max_num_threads
if OP not in s.outputs:
s[OP].compute_inline()
for tensor in OP.input_tensors:
- if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops:
+ if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops:
traverse(tensor.op)
# schedule depthwise_conv2d
if OP.tag == 'depthwise_conv2d_nhwc':
PaddedInput = OP.input_tensors[0]
Filter = OP.input_tensors[1]
- if isinstance(Filter.op, tvm.tensor.ComputeOp) and 'dilate' in Filter.op.tag:
+ if isinstance(Filter.op, tvm.te.ComputeOp) and 'dilate' in Filter.op.tag:
s[Filter].compute_inline()
DepthwiseConv2d = OP.output(0)
_schedule(PaddedInput, Filter, DepthwiseConv2d)
The computation schedule for depthwise_conv2d backward
wrt input with layout nhwc.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _schedule(Padded_out_grad, In_grad):
s[Padded_out_grad].compute_inline()
- block_x = tvm.thread_axis("blockIdx.x")
- thread_x = tvm.thread_axis("threadIdx.x")
+ block_x = te.thread_axis("blockIdx.x")
+ thread_x = te.thread_axis("threadIdx.x")
_, h, w, c = In_grad.op.axis
fused_hwc = s[In_grad].fuse(h, w, c)
The computation schedule for depthwise_conv2d backward
wrt weight with layout nhwc.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _schedule(Weight_grad):
- block_x = tvm.thread_axis("blockIdx.x")
- thread_y = tvm.thread_axis("threadIdx.y")
- thread_x = tvm.thread_axis("threadIdx.x")
+ block_x = te.thread_axis("blockIdx.x")
+ thread_y = te.thread_axis("threadIdx.y")
+ thread_x = te.thread_axis("threadIdx.x")
db, dh, dw = Weight_grad.op.reduce_axis
# pylint: disable=invalid-name
"""The template for cuda group_conv2d_nchw"""
import tvm
+from tvm import te
from tvm import autotvm
from .injective import schedule_injective_from_existing
s: Schedule
The computation schedule for group conv2d.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if op.tag == "group_conv2d_nchw":
bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x)
s[output].reorder(bn, bg, bf, by, bx, vn, vg, vf, vy, vx, tn, tf, ty, tx, ni, fi, yi, xi)
- s[output].bind(bn, tvm.thread_axis("blockIdx.z"))
- s[output].bind(s[output].fuse(bg, bf), tvm.thread_axis("blockIdx.y"))
- s[output].bind(s[output].fuse(by, bx), tvm.thread_axis("blockIdx.x"))
- s[output].bind(vn, tvm.thread_axis("vthread"))
- s[output].bind(vg, tvm.thread_axis("vthread"))
- s[output].bind(vf, tvm.thread_axis("vthread"))
- s[output].bind(vy, tvm.thread_axis("vthread"))
- s[output].bind(vx, tvm.thread_axis("vthread"))
+ s[output].bind(bn, te.thread_axis("blockIdx.z"))
+ s[output].bind(s[output].fuse(bg, bf), te.thread_axis("blockIdx.y"))
+ s[output].bind(s[output].fuse(by, bx), te.thread_axis("blockIdx.x"))
+ s[output].bind(vn, te.thread_axis("vthread"))
+ s[output].bind(vg, te.thread_axis("vthread"))
+ s[output].bind(vf, te.thread_axis("vthread"))
+ s[output].bind(vy, te.thread_axis("vthread"))
+ s[output].bind(vx, te.thread_axis("vthread"))
cfg.define_knob("fuse_yx", [0, 1]) # fuse ty,tx or tn,tf
if cfg["fuse_yx"].val:
- s[output].bind(tn, tvm.thread_axis("threadIdx.z"))
- s[output].bind(tf, tvm.thread_axis("threadIdx.y"))
+ s[output].bind(tn, te.thread_axis("threadIdx.z"))
+ s[output].bind(tf, te.thread_axis("threadIdx.y"))
tyx = s[output].fuse(ty, tx)
- s[output].bind(tyx, tvm.thread_axis("threadIdx.x"))
+ s[output].bind(tyx, te.thread_axis("threadIdx.x"))
s[OL].compute_at(s[output], tyx)
# number of threads
n_ty = cfg["tile_f"].size[2]
n_tx = cfg["tile_y"].size[2] * cfg["tile_x"].size[2]
else:
- s[output].bind(s[output].fuse(tn, tf), tvm.thread_axis("threadIdx.z"))
- s[output].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[output].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[output].bind(s[output].fuse(tn, tf), te.thread_axis("threadIdx.z"))
+ s[output].bind(ty, te.thread_axis("threadIdx.y"))
+ s[output].bind(tx, te.thread_axis("threadIdx.x"))
s[OL].compute_at(s[output], tx)
# number of threads
fused, tx = s[load].split(fused, factor=n_tx)
fused, ty = s[load].split(fused, factor=n_ty)
fused, tz = s[load].split(fused, factor=n_tz)
- s[load].bind(tz, tvm.thread_axis("threadIdx.z"))
- s[load].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[load].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[load].bind(tz, te.thread_axis("threadIdx.z"))
+ s[load].bind(ty, te.thread_axis("threadIdx.y"))
+ s[load].bind(tx, te.thread_axis("threadIdx.x"))
# unroll
s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val)
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
4-D with shape [batch, in_channel, in_height, in_width] or
5-D with shape [batch, in_channel_chunk, in_height, in_width, in_channel_block]
- kernel : tvm.Tensor
+ kernel : tvm.te.Tensor
4-D with shape [num_filter, in_channel // groups, filter_height, filter_width] or
6-D with shape [num_filter_chunk, in_channel_chunk // groups, filter_height,
filter_width, num_filter_block, in_channel_block]
Returns
-------
- Output : tvm.Tensor
+ Output : tvm.te.Tensor
5-D with shape [batch, out_channel, out_height, out_width, out_channel_block]
"""
ic_block_factor = 4
assert out_channels % oc_block_factor == 0, \
"Number of output channels per group must divide {}".format(oc_block_factor)
- packed_data = tvm.compute((batch, channels // ic_block_factor, height, width,
- ic_block_factor),
- lambda n, c, h, w, vc: data[n, c*ic_block_factor + vc, h, w],
- name="packed_data")
- packed_kernel = tvm.compute(
+ packed_data = te.compute((batch, channels // ic_block_factor, height, width,
+ ic_block_factor),
+ lambda n, c, h, w, vc: data[n, c*ic_block_factor + vc, h, w],
+ name="packed_data")
+ packed_kernel = te.compute(
(out_channels // oc_block_factor, in_channels // ic_block_factor, kernel_h, kernel_w,
oc_block_factor, ic_block_factor),
lambda oc_chunk, ic_chunk, kh, kw, oc_block, ic_block:
oshape = (batch, oc_chunk, out_height, out_width, oc_block)
- icc = tvm.reduce_axis((0, ic_chunk // groups), name='ic_chunk')
- icb = tvm.reduce_axis((0, ic_block_factor), name='ic_block')
- kh = tvm.reduce_axis((0, kernel_h), name='kh')
- kw = tvm.reduce_axis((0, kernel_w), name='kw')
+ icc = te.reduce_axis((0, ic_chunk // groups), name='ic_chunk')
+ icb = te.reduce_axis((0, ic_block_factor), name='ic_block')
+ kh = te.reduce_axis((0, kernel_h), name='kh')
+ kw = te.reduce_axis((0, kernel_w), name='kw')
# NOTE(kumasento): explanation of this snippet -
# oc_chunk//groups and ic_chunk//groups give you the number of blocks,
#
# Compared with a normal convolution, group convolution only sums
# input channels from the group that an output channel resides in.
- conv = tvm.compute(
+ conv = te.compute(
oshape, lambda n, occ, oh, ow, ocb:
- tvm.sum(pad_data[n, occ//(oc_chunk//groups)*(ic_chunk//groups)+icc,
- oh*stride_h+kh*dilation_h, ow*stride_w+kw*dilation_w, icb]
- .astype('int32') *
- packed_kernel[occ, icc, kh, kw, ocb, icb].astype('int32'),
- axis=[icc, kh, kw, icb]))
+ te.sum(pad_data[n, occ//(oc_chunk//groups)*(ic_chunk//groups)+icc,
+ oh*stride_h+kh*dilation_h, ow*stride_w+kw*dilation_w, icb]
+ .astype('int32') *
+ packed_kernel[occ, icc, kh, kw, ocb, icb].astype('int32'),
+ axis=[icc, kh, kw, icb]))
# Type conversion
- output = tvm.compute(oshape, lambda *index: conv(*index).astype(out_dtype),
- tag='group_conv2d_NCHWc_int8')
+ output = te.compute(oshape, lambda *index: conv(*index).astype(out_dtype),
+ tag='group_conv2d_NCHWc_int8')
num_flop = batch * oc_chunk * oc_block * out_height * out_width * \
- ic_chunk * ic_block * kernel_h * kernel_w * 2 // groups
+ ic_chunk * ic_block * kernel_h * kernel_w * 2 // groups
cfg.add_flop(num_flop)
return output
s: Schedule
The computation schedule for group conv2d.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if op.tag == "group_conv2d_NCHWc_int8":
conv = output.op.input_tensors[0]
packed_data, packed_kernel = conv.op.input_tensors
- if isinstance(packed_data.op, tvm.tensor.ComputeOp) and "pad" in packed_data.op.tag:
+ if isinstance(packed_data.op, tvm.te.ComputeOp) and "pad" in packed_data.op.tag:
pad_data = packed_data
packed_data = pad_data.op.input_tensors[0]
else:
s[packed_kernel].pragma(
s[packed_kernel].op.axis[0], "debug_skip_region")
else:
- if isinstance(packed_kernel.op, tvm.tensor.ComputeOp) and \
+ if isinstance(packed_kernel.op, tvm.te.ComputeOp) and \
packed_kernel.name == 'packed_kernel':
# data and kernel are not pre-computed, schedule layout transform here
schedule_injective_from_existing(s, packed_data)
kernel_scope, n = s[output].split(n, nparts=1)
g, f = s[output].split(f, nparts=groups)
- s[output].bind(n, tvm.thread_axis('blockIdx.z'))
+ s[output].bind(n, te.thread_axis('blockIdx.z'))
bn, vn, tn, ni = cfg["tile_n"].apply(s, output, n)
bg, vg = cfg["tile_g"].apply(s, output, g)
bf, vf, tf, fi = cfg["tile_f"].apply(s, output, f)
s[output].reorder(bn, bg, bf, by, bx, vn, vg, vf, vy,
vx, tn, tf, ty, tx, ni, fi, yi, xi)
- s[output].bind(bn, tvm.thread_axis("blockIdx.z"))
- s[output].bind(s[output].fuse(bg, bf), tvm.thread_axis("blockIdx.y"))
- s[output].bind(s[output].fuse(by, bx), tvm.thread_axis("blockIdx.x"))
- s[output].bind(vn, tvm.thread_axis("vthread"))
- s[output].bind(vg, tvm.thread_axis("vthread"))
- s[output].bind(vf, tvm.thread_axis("vthread"))
- s[output].bind(vy, tvm.thread_axis("vthread"))
- s[output].bind(vx, tvm.thread_axis("vthread"))
+ s[output].bind(bn, te.thread_axis("blockIdx.z"))
+ s[output].bind(s[output].fuse(bg, bf), te.thread_axis("blockIdx.y"))
+ s[output].bind(s[output].fuse(by, bx), te.thread_axis("blockIdx.x"))
+ s[output].bind(vn, te.thread_axis("vthread"))
+ s[output].bind(vg, te.thread_axis("vthread"))
+ s[output].bind(vf, te.thread_axis("vthread"))
+ s[output].bind(vy, te.thread_axis("vthread"))
+ s[output].bind(vx, te.thread_axis("vthread"))
cfg.define_knob("fuse_yx", [0, 1]) # fuse ty,tx or tn,tf
if cfg["fuse_yx"].val:
- s[output].bind(tn, tvm.thread_axis("threadIdx.z"))
- s[output].bind(tf, tvm.thread_axis("threadIdx.y"))
+ s[output].bind(tn, te.thread_axis("threadIdx.z"))
+ s[output].bind(tf, te.thread_axis("threadIdx.y"))
tyx = s[output].fuse(ty, tx)
- s[output].bind(tyx, tvm.thread_axis("threadIdx.x"))
+ s[output].bind(tyx, te.thread_axis("threadIdx.x"))
s[conv].compute_at(s[output], tyx)
# number of threads
n_ty = cfg["tile_f"].size[2]
n_tx = cfg["tile_y"].size[2] * cfg["tile_x"].size[2]
else:
- s[output].bind(tn, tvm.thread_axis("threadIdx.z"))
- s[output].bind(s[output].fuse(tn, tf), tvm.thread_axis("threadIdx.z"))
- s[output].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[output].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[output].bind(tn, te.thread_axis("threadIdx.z"))
+ s[output].bind(s[output].fuse(tn, tf), te.thread_axis("threadIdx.z"))
+ s[output].bind(ty, te.thread_axis("threadIdx.y"))
+ s[output].bind(tx, te.thread_axis("threadIdx.x"))
s[conv].compute_at(s[output], tx)
# number of threads
fused, tx = s[load].split(fused, factor=n_tx)
fused, ty = s[load].split(fused, factor=n_ty)
fused, tz = s[load].split(fused, factor=n_tz)
- s[load].bind(tz, tvm.thread_axis("threadIdx.z"))
- s[load].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[load].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[load].bind(tz, te.thread_axis("threadIdx.z"))
+ s[load].bind(ty, te.thread_axis("threadIdx.y"))
+ s[load].bind(tx, te.thread_axis("threadIdx.x"))
# double buffer
cfg.define_knob('AA_double_buffer', [0, 1])
# pylint: disable=invalid-name, unused-variable,
"""Schedule for composition of injective operator"""
import tvm
+from tvm import te
from .. import util
def schedule_injective_from_existing(sch, out):
xo, xi = sch[out].split(fused, factor=num_thread * max_block)
bx, tx = sch[out].split(xi, factor=num_thread)
sch[out].reorder(bx, tx, xo)
- sch[out].bind(bx, tvm.thread_axis("blockIdx.x"))
- sch[out].bind(tx, tvm.thread_axis("threadIdx.x"))
+ sch[out].bind(bx, te.thread_axis("blockIdx.x"))
+ sch[out].bind(tx, te.thread_axis("threadIdx.x"))
else:
bx, tx = sch[out].split(fused, factor=num_thread)
- sch[out].bind(tx, tvm.thread_axis("threadIdx.x"))
- sch[out].bind(bx, tvm.thread_axis("blockIdx.x"))
+ sch[out].bind(tx, te.thread_axis("threadIdx.x"))
+ sch[out].bind(bx, te.thread_axis("blockIdx.x"))
return sch
sch: Schedule
The computation schedule for the op.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
- tvm.schedule.AutoInlineInjective(s)
+ tvm.te.schedule.AutoInlineInjective(s)
for out in outs:
if not util.is_empty_shape(out.shape):
schedule_injective_from_existing(s, out)
"""Non-maximum suppression operator"""
import math
import tvm
+from tvm import te
-from tvm import api
-from tvm.intrin import if_then_else
+from tvm.tir import if_then_else
from .sort import argsort
from .. import tag
def cuda_atomic_add_rule(op):
if op.dtype == "float32":
- return tvm.call_pure_extern("float32", "atomicAdd", op.args[0], op.args[1])
+ return tvm.tir.call_pure_extern("float32", "atomicAdd", op.args[0], op.args[1])
if op.dtype == "float64":
- return tvm.call_pure_extern("float64", "atomicAdd", op.args[0], op.args[1])
+ return tvm.tir.call_pure_extern("float64", "atomicAdd", op.args[0], op.args[1])
if op.dtype == "int32":
- return tvm.call_pure_extern("int32", "atomicAdd", op.args[0], op.args[1])
+ return tvm.tir.call_pure_extern("int32", "atomicAdd", op.args[0], op.args[1])
raise RuntimeError("only support int32, float32 and float64")
def atomic_add(x, y):
- return tvm.call_pure_intrin(y.dtype, "atomic_add", x, y)
+ return tvm.tir.call_pure_intrin(y.dtype, "atomic_add", x, y)
def get_valid_counts_ir(data, valid_count, flag, score_threshold, id_index, score_index):
num_anchors = data.shape[1]
elem_length = data.shape[2]
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
data = ib.buffer_ptr(data)
flag = ib.buffer_ptr(flag)
atomic_add_return = ib.allocate(
valid_count.dtype, (1,), name='atomic_add_return', scope='local')
- one_count = tvm.const(1, dtype=valid_count.dtype)
- score_threshold = tvm.make.node(
+ one_count = tvm.tir.const(1, dtype=valid_count.dtype)
+ score_threshold = tvm.ir.make_node(
"FloatImm", dtype="float32", value=score_threshold)
- id_index = tvm.make.node("IntImm", dtype="int32", value=id_index)
- score_index = tvm.make.node("IntImm", dtype="int32", value=score_index)
+ id_index = tvm.ir.make_node("IntImm", dtype="int32", value=id_index)
+ score_index = tvm.ir.make_node("IntImm", dtype="int32", value=score_index)
max_threads = int(tvm.target.Target.current(
allow_none=False).max_num_threads)
nthread_tx = max_threads
nthread_bx = batch_size * num_anchors // max_threads + 1
- tx = tvm.thread_axis("threadIdx.x")
- bx = tvm.thread_axis("blockIdx.x")
+ tx = te.thread_axis("threadIdx.x")
+ bx = te.thread_axis("blockIdx.x")
ib.scope_attr(tx, "thread_extent", nthread_tx)
ib.scope_attr(bx, "thread_extent", nthread_bx)
tid = bx * max_threads + tx
- idxd = tvm.indexdiv
+ idxd = tvm.tir.indexdiv
# initialize valid_count
with ib.if_scope(tid < batch_size):
flag[tid] = 0
with ib.if_scope(tid < batch_size * num_anchors):
i = idxd(tid, num_anchors)
- with ib.if_scope(tvm.all(data[tid * elem_length + score_index] > score_threshold,
- tvm.any(id_index < 0, data[tid * elem_length + id_index] >= 0))):
+ with ib.if_scope(
+ tvm.tir.all(data[tid * elem_length + score_index] > score_threshold,
+ tvm.tir.any(id_index < 0, data[tid * elem_length + id_index] >= 0))):
flag[tid] = 1
- atomic_add_return[0] = atomic_add(tvm.call_pure_intrin("handle", "tvm_address_of",
- valid_count[i]), one_count)
+ atomic_add_return[0] = atomic_add(tvm.tir.call_pure_intrin("handle", "tvm_address_of",
+ valid_count[i]), one_count)
return ib.get()
batch_size = flag.shape[0]
num_anchors = flag.shape[1]
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
flag = ib.buffer_ptr(flag)
prefix_sum = ib.buffer_ptr(prefix_sum)
allow_none=False).max_num_threads)
nthread_tx = max_threads
nthread_bx = batch_size * num_anchors // max_threads + 1
- tx = tvm.thread_axis("threadIdx.x")
- bx = tvm.thread_axis("blockIdx.x")
+ tx = te.thread_axis("threadIdx.x")
+ bx = te.thread_axis("blockIdx.x")
ib.scope_attr(tx, "thread_extent", nthread_tx)
ib.scope_attr(bx, "thread_extent", nthread_bx)
tid = bx * max_threads + tx
- idxd = tvm.indexdiv
- idxm = tvm.indexmod
+ idxd = tvm.tir.indexdiv
+ idxm = tvm.tir.indexmod
# initialize prefix_sum
with ib.if_scope(tid < batch_size * num_anchors):
num_anchors = out.shape[1]
elem_length = out.shape[2]
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
- one = tvm.const(1, dtype=out.dtype)
+ one = tvm.tir.const(1, dtype=out.dtype)
data = ib.buffer_ptr(data)
flag = ib.buffer_ptr(flag)
valid_count = ib.buffer_ptr(valid_count)
allow_none=False).max_num_threads)
nthread_tx = max_threads
nthread_bx = batch_size * num_anchors // max_threads + 1
- tx = tvm.thread_axis("threadIdx.x")
- bx = tvm.thread_axis("blockIdx.x")
+ tx = te.thread_axis("threadIdx.x")
+ bx = te.thread_axis("blockIdx.x")
ib.scope_attr(tx, "thread_extent", nthread_tx)
ib.scope_attr(bx, "thread_extent", nthread_bx)
tid = bx * max_threads + tx
- idxd = tvm.indexdiv
- idxm = tvm.indexmod
+ idxd = tvm.tir.indexdiv
+ idxm = tvm.tir.indexmod
with ib.if_scope(tid < batch_size * num_anchors):
i = idxd(tid, num_anchors)
j = idxm(tid, num_anchors)
base_idx = i * num_anchors * elem_length
- with ib.if_scope(tvm.all(flag[tid] > 0, prefix_sum[tid] >= 0,
- prefix_sum[tid] < num_anchors)):
+ with ib.if_scope(tvm.tir.all(flag[tid] > 0, prefix_sum[tid] >= 0,
+ prefix_sum[tid] < num_anchors)):
with ib.for_range(0, elem_length) as k:
out[base_idx + prefix_sum[tid] * elem_length +
k] = data[tid * elem_length + k]
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
Input data. 3-D tensor with shape [batch_size, num_anchors, elem_length].
score_threshold : optional, float
Returns
-------
- valid_count : tvm.Tensor
+ valid_count : tvm.te.Tensor
1-D tensor for valid number of boxes.
- out_tensor : tvm.Tensor
+ out_tensor : tvm.te.Tensor
Rearranged data tensor.
"""
batch_size = data.shape[0]
num_anchors = data.shape[1]
- data_buf = api.decl_buffer(
+ data_buf = tvm.tir.decl_buffer(
data.shape, data.dtype, "data_buf", data_alignment=8)
- valid_count_buf = api.decl_buffer(
+ valid_count_buf = tvm.tir.decl_buffer(
(batch_size,), "int32", "valid_count_buf", data_alignment=8)
- temp_flag_buf = api.decl_buffer(
+ temp_flag_buf = tvm.tir.decl_buffer(
(batch_size, num_anchors,), "int32", "temp_flag", data_alignment=8)
- temp_partial_buf = api.decl_buffer(
+ temp_partial_buf = tvm.tir.decl_buffer(
(batch_size, num_anchors), "int32", "temp_partial", data_alignment=8)
- out_buf = api.decl_buffer(
+ out_buf = tvm.tir.decl_buffer(
data.shape, data.dtype, "out_buf", data_alignment=8)
valid_count, temp_flag = \
- tvm.extern([(batch_size,), (batch_size, num_anchors)], [data],
- lambda ins, outs: get_valid_counts_ir(
+ te.extern([(batch_size,), (batch_size, num_anchors)], [data],
+ lambda ins, outs: get_valid_counts_ir(
ins[0], outs[0], outs[1], score_threshold, id_index, score_index),
dtype=["int32", "int32"],
in_buffers=[data_buf],
tag="get_valid_counts_gpu")
temp_partial = \
- tvm.extern([(batch_size, num_anchors)], [temp_flag],
- lambda ins, outs: flag_scan(
+ te.extern([(batch_size, num_anchors)], [temp_flag],
+ lambda ins, outs: flag_scan(
ins[0], outs[0]),
dtype=["int32"],
in_buffers=[temp_flag_buf],
name="flag_scan")
out = \
- tvm.extern([data.shape], [data, temp_flag, temp_partial, valid_count],
- lambda ins, outs: out_rewrite(
+ te.extern([data.shape], [data, temp_flag, temp_partial, valid_count],
+ lambda ins, outs: out_rewrite(
ins[0], ins[1], ins[2], ins[3], outs[0]),
dtype=[data.dtype],
in_buffers=[data_buf, temp_flag_buf,
def calculate_overlap(out_tensor, box_a_idx, box_b_idx):
"""Calculate overlap of two boxes.
"""
- w = tvm.max(0.0, tvm.min(out_tensor[box_a_idx + 2], out_tensor[box_b_idx + 2])
- - tvm.max(out_tensor[box_a_idx], out_tensor[box_b_idx]))
- h = tvm.max(0.0, tvm.min(out_tensor[box_a_idx + 3], out_tensor[box_b_idx + 3])
- - tvm.max(out_tensor[box_a_idx + 1], out_tensor[box_b_idx + 1]))
+ w = tvm.te.max(0.0, tvm.te.min(out_tensor[box_a_idx + 2], out_tensor[box_b_idx + 2])
+ - tvm.te.max(out_tensor[box_a_idx], out_tensor[box_b_idx]))
+ h = tvm.te.max(0.0, tvm.te.min(out_tensor[box_a_idx + 3], out_tensor[box_b_idx + 3])
+ - tvm.te.max(out_tensor[box_a_idx + 1], out_tensor[box_b_idx + 1]))
i = w * h
u = (out_tensor[box_a_idx + 2] - out_tensor[box_a_idx]) * \
(out_tensor[box_a_idx + 3] - out_tensor[box_a_idx + 1]) + \
(out_tensor[box_b_idx + 2] - out_tensor[box_b_idx]) * \
(out_tensor[box_b_idx + 3] - out_tensor[box_b_idx + 1]) - i
- return tvm.expr.Select(u <= 0.0, 0.0, i / u)
+ return tvm.tir.Select(u <= 0.0, 0.0, i / u)
batch_size = data.shape[0]
num_anchors = data.shape[1]
box_data_length = data.shape[2]
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
data = ib.buffer_ptr(data)
sorted_index = ib.buffer_ptr(sorted_index)
tvm.target.Target.current(allow_none=False).max_num_threads)
nthread_tx = max_threads
nthread_bx = num_anchors // max_threads + 1
- tx = tvm.thread_axis("threadIdx.x")
- bx = tvm.thread_axis("blockIdx.x")
+ tx = te.thread_axis("threadIdx.x")
+ bx = te.thread_axis("blockIdx.x")
ib.scope_attr(tx, "thread_extent", nthread_tx)
ib.scope_attr(bx, "thread_extent", nthread_bx)
j = bx * max_threads + tx
- iou_threshold = tvm.make.node(
+ iou_threshold = tvm.ir.make_node(
"FloatImm", dtype="float32", value=iou_threshold)
- top_k = tvm.make.node("IntImm", dtype="int32", value=top_k)
- coord_start = tvm.make.node("IntImm", dtype="int32", value=coord_start)
- id_index = tvm.make.node("IntImm", dtype="int32", value=id_index)
- score_index = tvm.make.node("IntImm", dtype="int32", value=score_index)
- force_suppress = tvm.make.node(
+ top_k = tvm.ir.make_node("IntImm", dtype="int32", value=top_k)
+ coord_start = tvm.ir.make_node("IntImm", dtype="int32", value=coord_start)
+ id_index = tvm.ir.make_node("IntImm", dtype="int32", value=id_index)
+ score_index = tvm.ir.make_node("IntImm", dtype="int32", value=score_index)
+ force_suppress = tvm.ir.make_node(
"IntImm", dtype="int32", value=1 if force_suppress else 0)
with ib.for_range(0, batch_size, for_type="unroll") as i:
base_idx = i * num_anchors * box_data_length
- with ib.if_scope(tvm.all(iou_threshold > 0, valid_count[i] > 0)):
+ with ib.if_scope(tvm.tir.all(iou_threshold > 0, valid_count[i] > 0)):
# Reorder output
nkeep = if_then_else(
- tvm.all(top_k > 0, top_k < valid_count[i]),
+ tvm.tir.all(top_k > 0, top_k < valid_count[i]),
top_k, valid_count[i])
with ib.if_scope(j < nkeep):
with ib.for_range(0, box_data_length) as k:
* box_data_length + k)]
box_indices[i * num_anchors +
j] = sorted_index[i * num_anchors + j]
- with ib.if_scope(tvm.all(top_k > 0, top_k < valid_count[i])):
+ with ib.if_scope(tvm.tir.all(top_k > 0, top_k < valid_count[i])):
with ib.if_scope(j < valid_count[i] - nkeep):
with ib.for_range(0, box_data_length) as k:
out[(base_idx + (j + nkeep) * box_data_length + k)] = -1.0
# Apply nms
with ib.for_range(0, valid_count[i]) as k:
offset_k = k * box_data_length
- with ib.if_scope(tvm.all(out[base_idx + offset_k + score_index] > 0,
- tvm.any(id_index < 0, out[base_idx +
- offset_k + id_index] >= 0))):
+ with ib.if_scope(
+ tvm.tir.all(out[base_idx + offset_k + score_index] > 0,
+ tvm.tir.any(id_index < 0, out[base_idx +
+ offset_k + id_index] >= 0))):
with ib.if_scope(j < valid_count[i]):
offset_j = j * box_data_length
- with ib.if_scope(tvm.all(j > k,
- out[base_idx + offset_j +
- score_index] > 0,
- tvm.any(id_index < 0,
- out[base_idx + offset_j + id_index] >= 0),
- tvm.any(force_suppress > 0, id_index < 0,
- out[base_idx + offset_k + id_index] ==
- out[base_idx + offset_j + id_index]))):
+ with ib.if_scope(
+ tvm.tir.all(j > k,
+ out[base_idx + offset_j +
+ score_index] > 0,
+ tvm.tir.any(id_index < 0,
+ out[base_idx + offset_j + id_index] >= 0),
+ tvm.tir.any(force_suppress > 0, id_index < 0,
+ out[base_idx + offset_k + id_index] ==
+ out[base_idx + offset_j + id_index]))):
iou = calculate_overlap(out, base_idx + offset_j + coord_start,
base_idx + offset_k + coord_start)
with ib.if_scope(iou >= iou_threshold):
num_anchors = data.shape[1]
elem_length = data.shape[2]
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
data = ib.buffer_ptr(data)
flag = ib.buffer_ptr(flag)
tvm.target.Target.current(allow_none=False).max_num_threads))
nthread_tx = max_threads
nthread_bx = num_anchors // max_threads + 1
- tx = tvm.thread_axis("threadIdx.x")
- bx = tvm.thread_axis("blockIdx.x")
+ tx = te.thread_axis("threadIdx.x")
+ bx = te.thread_axis("blockIdx.x")
ib.scope_attr(tx, "thread_extent", nthread_tx)
ib.scope_attr(bx, "thread_extent", nthread_bx)
j = bx * max_threads + tx
num_anchors = data.shape[1]
elem_length = data.shape[2]
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
data = ib.buffer_ptr(data)
flag = ib.buffer_ptr(flag)
tvm.target.Target.current(allow_none=False).max_num_threads))
nthread_tx = max_threads
nthread_bx = num_anchors // max_threads + 1
- tx = tvm.thread_axis("threadIdx.x")
- bx = tvm.thread_axis("blockIdx.x")
+ tx = te.thread_axis("threadIdx.x")
+ bx = te.thread_axis("blockIdx.x")
ib.scope_attr(tx, "thread_extent", nthread_tx)
ib.scope_attr(bx, "thread_extent", nthread_bx)
j = bx * max_threads + tx
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
3-D tensor with shape [batch_size, num_anchors, elem_length].
The last dimension should be in format of
[class_id, score, box_left, box_top, box_right, box_bottom].
- valid_count : tvm.Tensor
+ valid_count : tvm.te.Tensor
1-D tensor for valid number of boxes.
max_output_size : optional, int
Returns
-------
- out : tvm.Tensor
+ out : tvm.te.Tensor
3-D tensor with shape [batch_size, num_anchors, elem_length].
Example
# An example to use nms
dshape = (1, 5, 6)
- data = tvm.placeholder(dshape, name="data")
- valid_count = tvm.placeholder((dshape[0],), dtype="int32", name="valid_count")
+ data = te.placeholder(dshape, name="data")
+ valid_count = te.placeholder((dshape[0],), dtype="int32", name="valid_count")
iou_threshold = 0.7
force_suppress = True
top_k = -1
num_anchors = data.shape[1]
valid_count_dtype = "int32"
- valid_count_buf = api.decl_buffer(valid_count.shape, valid_count_dtype,
- "valid_count_buf", data_alignment=4)
+ valid_count_buf = tvm.tir.decl_buffer(valid_count.shape, valid_count_dtype,
+ "valid_count_buf", data_alignment=4)
score_axis = score_index
score_shape = (batch_size, num_anchors)
- score_tensor = tvm.compute(
+ score_tensor = te.compute(
score_shape, lambda i, j: data[i, j, score_axis], tag=tag.ELEMWISE)
sort_tensor = argsort(
score_tensor, valid_count=valid_count, axis=1, is_ascend=False)
- sort_tensor_buf = api.decl_buffer(sort_tensor.shape, sort_tensor.dtype,
- "sort_tensor_buf", data_alignment=8)
+ sort_tensor_buf = tvm.tir.decl_buffer(sort_tensor.shape, sort_tensor.dtype,
+ "sort_tensor_buf", data_alignment=8)
- data_buf = api.decl_buffer(
+ data_buf = tvm.tir.decl_buffer(
data.shape, data.dtype, "data_buf", data_alignment=8)
- out_buf = api.decl_buffer(
+ out_buf = tvm.tir.decl_buffer(
data.shape, data.dtype, "out_buf", data_alignment=8)
out, box_indices = \
- tvm.extern([data.shape, score_shape],
- [data, sort_tensor, valid_count],
- lambda ins, outs: nms_ir(
- ins[0], ins[1], ins[2], outs[0], outs[1],
- max_output_size, iou_threshold, force_suppress,
- top_k, coord_start, id_index, score_index),
- dtype=[data.dtype, "int32"],
- in_buffers=[data_buf, sort_tensor_buf, valid_count_buf],
- name="nms",
- tag="nms")
+ te.extern([data.shape, score_shape],
+ [data, sort_tensor, valid_count],
+ lambda ins, outs: nms_ir(
+ ins[0], ins[1], ins[2], outs[0], outs[1],
+ max_output_size, iou_threshold, force_suppress,
+ top_k, coord_start, id_index, score_index),
+ dtype=[data.dtype, "int32"],
+ in_buffers=[data_buf, sort_tensor_buf, valid_count_buf],
+ name="nms",
+ tag="nms")
if return_indices:
return box_indices
if invalid_to_bottom:
- output_buf = api.decl_buffer(
+ output_buf = tvm.tir.decl_buffer(
data.shape, data.dtype, "output_buf", data_alignment=8)
- temp_flag_buf = api.decl_buffer(
+ temp_flag_buf = tvm.tir.decl_buffer(
score_shape, valid_count_dtype, "temp_flag", data_alignment=8)
- temp_idx_buf = api.decl_buffer(
+ temp_idx_buf = tvm.tir.decl_buffer(
score_shape, valid_count_dtype, "temp_idx", data_alignment=8)
- temp_flag, temp_idx = tvm.extern([score_shape, score_shape], [out],
- lambda ins, outs: invalid_to_bottom_pre(
- ins[0], outs[0], outs[1]),
- dtype=["int32", "int32"],
- in_buffers=[out_buf],
- out_buffers=[
- temp_flag_buf, temp_idx_buf],
- name="invalid_to_bottom_phase_one")
-
- output = tvm.extern([data.shape], [out, temp_flag, temp_idx],
- lambda ins, outs: invalid_to_bottom_ir(
- ins[0], ins[1], ins[2], outs[0]),
- dtype=[data.dtype],
- in_buffers=[out_buf, temp_flag_buf, temp_idx_buf],
- out_buffers=[output_buf],
- name="invalid_to_bottom",
- tag="invalid_to_bottom")
+ temp_flag, temp_idx = te.extern([score_shape, score_shape], [out],
+ lambda ins, outs: invalid_to_bottom_pre(
+ ins[0], outs[0], outs[1]),
+ dtype=["int32", "int32"],
+ in_buffers=[out_buf],
+ out_buffers=[
+ temp_flag_buf, temp_idx_buf],
+ name="invalid_to_bottom_phase_one")
+
+ output = te.extern([data.shape], [out, temp_flag, temp_idx],
+ lambda ins, outs: invalid_to_bottom_ir(
+ ins[0], ins[1], ins[2], outs[0]),
+ dtype=[data.dtype],
+ in_buffers=[out_buf, temp_flag_buf, temp_idx_buf],
+ out_buffers=[output_buf],
+ name="invalid_to_bottom",
+ tag="invalid_to_bottom")
return output
return out
# pylint: disable=invalid-name, unused-variable, unused-argument
"""Schedule for pooling operators"""
import tvm
+from tvm import te
from .. import tag
from ..util import traverse_inline
s: Schedule
The computation schedule for adaptive_pool.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _schedule(Pool):
num_thread = 8
- block_x = tvm.thread_axis("blockIdx.x")
- block_y = tvm.thread_axis("blockIdx.y")
- thread_x = tvm.thread_axis((0, num_thread), "threadIdx.x")
- thread_y = tvm.thread_axis((0, num_thread), "threadIdx.y")
+ block_x = te.thread_axis("blockIdx.x")
+ block_y = te.thread_axis("blockIdx.y")
+ thread_x = te.thread_axis((0, num_thread), "threadIdx.x")
+ thread_y = te.thread_axis((0, num_thread), "threadIdx.y")
if Pool.op in s.outputs:
Out = Pool
OL = s.cache_write(Pool, "local")
if OP not in s.outputs:
s[OP].compute_inline()
for tensor in OP.input_tensors:
- if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops:
+ if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops:
traverse(tensor.op)
# schedule global_pool
elif OP.tag.startswith('adaptive_pool'):
s: Schedule
The computation schedule for pool.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _schedule(PaddedInput, Pool):
- if isinstance(PaddedInput.op, tvm.tensor.ComputeOp):
+ if isinstance(PaddedInput.op, tvm.te.ComputeOp):
s[PaddedInput].compute_inline()
num_thread = tvm.target.Target.current(allow_none=False).max_num_threads
if Pool.op in s.outputs:
s[Pool].set_scope("local")
fused = s[Out].fuse(*s[Out].op.axis)
bx, tx = s[Out].split(fused, factor=num_thread)
- s[Out].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[Out].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[Out].bind(bx, te.thread_axis("blockIdx.x"))
+ s[Out].bind(tx, te.thread_axis("threadIdx.x"))
if Pool.op in s.outputs:
s[OL].compute_at(s[Out], tx)
else:
if OP not in s.outputs:
s[OP].compute_inline()
for tensor in OP.input_tensors:
- if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops:
+ if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops:
traverse(tensor.op)
# schedule pool
elif OP.tag.startswith('pool'):
s: Schedule
The computation schedule for pool_grad.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _schedule_pool_grad(op):
if op in s.outputs:
fused = s[out].fuse(*s[out].op.axis)
num_thread = tvm.target.Target.current(allow_none=False).max_num_threads
bx, tx = s[out].split(fused, factor=num_thread)
- s[out].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[out].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[out].bind(bx, te.thread_axis("blockIdx.x"))
+ s[out].bind(tx, te.thread_axis("threadIdx.x"))
if tag.COMM_REDUCE_IDX in op.input_tensors[0].op.tag:
max_pool_index = op.input_tensors[0]
s[max_pool_index].compute_at(s[out], tx)
pool_input = max_pool_index.op.input_tensors[0]
- if isinstance(pool_input.op, tvm.tensor.ComputeOp):
+ if isinstance(pool_input.op, tvm.te.ComputeOp):
# handle padding
s[pool_input].compute_inline()
if op not in s.outputs:
"""Proposal operator"""
import math
import tvm
+from tvm import te
from ...vision.rcnn import generate_anchor, reg_bbox, reg_iou
from ...util import get_const_tuple, get_const_int
Parameters
----------
- cls_prob_buf : tvm.schedule.Buffer
+ cls_prob_buf : tvm.te.schedule.Buffer
4-D with shape [batch, 2 * num_anchors, height, width]
- bbox_pred_buf : tvm.schedule.Buffer
+ bbox_pred_buf : tvm.te.schedule.Buffer
4-D with shape [batch, 4 * num_anchors, height, width]
- im_info_buf : tvm.schedule.Buffer
+ im_info_buf : tvm.te.schedule.Buffer
2-D with shape [batch, 3]
- out_buf : tvm.schedule.Buffer
+ out_buf : tvm.te.schedule.Buffer
3-D with shape [batch, num_bbox, 5]
The last dimension is in format of [w_start, h_start, w_end, h_end, score]
max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
nthread_tx = max_threads
nthread_bx = (batch * height * width) // max_threads + 1
- tx = tvm.thread_axis("threadIdx.x")
- bx = tvm.thread_axis("blockIdx.x")
+ tx = te.thread_axis("threadIdx.x")
+ bx = te.thread_axis("blockIdx.x")
tid = bx * max_threads + tx
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
ib.scope_attr(tx, "thread_extent", nthread_tx)
ib.scope_attr(bx, "thread_extent", nthread_bx)
p_im_info = ib.buffer_ptr(im_info_buf)
p_out = ib.buffer_ptr(out_buf)
- idxm = tvm.indexmod
- idxd = tvm.indexdiv
+ idxm = tvm.tir.indexmod
+ idxd = tvm.tir.indexdiv
with ib.if_scope(tid < batch * height * width):
w = idxm(tid, width)
regression_func = reg_iou if iou_loss else reg_bbox
pred_x1, pred_y1, pred_x2, pred_y2 = regression_func(x1, y1, x2, y2, *delta)
- pred_x1 = tvm.max(tvm.min(pred_x1, im_width - 1.0), 0.0)
- pred_y1 = tvm.max(tvm.min(pred_y1, im_height - 1.0), 0.0)
- pred_x2 = tvm.max(tvm.min(pred_x2, im_width - 1.0), 0.0)
- pred_y2 = tvm.max(tvm.min(pred_y2, im_height - 1.0), 0.0)
+ pred_x1 = tvm.te.max(tvm.te.min(pred_x1, im_width - 1.0), 0.0)
+ pred_y1 = tvm.te.max(tvm.te.min(pred_y1, im_height - 1.0), 0.0)
+ pred_x2 = tvm.te.max(tvm.te.min(pred_x2, im_width - 1.0), 0.0)
+ pred_y2 = tvm.te.max(tvm.te.min(pred_y2, im_height - 1.0), 0.0)
real_height = (im_height / feature_stride).astype('int32')
real_width = (im_width / feature_stride).astype('int32')
min_size = p_im_info[b * 3 + 2] * rpn_min_size
pred_score = p_score[((b * num_anchors * 2 + num_anchors + k) * height + h) * width + w]
- pred_score = tvm.expr.Select(tvm.any(h >= real_height, w >= real_width),
- -1.0, pred_score)
+ pred_score = tvm.tir.Select(tvm.tir.any(h >= real_height, w >= real_width),
+ -1.0, pred_score)
p_out[out_index * 5 + 0] = pred_x1
p_out[out_index * 5 + 1] = pred_y1
p_out[out_index * 5 + 2] = pred_x2
p_out[out_index * 5 + 3] = pred_y2
p_out[out_index * 5 + 4] = pred_score
- with ib.if_scope(tvm.any(bbox_w < min_size, bbox_h < min_size)):
+ with ib.if_scope(tvm.tir.any(bbox_w < min_size, bbox_h < min_size)):
p_out[out_index * 5 + 0] -= min_size / 2.0
p_out[out_index * 5 + 1] -= min_size / 2.0
p_out[out_index * 5 + 2] += min_size / 2.0
Parameters
----------
- data_buf : tvm.schedule.Buffer
+ data_buf : tvm.te.schedule.Buffer
2-D with shape [batch, num_bbox]
- out_index_buf : tvm.schedule.Buffer
+ out_index_buf : tvm.te.schedule.Buffer
2-D with shape [batch, num_bbox]. Indices of data in sorted order.
Returns
"""
batch, num_bbox = get_const_tuple(data_buf.shape)
max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
p_data = ib.buffer_ptr(data_buf)
index_out = ib.buffer_ptr(out_index_buf)
nthread_tx = max_threads
nthread_bx = (num_bbox + 1) // 2 // max_threads + 1
- tx = tvm.thread_axis("threadIdx.x")
- bx = tvm.thread_axis("vthread")
+ tx = te.thread_axis("threadIdx.x")
+ bx = te.thread_axis("vthread")
ib.scope_attr(tx, "thread_extent", nthread_tx)
ib.scope_attr(bx, "virtual_thread", nthread_bx)
tid = bx * nthread_tx + tx
temp_data = ib.allocate("float32", (1,), name="temp_data", scope="local")
temp_index = ib.allocate("int32", (1,), name="temp_index", scope="local")
- idxm = tvm.indexmod
+ idxm = tvm.tir.indexmod
with ib.for_range(0, batch, for_type="unroll") as b:
start = b * num_bbox
with ib.for_range(0, num_bbox) as k:
offset = start + 2 * tid + idxm(k, 2)
with ib.if_scope(
- tvm.all(offset + 1 < num_bbox, p_data[offset] < p_data[offset + 1])):
+ tvm.tir.all(offset + 1 < num_bbox, p_data[offset] < p_data[offset + 1])):
temp_data[0] = p_data[offset]
p_data[offset] = p_data[offset + 1]
p_data[offset + 1] = temp_data[0]
temp_index[0] = index_out[offset]
index_out[offset] = index_out[offset + 1]
index_out[offset + 1] = temp_index[0]
- ib.emit(tvm.make.Call(None, 'tvm_storage_sync',
- tvm.convert(['shared']),
- tvm.expr.Call.Intrinsic, None, 0))
+ ib.emit(tvm.tir.Call(None, 'tvm_storage_sync',
+ tvm.runtime.convert(['shared']),
+ tvm.tir.Call.Intrinsic, None, 0))
return ib.get()
Parameters
----------
- sorted_bbox_buf : tvm.schedule.Buffer
+ sorted_bbox_buf : tvm.te.schedule.Buffer
3-D with shape [batch, num_bbox, 5]. The last dimension is in format of
[w_start, h_start, w_end, h_end, score].
- out_buf : tvm.schedule.Buffer
+ out_buf : tvm.te.schedule.Buffer
2-D with shape [batch, num_bbox]. Boolean mask of whether a bounding box should be removed.
nms_threshold : float
def calculate_overlap(out_tensor, box_a_idx, box_b_idx):
"""Calculate overlap of two boxes.
"""
- w = tvm.max(0.0, tvm.min(out_tensor[box_a_idx + 2], out_tensor[box_b_idx + 2])
- - tvm.max(out_tensor[box_a_idx], out_tensor[box_b_idx]) + 1.0)
- h = tvm.max(0.0, tvm.min(out_tensor[box_a_idx + 3], out_tensor[box_b_idx + 3])
- - tvm.max(out_tensor[box_a_idx + 1], out_tensor[box_b_idx + 1]) + 1.0)
+ w = tvm.te.max(0.0, tvm.te.min(out_tensor[box_a_idx + 2], out_tensor[box_b_idx + 2])
+ - tvm.te.max(out_tensor[box_a_idx], out_tensor[box_b_idx]) + 1.0)
+ h = tvm.te.max(0.0, tvm.te.min(out_tensor[box_a_idx + 3], out_tensor[box_b_idx + 3])
+ - tvm.te.max(out_tensor[box_a_idx + 1], out_tensor[box_b_idx + 1]) + 1.0)
i = w * h
u = (out_tensor[box_a_idx + 2] - out_tensor[box_a_idx] + 1.0) * \
(out_tensor[box_a_idx + 3] - out_tensor[box_a_idx + 1] + 1.0) + \
batch, num_bbox = get_const_tuple(out_buf.shape)
max_threads = int(math.sqrt(tvm.target.Target.current(allow_none=False).max_num_threads))
- tx = tvm.thread_axis("threadIdx.x")
- bx = tvm.thread_axis("blockIdx.x")
- ib = tvm.ir_builder.create()
+ tx = te.thread_axis("threadIdx.x")
+ bx = te.thread_axis("blockIdx.x")
+ ib = tvm.tir.ir_builder.create()
p_data = ib.buffer_ptr(sorted_bbox_buf)
p_out = ib.buffer_ptr(out_buf)
nthread_tx = max_threads
with ib.if_scope(i < num_bbox):
p_out[base_idx + i] = False
with ib.for_range(0, num_bbox - 1) as l:
- with ib.if_scope(tvm.all(i < num_bbox, i > l, p_out[base_idx + l] == False)):
+ with ib.if_scope(tvm.tir.all(i < num_bbox, i > l, p_out[base_idx + l] == False)):
iou = calculate_overlap(p_data, (base_idx + l) * 5, (base_idx + i) * 5)
with ib.if_scope(iou > nms_threshold):
p_out[base_idx + i] = True
- ib.emit(tvm.make.Call(None, 'tvm_storage_sync',
- tvm.convert(['shared']),
- tvm.expr.Call.Intrinsic, None, 0))
+ ib.emit(tvm.tir.Call(None, 'tvm_storage_sync',
+ tvm.runtime.convert(['shared']),
+ tvm.tir.Call.Intrinsic, None, 0))
return ib.get()
Parameters
----------
- sorted_bbox_buf : tvm.schedule.Buffer
+ sorted_bbox_buf : tvm.te.schedule.Buffer
3-D with shape [batch, num_bbox, 5]. The last dimension is in format of
[w_start, h_start, w_end, h_end, score].
- remove_mask_buf : tvm.schedule.Buffer
+ remove_mask_buf : tvm.te.schedule.Buffer
2-D with shape [batch, num_bbox]. Boolean mask of whether a bounding box should be removed.
- out_buf : tvm.schedule.Buffer
+ out_buf : tvm.te.schedule.Buffer
2-D with shape [batch * rpn_post_nms_top_n, 5]. The last dimension is in format of
[batch_index, w_start, h_start, w_end, h_end].
batch, num_bbox, _ = get_const_tuple(sorted_bbox_buf.shape)
rpn_post_nms_top_n = get_const_int(out_buf.shape[0]) // batch
nthread_tx = batch
- tx = tvm.thread_axis("threadIdx.x")
- ib = tvm.ir_builder.create()
+ tx = te.thread_axis("threadIdx.x")
+ ib = tvm.tir.ir_builder.create()
ib.scope_attr(tx, "thread_extent", nthread_tx)
i = ib.allocate('int32', (1,), 'i', scope='local')
i[0] = 0
with ib.if_scope(p_remove[b * num_bbox + j] == False):
nkeep[0] += 1
with ib.if_scope(nkeep[0] > 0):
- with ib.for_range(0, tvm.ceil(
- tvm.const(rpn_post_nms_top_n, 'float32') / nkeep[0]).astype('int32')):
+ with ib.for_range(0, te.ceil(
+ tvm.tir.const(rpn_post_nms_top_n, 'float32') / nkeep[0]).astype('int32')):
with ib.for_range(0, num_bbox) as j:
offset_j = (b * num_bbox + j) * 5
offset_i = (b * rpn_post_nms_top_n + i[0]) * 5
- with ib.if_scope(tvm.all(i[0] < rpn_post_nms_top_n,
- p_remove[(b*num_bbox+j)] == False)):
- p_out[offset_i] = tvm.expr.Cast('float32', b)
+ with ib.if_scope(tvm.tir.all(i[0] < rpn_post_nms_top_n,
+ p_remove[(b*num_bbox+j)] == False)):
+ p_out[offset_i] = tvm.tir.Cast('float32', b)
with ib.for_range(0, 4, for_type='unroll') as k:
p_out[offset_i + k + 1] = p_sorted_bbox[offset_j + k]
i[0] = i[0] + 1
Parameters
----------
- cls_prob : tvm.Tensor
+ cls_prob : tvm.te.Tensor
4-D with shape [batch, 2 * num_anchors, height, width]
- bbox_pred : tvm.Tensor
+ bbox_pred : tvm.te.Tensor
4-D with shape [batch, 4 * num_anchors, height, width]
- im_info : tvm.Tensor
+ im_info : tvm.te.Tensor
2-D with shape [batch, 3]
scales : list/tuple of float
Returns
-------
- out : tvm.Tensor
+ out : tvm.te.Tensor
2-D tensor with shape [batch * rpn_post_nms_top_n, 5]. The last dimension is in format of
[batch_index, w_start, h_start, w_end, h_end].
"""
num_bbox = height * width * num_anchors
rpn_pre_nms_top_n = min(rpn_pre_nms_top_n, num_bbox) if rpn_pre_nms_top_n > 0 else num_bbox
- bbox = tvm.extern((batch, num_bbox, 5), [cls_prob, bbox_pred, im_info], lambda ins, outs:
- predict_bbox_ir(ins[0], ins[1], ins[2], outs[0], scales, ratios,
- feature_stride, rpn_min_size, iou_loss),
- dtype=bbox_pred.dtype)
- score = tvm.compute((batch, num_bbox), lambda b, i: bbox[b, i, 4], tag='bbox_score')
- sorted_index = tvm.extern([score.shape], [score],
- lambda ins, outs: argsort_ir(ins[0], outs[0]),
- dtype='int32')
- sorted_bbox = tvm.compute((batch, rpn_pre_nms_top_n, 5),
- lambda b, i, j: bbox[b, sorted_index[b, i], j], tag='sorted_bbox')
- nms_remove_mask = tvm.extern((batch, rpn_pre_nms_top_n), [sorted_bbox],
- lambda ins, outs: nms_ir(ins[0], outs[0], threshold),
- dtype='bool')
- nms_out = tvm.extern((batch * rpn_post_nms_top_n, 5), [sorted_bbox, nms_remove_mask],
- lambda ins, outs: prepare_output_ir(ins[0], ins[1], outs[0]),
- dtype=sorted_bbox.dtype)
+ bbox = te.extern((batch, num_bbox, 5), [cls_prob, bbox_pred, im_info], lambda ins, outs:
+ predict_bbox_ir(ins[0], ins[1], ins[2], outs[0], scales, ratios,
+ feature_stride, rpn_min_size, iou_loss),
+ dtype=bbox_pred.dtype)
+ score = te.compute((batch, num_bbox), lambda b, i: bbox[b, i, 4], tag='bbox_score')
+ sorted_index = te.extern([score.shape], [score],
+ lambda ins, outs: argsort_ir(ins[0], outs[0]),
+ dtype='int32')
+ sorted_bbox = te.compute((batch, rpn_pre_nms_top_n, 5),
+ lambda b, i, j: bbox[b, sorted_index[b, i], j], tag='sorted_bbox')
+ nms_remove_mask = te.extern((batch, rpn_pre_nms_top_n), [sorted_bbox],
+ lambda ins, outs: nms_ir(ins[0], outs[0], threshold),
+ dtype='bool')
+ nms_out = te.extern((batch * rpn_post_nms_top_n, 5), [sorted_bbox, nms_remove_mask],
+ lambda ins, outs: prepare_output_ir(ins[0], ins[1], outs[0]),
+ dtype=sorted_bbox.dtype)
return nms_out
"""Schedule for reduce operators"""
from __future__ import absolute_import as _abs
import tvm
+from tvm import te
from .. import tag
from .injective import schedule_injective_from_existing
# without it, CL_INVALID_WORK_GROUP_SIZE occurred when running test_topi_reduce.py
# don't know why
num_thread = 16
- block_x = tvm.thread_axis("blockIdx.x")
- thread_x = tvm.thread_axis((0, num_thread), "threadIdx.x")
- thread_y = tvm.thread_axis((0, num_thread), "threadIdx.y")
+ block_x = te.thread_axis("blockIdx.x")
+ thread_x = te.thread_axis((0, num_thread), "threadIdx.x")
+ thread_y = te.thread_axis((0, num_thread), "threadIdx.y")
else:
all_reduce = True
num_thread = tvm.target.Target.current(allow_none=False).max_num_threads
- thread_x = tvm.thread_axis((0, num_thread), "threadIdx.x")
+ thread_x = te.thread_axis((0, num_thread), "threadIdx.x")
# Fuse and refactor the reduce axis
fused_reduce = sch[data_out].fuse(*[sch[data_out].op.reduce_axis[i]
else:
if is_idx_reduce:
spatial_axis = sch[real_output].fuse(*(sch[real_output].op.axis))
- sch[real_output].bind(spatial_axis, tvm.thread_axis("blockIdx.x"))
+ sch[real_output].bind(spatial_axis, te.thread_axis("blockIdx.x"))
sch[temp_idx_input].compute_at(sch[real_output],
spatial_axis)
sch[temp_val_input].compute_at(sch[real_output],
sch: Schedule
The computation schedule for the op.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- sch = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ sch = te.create_schedule([x.op for x in outs])
scheduled_ops = []
def traverse_before_reduce(operator):
"""Internal traverse function"""
- if isinstance(operator, tvm.tensor.PlaceholderOp):
+ if isinstance(operator, tvm.te.PlaceholderOp):
return
if tag.is_injective(operator.tag):
sch[operator].compute_inline()
# under the License.
# pylint: disable=invalid-name, unused-variable, trailing-whitespace
"""Schedule for softmax operator"""
-import tvm
+from tvm import te
from .injective import schedule_injective_from_existing
sch: Schedule
The computation schedule for the op.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
softmax = outs[0]
op_tag = softmax.op.tag
s = schedule_injective_from_existing(s, op.output(0))
else:
num_thread = 64
- block_x = tvm.thread_axis("blockIdx.x")
- thread_x = tvm.thread_axis((0, num_thread), "threadIdx.x")
+ block_x = te.thread_axis("blockIdx.x")
+ thread_x = te.thread_axis((0, num_thread), "threadIdx.x")
if exp is not None:
s[exp].bind(exp.op.axis[0], block_x)
# pylint: disable=invalid-name, no-member, too-many-locals, too-many-arguments, too-many-statements, singleton-comparison, unused-argument
"""Argsort operator """
import tvm
+from tvm import te
-from tvm import api
from .injective import schedule_injective_from_existing
from ..math import identity
from ..transform import strided_slice
s: Schedule
The computation schedule for the op.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
scheduled_ops = []
def traverse(op):
elif i > axis:
axis_mul_after *= value
max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
data = ib.buffer_ptr(data)
values_out = ib.buffer_ptr(values_out)
if indices_out is not None:
nthread_tx = max_threads
nthread_bx = shape[axis] // max_threads + 1
- tx = tvm.thread_axis("threadIdx.x")
- bx = tvm.thread_axis("vthread")
+ tx = te.thread_axis("threadIdx.x")
+ bx = te.thread_axis("vthread")
ib.scope_attr(tx, "thread_extent", nthread_tx)
ib.scope_attr(bx, "virtual_thread", nthread_bx)
tid = bx * nthread_tx + tx
values_out[base_idx + tid * axis_mul_after] = data[base_idx + tid * axis_mul_after]
if indices_out is not None:
indices_out[base_idx + tid * axis_mul_after] = \
- tvm.generic.cast(tid, indices_out.dtype)
- ib.emit(tvm.make.Call(None, 'tvm_storage_sync',
- tvm.convert(['shared']),
- tvm.expr.Call.Intrinsic, None, 0))
- idxd = tvm.indexdiv
- idxm = tvm.indexmod
+ tvm.tir.generic.cast(tid, indices_out.dtype)
+ ib.emit(tvm.tir.Call(None, 'tvm_storage_sync',
+ tvm.runtime.convert(['shared']),
+ tvm.tir.Call.Intrinsic, None, 0))
+ idxd = tvm.tir.indexdiv
+ idxm = tvm.tir.indexmod
with ib.for_range(0, axis_mul_before) as i:
with ib.for_range(0, axis_mul_after) as j:
with ib.if_scope(tid < idxd(current_sort_num + 1, 2)):
offset = base_idx + (2 * tid + idxm(k, 2)) * axis_mul_after
if is_ascend:
- cond = tvm.all(2 * tid + idxm(k, 2) + 1 < current_sort_num,
- values_out[offset] > values_out[offset + axis_mul_after])
+ cond = tvm.tir.all(2 * tid + idxm(k, 2) + 1 < current_sort_num,
+ values_out[offset] > values_out[offset + axis_mul_after])
else:
- cond = tvm.all(2 * tid + idxm(k, 2) + 1 < current_sort_num,
- values_out[offset] < values_out[offset + axis_mul_after])
+ cond = tvm.tir.all(2 * tid + idxm(k, 2) + 1 < current_sort_num,
+ values_out[offset] < values_out[offset + axis_mul_after])
with ib.if_scope(cond):
temp_data[0] = values_out[offset]
values_out[offset] = values_out[offset + axis_mul_after]
temp_index[0] = indices_out[offset]
indices_out[offset] = indices_out[offset + axis_mul_after]
indices_out[offset + axis_mul_after] = temp_index[0]
- ib.emit(tvm.make.Call(None, 'tvm_storage_sync',
- tvm.convert(['shared']),
- tvm.expr.Call.Intrinsic, None, 0))
+ ib.emit(tvm.tir.Call(None, 'tvm_storage_sync',
+ tvm.runtime.convert(['shared']),
+ tvm.tir.Call.Intrinsic, None, 0))
return ib.get()
elif i > axis:
axis_mul_after *= value
max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
data = ib.buffer_ptr(data)
valid_count = ib.buffer_ptr(valid_count)
output = ib.buffer_ptr(output)
nthread_tx = max_threads
nthread_bx = size // max_threads + 1
- tx = tvm.thread_axis("threadIdx.x")
- bx = tvm.thread_axis("vthread")
+ tx = te.thread_axis("threadIdx.x")
+ bx = te.thread_axis("vthread")
ib.scope_attr(tx, "thread_extent", nthread_tx)
ib.scope_attr(bx, "virtual_thread", nthread_bx)
tid = bx * nthread_tx + tx
temp_data = ib.allocate("float32", (1,), name="temp_data", scope="local")
temp_index = ib.allocate("int32", (1,), name="temp_index", scope="local")
- is_ascend = tvm.make.node("IntImm", dtype="int32", value=is_ascend)
+ is_ascend = tvm.ir.make_node("IntImm", dtype="int32", value=is_ascend)
- idxd = tvm.indexdiv
- idxm = tvm.indexmod
+ idxd = tvm.tir.indexdiv
+ idxm = tvm.tir.indexmod
with ib.for_range(0, axis_mul_before) as i:
with ib.for_range(0, axis_mul_after) as j:
with ib.for_range(0, current_sort_num) as k:
with ib.if_scope(tid < idxd(current_sort_num + 1, 2)):
offset = base_idx + (2 * tid + idxm(k, 2)) * axis_mul_after
- with ib.if_scope(tvm.all(is_ascend == 1, \
- 2 * tid + idxm(k, 2) + 1 < current_sort_num, \
- data[offset] > data[offset + axis_mul_after])):
+ with ib.if_scope(tvm.tir.all(is_ascend == 1, \
+ 2 * tid + idxm(k, 2) + 1 < current_sort_num, \
+ data[offset] > data[offset + axis_mul_after])):
temp_data[0] = data[offset]
data[offset] = data[offset + axis_mul_after]
data[offset + axis_mul_after] = temp_data[0]
temp_index[0] = output[offset]
output[offset] = output[offset + axis_mul_after]
output[offset + axis_mul_after] = temp_index[0]
- with ib.if_scope(tvm.all(is_ascend == 0, \
- 2 * tid + idxm(k, 2) + 1 < current_sort_num, \
- data[offset] < data[offset + axis_mul_after])):
+ with ib.if_scope(tvm.tir.all(is_ascend == 0, \
+ 2 * tid + idxm(k, 2) + 1 < current_sort_num, \
+ data[offset] < data[offset + axis_mul_after])):
temp_data[0] = data[offset]
data[offset] = data[offset + axis_mul_after]
data[offset + axis_mul_after] = temp_data[0]
temp_index[0] = output[offset]
output[offset] = output[offset + axis_mul_after]
output[offset + axis_mul_after] = temp_index[0]
- ib.emit(tvm.make.Call(None, 'tvm_storage_sync',
- tvm.convert(['shared']),
- tvm.expr.Call.Intrinsic, None, 0))
+ ib.emit(tvm.tir.Call(None, 'tvm_storage_sync',
+ tvm.runtime.convert(['shared']),
+ tvm.tir.Call.Intrinsic, None, 0))
return ib.get()
Parameters
----------
- data: tvm.Tensor
+ data: tvm.te.Tensor
The input array.
- valid_count : tvm.Tensor, optional
+ valid_count : tvm.te.Tensor, optional
The number of valid elements to be sorted.
axis : int, optional
Returns
-------
- out : tvm.Tensor
+ out : tvm.te.Tensor
The output of this function.
"""
if valid_count is not None:
sorted_data = identity(data)
- sorted_data_buf = api.decl_buffer(data.shape, data.dtype, "sorted_data_buf",
- data_alignment=8)
- valid_count_buf = api.decl_buffer(valid_count.shape, valid_count.dtype,
- "valid_count_buf", data_alignment=4)
- out_buf = api.decl_buffer(data.shape, "int32", "out_buf", data_alignment=4)
- out = tvm.extern([data.shape],
- [sorted_data, valid_count],
- lambda ins, outs: sort_nms_ir(
- ins[0], ins[1], outs[0], axis, is_ascend),
- dtype="int32",
- in_buffers=[sorted_data_buf, valid_count_buf],
- out_buffers=[out_buf],
- name="argsort_nms_gpu",
- tag="argsort_nms_gpu")
+ sorted_data_buf = tvm.tir.decl_buffer(data.shape, data.dtype, "sorted_data_buf",
+ data_alignment=8)
+ valid_count_buf = tvm.tir.decl_buffer(valid_count.shape, valid_count.dtype,
+ "valid_count_buf", data_alignment=4)
+ out_buf = tvm.tir.decl_buffer(data.shape, "int32", "out_buf", data_alignment=4)
+ out = te.extern([data.shape],
+ [sorted_data, valid_count],
+ lambda ins, outs: sort_nms_ir(
+ ins[0], ins[1], outs[0], axis, is_ascend),
+ dtype="int32",
+ in_buffers=[sorted_data_buf, valid_count_buf],
+ out_buffers=[out_buf],
+ name="argsort_nms_gpu",
+ tag="argsort_nms_gpu")
else:
- value_buf = api.decl_buffer(data.shape, data.dtype, "value_buf", data_alignment=8)
- indices_buf = api.decl_buffer(data.shape, dtype, "out_buf", data_alignment=8)
- out = tvm.extern([data.shape, data.shape],
- [data],
- lambda ins, outs: sort_ir(
- ins[0], outs[0], axis, is_ascend, indices_out=outs[1]),
- out_buffers=[value_buf, indices_buf],
- name="argsort_gpu",
- tag="argsort_gpu")[1]
+ value_buf = tvm.tir.decl_buffer(data.shape, data.dtype, "value_buf", data_alignment=8)
+ indices_buf = tvm.tir.decl_buffer(data.shape, dtype, "out_buf", data_alignment=8)
+ out = te.extern([data.shape, data.shape],
+ [data],
+ lambda ins, outs: sort_ir(
+ ins[0], outs[0], axis, is_ascend, indices_out=outs[1]),
+ out_buffers=[value_buf, indices_buf],
+ name="argsort_gpu",
+ tag="argsort_gpu")[1]
return out
def schedule_argsort(outs):
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
The input tensor.
k : int, optional
Returns
-------
- out : tvm.Tensor or List[tvm.Tensor]
+ out : tvm.te.Tensor or List[tvm.te.Tensor]
The computed result.
"""
assert ret_type in ["both", "values", "indices"]
ndim = len(data.shape)
axis = axis + ndim if axis < 0 else axis
assert 0 <= axis < ndim
- values_buf = api.decl_buffer(data.shape, data.dtype, "values_buf", data_alignment=8)
- indices_buf = api.decl_buffer(data.shape, dtype, "indices_buf", data_alignment=8)
+ values_buf = tvm.tir.decl_buffer(data.shape, data.dtype, "values_buf", data_alignment=8)
+ indices_buf = tvm.tir.decl_buffer(data.shape, dtype, "indices_buf", data_alignment=8)
if ret_type == "values":
- output = tvm.extern([data.shape],
- [data],
- lambda ins, outs: sort_ir(
- ins[0], outs[0], axis, is_ascend),
- out_buffers=[values_buf],
- name="topk_gpu",
- tag="topk_gpu")
+ output = te.extern([data.shape],
+ [data],
+ lambda ins, outs: sort_ir(
+ ins[0], outs[0], axis, is_ascend),
+ out_buffers=[values_buf],
+ name="topk_gpu",
+ tag="topk_gpu")
else:
- output = tvm.extern([data.shape, data.shape],
- [data],
- lambda ins, outs: sort_ir(
- ins[0], outs[0], axis, is_ascend, indices_out=outs[1]),
- out_buffers=[values_buf, indices_buf],
- name="topk_gpu",
- tag="topk_gpu")
+ output = te.extern([data.shape, data.shape],
+ [data],
+ lambda ins, outs: sort_ir(
+ ins[0], outs[0], axis, is_ascend, indices_out=outs[1]),
+ out_buffers=[values_buf, indices_buf],
+ name="topk_gpu",
+ tag="topk_gpu")
if k < 1:
if ret_type == "indices":
return output[1]
# under the License.
# pylint: disable=invalid-name, no-member, too-many-locals, too-many-arguments, too-many-statements, too-many-function-args
"""SSD multibox operators"""
-from __future__ import absolute_import as _abs
import math
import tvm
-
-from tvm import api
-from tvm.intrin import if_then_else, exp
+from tvm import te
+from tvm.tir import if_then_else, exp
import topi
"""
max_threads = int(math.sqrt(
tvm.target.Target.current(allow_none=False).max_num_threads))
- tx = tvm.thread_axis("threadIdx.x")
- ty = tvm.thread_axis("threadIdx.y")
- bx = tvm.thread_axis("blockIdx.x")
- by = tvm.thread_axis("blockIdx.y")
- ib = tvm.ir_builder.create()
+ tx = te.thread_axis("threadIdx.x")
+ ty = te.thread_axis("threadIdx.y")
+ bx = te.thread_axis("blockIdx.x")
+ by = te.thread_axis("blockIdx.y")
+ ib = tvm.tir.ir_builder.create()
p_out = ib.buffer_ptr(out)
in_height = data.shape[2]
in_width = data.shape[3]
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
4-D with shape [batch, c_in, h_in, w_in]]
sizes : tuple of float
Returns
-------
- out : tvm.Tensor
+ out : tvm.te.Tensor
3-D tensor with shape [1, h_in * w_in * (num_sizes + num_ratios - 1), 4]
"""
num_sizes = len(sizes)
num_ratios = len(ratios)
oshape = (
1, data.shape[2] * data.shape[3] * (num_sizes + num_ratios - 1), 4)
- out = tvm.extern(oshape, [data], lambda ins, outs:
- multibox_prior_ir(
- ins[0], outs[0], sizes, ratios, steps, offsets),
- tag="multibox_prior")
+ out = te.extern(oshape, [data], lambda ins, outs:
+ multibox_prior_ir(
+ ins[0], outs[0], sizes, ratios, steps, offsets),
+ tag="multibox_prior")
if clip:
out = topi.clip(out, 0, 1)
return out
num_classes = cls_prob.shape[1]
num_anchors = cls_prob.shape[2]
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
cls_prob = ib.buffer_ptr(cls_prob)
cls_id = ib.buffer_ptr(temp_cls_id)
temp_valid_count = ib.buffer_ptr(temp_valid_count)
score = ib.buffer_ptr(temp_score)
- threshold = tvm.make.node("FloatImm", dtype="float32", value=threshold)
+ threshold = tvm.ir.make_node("FloatImm", dtype="float32", value=threshold)
max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
nthread_tx = max_threads
nthread_bx = (batch_size * num_anchors) // max_threads + 1
- tx = tvm.thread_axis("threadIdx.x")
- bx = tvm.thread_axis("blockIdx.x")
+ tx = te.thread_axis("threadIdx.x")
+ bx = te.thread_axis("blockIdx.x")
ib.scope_attr(tx, "thread_extent", nthread_tx)
ib.scope_attr(bx, "thread_extent", nthread_bx)
tid = bx * max_threads + tx
- idxd = tvm.indexdiv
- idxm = tvm.indexmod
+ idxd = tvm.tir.indexdiv
+ idxm = tvm.tir.indexmod
with ib.if_scope(tid < batch_size * num_anchors):
i = idxd(tid, num_anchors)
with ib.for_range(0, num_classes - 1) as k:
temp = cls_prob[i * num_classes * num_anchors + (k + 1) * num_anchors + j]
cls_id[tid] = if_then_else(temp > score[tid], k + 1, cls_id[tid])
- score[tid] = tvm.max(temp, score[tid])
- with ib.if_scope(tvm.all(cls_id[tid] > 0, score[tid] < threshold)):
+ score[tid] = tvm.te.max(temp, score[tid])
+ with ib.if_scope(tvm.tir.all(cls_id[tid] > 0, score[tid] < threshold)):
cls_id[tid] = 0
with ib.if_scope(cls_id[tid] > 0):
temp_valid_count[tid] = 1
with ib.for_range(0, num_anchors) as k:
with ib.if_scope(k > 0):
temp_valid_count[tid * num_anchors + k] += \
- temp_valid_count[tid * num_anchors + k - 1]
+ temp_valid_count[tid * num_anchors + k - 1]
valid_count[i] = temp_valid_count[tid * num_anchors + num_anchors - 1]
return ib.get()
oy = py * vy * ah + ay
ow = exp(pw * vw) * aw / 2.0
oh = exp(ph * vh) * ah / 2.0
- return tvm.if_then_else(clip, tvm.max(0.0, tvm.min(1.0, ox - ow)), ox - ow), \
- tvm.if_then_else(clip, tvm.max(0.0, tvm.min(1.0, oy - oh)), oy - oh), \
- tvm.if_then_else(clip, tvm.max(0.0, tvm.min(1.0, ox + ow)), ox + ow), \
- tvm.if_then_else(clip, tvm.max(0.0, tvm.min(1.0, oy + oh)), oy + oh)
+ return tvm.tir.if_then_else(clip, tvm.te.max(0.0, tvm.te.min(1.0, ox - ow)), ox - ow), \
+ tvm.tir.if_then_else(clip, tvm.te.max(0.0, tvm.te.min(1.0, oy - oh)), oy - oh), \
+ tvm.tir.if_then_else(clip, tvm.te.max(0.0, tvm.te.min(1.0, ox + ow)), ox + ow), \
+ tvm.tir.if_then_else(clip, tvm.te.max(0.0, tvm.te.min(1.0, oy + oh)), oy + oh)
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
loc_pred = ib.buffer_ptr(loc_pred)
anchor = ib.buffer_ptr(anchor)
max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
nthread_tx = max_threads
nthread_bx = (batch_size * num_anchors) // max_threads + 1
- tx = tvm.thread_axis("threadIdx.x")
- bx = tvm.thread_axis("blockIdx.x")
+ tx = te.thread_axis("threadIdx.x")
+ bx = te.thread_axis("blockIdx.x")
ib.scope_attr(tx, "thread_extent", nthread_tx)
ib.scope_attr(bx, "thread_extent", nthread_bx)
tid = bx * max_threads + tx
- idxd = tvm.indexdiv
- idxm = tvm.indexmod
+ idxd = tvm.tir.indexdiv
+ idxm = tvm.tir.indexmod
with ib.if_scope(tid < batch_size * num_anchors):
i = idxd(tid, num_anchors)
Parameters
----------
- cls_prob : tvm.Tensor
+ cls_prob : tvm.te.Tensor
Class probabilities.
- loc_pred : tvm.Tensor
+ loc_pred : tvm.te.Tensor
Location regression predictions.
- anchor : tvm.Tensor
+ anchor : tvm.te.Tensor
Prior anchor boxes.
clip : boolean
Returns
-------
- ret : tuple of tvm.Tensor composed of
+ ret : tuple of tvm.te.Tensor composed of
- out : tvm.Tensor
+ out : tvm.te.Tensor
3-D tensor with shape (batch_size, num_anchors, 6)
- valid_count : tvm.Tensor
+ valid_count : tvm.te.Tensor
1-D tensor with shape (batch_size,), number of valid anchor boxes.
"""
batch_size = cls_prob.shape[0]
valid_count_dtype = "int32"
out_loc_dtype = loc_pred.dtype
- valid_count_buf = api.decl_buffer((batch_size,), valid_count_dtype,
- "valid_count_buf", data_alignment=4)
- loc_pred_buf = api.decl_buffer(loc_pred.shape, loc_pred.dtype,
- "loc_pred_buf", data_alignment=8)
- anchor_buf = api.decl_buffer(anchor.shape, anchor.dtype,
- "anchor_buf", data_alignment=8)
+ valid_count_buf = tvm.tir.decl_buffer((batch_size,), valid_count_dtype,
+ "valid_count_buf", data_alignment=4)
+ loc_pred_buf = tvm.tir.decl_buffer(loc_pred.shape, loc_pred.dtype,
+ "loc_pred_buf", data_alignment=8)
+ anchor_buf = tvm.tir.decl_buffer(anchor.shape, anchor.dtype,
+ "anchor_buf", data_alignment=8)
- temp_valid_count_buf = api.decl_buffer(
+ temp_valid_count_buf = tvm.tir.decl_buffer(
(batch_size, num_anchors,), valid_count_dtype, "temp_valid_count", data_alignment=8)
- temp_cls_id_buf = api.decl_buffer(
+ temp_cls_id_buf = tvm.tir.decl_buffer(
(batch_size, num_anchors,), valid_count_dtype, "temp_cls_id", data_alignment=8)
- temp_score_buf = api.decl_buffer(
+ temp_score_buf = tvm.tir.decl_buffer(
(batch_size, num_anchors,), cls_prob.dtype, "temp_score", data_alignment=8)
valid_count, temp_valid_count, temp_cls_id, temp_score = \
- tvm.extern([(batch_size,), (batch_size, num_anchors,), (batch_size, num_anchors,), \
- (batch_size, num_anchors,)], [cls_prob],
- lambda ins, outs: transform_loc_pre(
- ins[0], outs[0], outs[1], outs[2], outs[3], threshold),
- dtype=[valid_count_dtype, valid_count_dtype, valid_count_dtype, cls_prob.dtype],
- out_buffers=[valid_count_buf, temp_valid_count_buf, \
- temp_cls_id_buf, temp_score_buf],
- tag="multibox_transform_loc_phase_one")
+ te.extern([(batch_size,), (batch_size, num_anchors,), (batch_size, num_anchors,), \
+ (batch_size, num_anchors,)], [cls_prob],
+ lambda ins, outs: transform_loc_pre(
+ ins[0], outs[0], outs[1], outs[2], outs[3], threshold),
+ dtype=[valid_count_dtype, valid_count_dtype, valid_count_dtype, cls_prob.dtype],
+ out_buffers=[valid_count_buf, temp_valid_count_buf, \
+ temp_cls_id_buf, temp_score_buf],
+ tag="multibox_transform_loc_phase_one")
out_loc = \
- tvm.extern([oshape],
- [loc_pred, anchor, temp_valid_count, temp_cls_id, temp_score],
- lambda ins, outs: transform_loc_ir(
- ins[0], ins[1], ins[2], ins[3], ins[4], outs[0], clip, variances, \
- batch_size, num_anchors),
- in_buffers=[loc_pred_buf, anchor_buf, temp_valid_count_buf, \
- temp_cls_id_buf, temp_score_buf],
- dtype=[out_loc_dtype],
- tag="multibox_transform_loc")
+ te.extern([oshape],
+ [loc_pred, anchor, temp_valid_count, temp_cls_id, temp_score],
+ lambda ins, outs: transform_loc_ir(
+ ins[0], ins[1], ins[2], ins[3], ins[4], outs[0], clip, variances, \
+ batch_size, num_anchors),
+ in_buffers=[loc_pred_buf, anchor_buf, temp_valid_count_buf, \
+ temp_cls_id_buf, temp_score_buf],
+ dtype=[out_loc_dtype],
+ tag="multibox_transform_loc")
return [out_loc, valid_count]
Parameters
----------
- cls_prob : tvm.Tensor
+ cls_prob : tvm.te.Tensor
Class probabilities.
- loc_pred : tvm.Tensor
+ loc_pred : tvm.te.Tensor
Location regression predictions.
- anchor : tvm.Tensor
+ anchor : tvm.te.Tensor
Prior anchor boxes.
clip : boolean
Returns
-------
- out : tvm.Tensor
+ out : tvm.te.Tensor
3-D tensor with shape (batch_size, num_anchors, 6)
"""
inter_out = multibox_transform_loc(cls_prob, loc_pred, anchor,
"""Tensor intrinsics on CUDA."""
#pylint: disable=invalid-name
import tvm
+from tvm import te
def dp4a(x_scope='local', y_scope='local', z_scope='local'):
"""
n = 4 # dp4a requires operands packed by 4
- x = tvm.placeholder((n,), name='x', dtype='int8')
- y = tvm.placeholder((n,), name='y', dtype='int8')
+ x = te.placeholder((n,), name='x', dtype='int8')
+ y = te.placeholder((n,), name='y', dtype='int8')
- k = tvm.reduce_axis((0, n), name='rc')
+ k = te.reduce_axis((0, n), name='rc')
- z = tvm.compute((1,), lambda i: tvm.sum(
+ z = te.compute((1,), lambda i: te.sum(
x[k].astype('int32') * y[k].astype('int32'), axis=[k]))
def _intrin_func(ins, outs):
if index == 1:
return zz.vstore(0, 0)
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
vec_x = xx.vload(0, dtype='int8x4')
vec_y = yy.vload(0, dtype='int8x4')
prev_z = 0 if index == 0 else zz.vload(0)
- new_z = tvm.call_pure_extern('int32', '__dp4a', vec_x, vec_y, prev_z)
+ new_z = tvm.tir.call_pure_extern('int32', '__dp4a', vec_x, vec_y, prev_z)
ib.emit(zz.vstore(0, new_z))
return ib.get()
return _instr(0), _instr(1), _instr(2) # body, reset, update
- with tvm.build_config(data_alignment=4, offset_factor=1) as cfg:
+ with tvm.target.build_config(data_alignment=4, offset_factor=1) as cfg:
scopes = {x: x_scope, y: y_scope, z: z_scope}
- binds = {t: tvm.decl_buffer(t.shape, t.dtype, t.op.name,
- data_alignment=cfg.data_alignment,
- offset_factor=cfg.offset_factor,
- scope=scopes[t]) for t in [x, y, z]}
+ binds = {t: tvm.tir.decl_buffer(t.shape, t.dtype, t.op.name,
+ data_alignment=cfg.data_alignment,
+ offset_factor=cfg.offset_factor,
+ scope=scopes[t]) for t in [x, y, z]}
- return tvm.decl_tensor_intrin(z.op, _intrin_func, binds=binds)
+ return te.decl_tensor_intrin(z.op, _intrin_func, binds=binds)
"""Schedule for vision operators"""
from __future__ import absolute_import as _abs
import tvm
+from tvm import te
from .. import cpp
from .. import tag
from .pooling import schedule_pool
def _default_schedule(outs):
"""Default schedule for gpu."""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
scheduled_ops = []
def traverse(op):
if tag.is_broadcast(op.tag) or op.tag in ['bbox_score', 'sorted_bbox']:
# create schedule that dispatches to topi.cuda.schedule_injective
with tvm.target.create("cuda"):
- s = tvm.generic.schedule_injective(outs)
+ s = tvm.tir.generic.schedule_injective(outs)
"""
from __future__ import absolute_import as _abs
# pylint: disable=invalid-name, unused-variable, too-many-locals
# pylint: disable=unused-argument, redefined-builtin
"""Generic convolution schedules"""
-from __future__ import absolute_import as _abs
-import tvm
+from tvm import te
from tvm import autotvm
from tvm.autotvm.task.space import SplitEntity, OtherOptionEntity
from ..util import get_const_tuple
_, _, _, _, oc_bn = get_const_tuple(conv_out.shape)
# schedule pad
- if isinstance(s[data_vec].op, tvm.tensor.ComputeOp) \
+ if isinstance(s[data_vec].op, te.tensor.ComputeOp) \
and "pad" in data_vec.op.tag:
batch, ic_chunk, ih, iw, ic_block = s[data_vec].op.axis
parallel_axis = s[data_vec].fuse(batch, ic_chunk, ih)
# this part will be folded during Relay fold_constant pass.
s[data_vec].pragma(s[data_vec].op.axis[0], "debug_skip_region")
s[kernel_vec].pragma(s[kernel_vec].op.axis[0], "debug_skip_region")
- elif isinstance(kernel_vec.op, tvm.tensor.ComputeOp) and \
+ elif isinstance(kernel_vec.op, te.tensor.ComputeOp) and \
kernel_vec.name == 'kernel_vec':
# data and kernel are not pre-computed, schedule layout transform here.
# this should only be used by x86 conv2d_nchw, which is for
_, _, _, _, oc_bn = get_const_tuple(conv_out.shape)
# schedule pad
- if isinstance(s[data_vec].op, tvm.tensor.ComputeOp) \
+ if isinstance(s[data_vec].op, te.tensor.ComputeOp) \
and "pad" in data_vec.op.tag:
batch, ic_chunk, ih, iw, ic_block = s[data_vec].op.axis
parallel_axis = s[data_vec].fuse(batch, ic_chunk, ih)
# this part will be folded during Relay fold_constant pass.
s[data_vec].pragma(s[data_vec].op.axis[0], "debug_skip_region")
s[kernel_vec].pragma(s[kernel_vec].op.axis[0], "debug_skip_region")
- elif isinstance(kernel_vec.op, tvm.tensor.ComputeOp) and \
+ elif isinstance(kernel_vec.op, te.tensor.ComputeOp) and \
kernel_vec.name == 'kernel_vec':
# data and kernel are not pre-computed, schedule layout transform here.
# this should only be used by x86 conv2d_nchw, which is for
# under the License.
# pylint: disable=invalid-name
"""generic declaration and schedules."""
-from __future__ import absolute_import as _abs
-
import tvm
from .. import cpp
from __future__ import absolute_import as _abs
import tvm
+from tvm import te
def schedule_injective_from_existing(sch, out):
"""Schedule for injective op from existing schedule.
target = tvm.target.Target.current(allow_none=False)
if target.target_name != "llvm":
raise RuntimeError("schedule_injective not registered for '%s'" % target)
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
x = outs[0]
- s = tvm.create_schedule([x.op for x in outs])
- tvm.schedule.AutoInlineInjective(s)
+ s = te.create_schedule([x.op for x in outs])
+ te.schedule.AutoInlineInjective(s)
schedule_injective_from_existing(s, x)
return s
# under the License.
# pylint: disable=invalid-name,unused-argument
"""Generic nn operators"""
-from __future__ import absolute_import as _abs
import tvm
+from tvm import te
def _default_schedule(outs, auto_inline):
"""Default schedule for llvm."""
target = tvm.target.Target.current(allow_none=False)
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
if target.target_name not in ("llvm", "c"):
raise RuntimeError("schedule not registered for '%s'" % target)
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
if auto_inline:
x = outs[0]
- tvm.schedule.AutoInlineInjective(s)
+ te.schedule.AutoInlineInjective(s)
s[x].fuse(s[x].op.axis)
return s
"""
# Typically this is computed in PreCompute pass
# so we make a schedule here for cpu llvm
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
output = outs[0]
_, G = s[output].op.input_tensors
s[G].compute_inline()
The computation schedule for the op.
"""
# Typically this is computed in PreCompute pass
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
return s
"""Generic vision operators"""
from __future__ import absolute_import as _abs
import tvm
+from tvm import te
from .. import cpp
def _default_schedule(outs, auto_inline):
"""Default schedule for llvm."""
target = tvm.target.Target.current(allow_none=False)
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
if target.target_name != "llvm":
raise RuntimeError("schedule not registered for '%s'" % target)
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
if auto_inline:
x = outs[0]
- tvm.schedule.AutoInlineInjective(s)
+ te.schedule.AutoInlineInjective(s)
s[x].fuse(s[x].op.axis)
return s
# under the License.
"""Implementation of generic operators in the presence of Tensor"""
# pylint: disable=invalid-name, too-many-arguments
-from __future__ import absolute_import as _abs
import tvm
+from tvm import te
from . import broadcast as _broadcast
from . import math as _math
Returns
-------
- ret : tvm.Tensor (if at least one operand is non-zero-rank Tensor)
+ ret : tvm.te.Tensor (if at least one operand is non-zero-rank Tensor)
tvm.Expr (otherwise)
The result of {op} operation.
"""
- if not isinstance(lhs, tvm.tensor.Tensor) and not isinstance(rhs, tvm.tensor.Tensor):
+ if not isinstance(lhs, te.tensor.Tensor) and not isinstance(rhs, te.tensor.Tensor):
return orig_bop(lhs, rhs)
return broadcast_bop(lhs, rhs)
_tensor_bop_impl.__doc__ = _tensor_bop_impl.__doc__.format(op=name)
"""Bind generic operators for Tensor."""
# Check __op_priority__ to make sure the binding happens only once.
__op_priority__ = 1
- if __op_priority__ > tvm.generic.__op_priority__:
- tvm.generic.__op_priority__ = __op_priority__
- tvm.generic.add = _make_bop(_broadcast.add, tvm.generic.add)
- tvm.generic.subtract = _make_bop(_broadcast.subtract, tvm.generic.subtract)
- tvm.generic.multiply = _make_bop(_broadcast.multiply, tvm.generic.multiply)
- tvm.generic.divide = _make_bop(_broadcast.divide, tvm.generic.divide)
- tvm.generic.cast = _math.cast
+ if __op_priority__ > tvm.tir.generic.__op_priority__:
+ tvm.tir.generic.__op_priority__ = __op_priority__
+ tvm.tir.generic.add = _make_bop(_broadcast.add, tvm.tir.generic.add)
+ tvm.tir.generic.subtract = _make_bop(_broadcast.subtract, tvm.tir.generic.subtract)
+ tvm.tir.generic.multiply = _make_bop(_broadcast.multiply, tvm.tir.generic.multiply)
+ tvm.tir.generic.divide = _make_bop(_broadcast.divide, tvm.tir.generic.divide)
+ tvm.tir.generic.cast = _math.cast
_bind_generic_ops()
# pylint: disable=invalid-name, unused-variable,
"""Schedule for composition of injective operator"""
import tvm
+from tvm import te
def schedule_injective_from_existing(sch, out):
"""Schedule for injective op from existing schedule.
"""
fused = sch[out].fuse(*sch[out].op.axis)
px, x = sch[out].split(fused, nparts=1)
- sch[out].bind(px, tvm.thread_axis("pipeline"))
+ sch[out].bind(px, te.thread_axis("pipeline"))
return sch
def schedule_injective(outs):
sch: Schedule
The computation schedule for the op.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
- tvm.schedule.AutoInlineInjective(s)
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
+ tvm.te.schedule.AutoInlineInjective(s)
for out in outs:
schedule_injective_from_existing(s, out)
return s
"""HLS nn operators"""
from __future__ import absolute_import as _abs
import tvm
+from tvm import te
from .. import tag
def _schedule_conv2d(outs):
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
- tvm.schedule.AutoInlineInjective(s)
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
+ tvm.te.schedule.AutoInlineInjective(s)
def traverse(OP):
"""Internal traverse function"""
if OP not in s.outputs:
s[OP].compute_inline()
for tensor in OP.input_tensors:
- if isinstance(tensor.op, tvm.tensor.ComputeOp):
+ if isinstance(tensor.op, tvm.te.ComputeOp):
traverse(tensor.op)
# schedule conv2d
elif OP.tag.find("conv2d") >= 0:
traverse(outs[0].op)
px, x = s[outs[0]].split(outs[0].op.axis[0], nparts=1)
- s[outs[0]].bind(px, tvm.thread_axis("pipeline"))
+ s[outs[0]].bind(px, te.thread_axis("pipeline"))
return s
sch: Schedule
The computation schedule for the op.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
- tvm.schedule.AutoInlineInjective(s)
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
+ tvm.te.schedule.AutoInlineInjective(s)
def traverse(OP):
"""Internal traverse function"""
if OP not in s.outputs:
s[OP].compute_inline()
for tensor in OP.input_tensors:
- if isinstance(tensor.op, tvm.tensor.ComputeOp):
+ if isinstance(tensor.op, tvm.te.ComputeOp):
traverse(tensor.op)
elif OP.tag in ["comm_reduce", "comm_reduce_idx"]:
if OP.tag == "comm_reduce":
fused = s[outs[0]].fuse()
px, x = s[outs[0]].split(fused, nparts=1)
- s[outs[0]].bind(px, tvm.thread_axis("pipeline"))
+ s[outs[0]].bind(px, te.thread_axis("pipeline"))
return s
sch: Schedule
The computation schedule for the op.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
- tvm.schedule.AutoInlineInjective(s)
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
+ tvm.te.schedule.AutoInlineInjective(s)
softmax = outs[0]
s[max_elem].compute_at(s[softmax], s[softmax].op.axis[1])
px, x = s[softmax].split(softmax.op.axis[0], nparts=1)
- s[softmax].bind(px, tvm.thread_axis("pipeline"))
+ s[softmax].bind(px, te.thread_axis("pipeline"))
return s
sch: Schedule
The computation schedule for the op.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
- tvm.schedule.AutoInlineInjective(s)
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
+ tvm.te.schedule.AutoInlineInjective(s)
def traverse(OP):
"""Internal traverse function"""
if OP not in s.outputs:
s[OP].compute_inline()
for tensor in OP.input_tensors:
- if isinstance(tensor.op, tvm.tensor.ComputeOp):
+ if isinstance(tensor.op, tvm.te.ComputeOp):
traverse(tensor.op)
# schedule dense
elif OP.tag == 'dense':
traverse(outs[0].op)
px, x = s[outs[0]].split(outs[0].op.axis[0], nparts=1)
- s[outs[0]].bind(px, tvm.thread_axis("pipeline"))
+ s[outs[0]].bind(px, te.thread_axis("pipeline"))
return s
sch: Schedule
The computation schedule for the op.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
- tvm.schedule.AutoInlineInjective(s)
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
+ tvm.te.schedule.AutoInlineInjective(s)
def traverse(OP):
"""Internal traverse function"""
if OP not in s.outputs:
s[OP].compute_inline()
for tensor in OP.input_tensors:
- if isinstance(tensor.op, tvm.tensor.ComputeOp):
+ if isinstance(tensor.op, tvm.te.ComputeOp):
traverse(tensor.op)
# schedule pool
elif OP.tag.startswith('pool'):
traverse(outs[0].op)
px, x = s[outs[0]].split(outs[0].op.axis[0], nparts=1)
- s[outs[0]].bind(px, tvm.thread_axis("pipeline"))
+ s[outs[0]].bind(px, te.thread_axis("pipeline"))
return s
sch: Schedule
The computation schedule for the op.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
- tvm.schedule.AutoInlineInjective(s)
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
+ tvm.te.schedule.AutoInlineInjective(s)
def traverse(OP):
"""Internal traverse function"""
if OP not in s.outputs:
s[OP].compute_inline()
for tensor in OP.input_tensors:
- if isinstance(tensor.op, tvm.tensor.ComputeOp):
+ if isinstance(tensor.op, tvm.te.ComputeOp):
traverse(tensor.op)
# schedule adaptive_pool
elif OP.tag.startswith('adaptive_pool'):
traverse(outs[0].op)
px, x = s[outs[0]].split(outs[0].op.axis[0], nparts=1)
- s[outs[0]].bind(px, tvm.thread_axis("pipeline"))
+ s[outs[0]].bind(px, te.thread_axis("pipeline"))
return s
"""TVM operator input resize compute."""
from __future__ import absolute_import
import tvm
+from tvm import te
from topi.util import nchw_pack_layout, nchw_xc_layout
from .. import tag
def get_2d_pixel(data, layout, boxes, image_height, image_width, n, c, y, x, cc, ib, ic):
""" Get 2d pixel """
if boxes is None:
- y = tvm.max(tvm.min(y, image_height - 1), 0)
- x = tvm.max(tvm.min(x, image_width - 1), 0)
+ y = tvm.te.max(tvm.te.min(y, image_height - 1), 0)
+ x = tvm.te.max(tvm.te.min(x, image_width - 1), 0)
if layout == 'NHWC':
return data(n, y, x, c).astype('float')
if layout == 'NCHW':
indices : tuple
The indices of input data
- data : tvm.Tensor
+ data : tvm.te.Tensor
inputs is a 4-D tensor with shape
[batch, channel, in_height, in_width]
or [batch, in_height, in_width, channel]
target_width : integer
The target resized image width
- boxes : tvm.Tensor, optional
+ boxes : tvm.te.Tensor, optional
A 2-D tensor of shape [num_boxes, 4]. Each row of the tensor specifies
the coordinates of a box.
- box_indices : tvm.Tensor, optional
+ box_indices : tvm.te.Tensor, optional
A 1-D tensor of shape [num_boxes], box_indices[i] specifies the data that
the i-th box refers to.
in_x = w_scale * x
if coordinate_transformation_mode == "align_corners" or boxes is not None:
- closest_x_index = tvm.round(in_x).astype("int32")
- closest_y_index = tvm.round(in_y).astype("int32")
+ closest_x_index = te.round(in_x).astype("int32")
+ closest_y_index = te.round(in_y).astype("int32")
else:
# Add epsilon to floor to prevent gpu rounding errors.
epsilon = 1e-5
- closest_y_index = tvm.floor(in_y + epsilon).astype('int32')
- closest_x_index = tvm.floor(in_x + epsilon).astype('int32')
+ closest_y_index = te.floor(in_y + epsilon).astype('int32')
+ closest_x_index = te.floor(in_x + epsilon).astype('int32')
value = get_2d_pixel(data, layout, boxes, image_height, image_width,
box_idx, c, closest_y_index, closest_x_index, cc, inum, ic)
if extrapolation_value is not None:
- out = tvm.if_then_else(in_y < 0,
- extrapolation_value,
- tvm.if_then_else(in_y > image_height - 1,
- extrapolation_value,
- value))
+ out = tvm.tir.if_then_else(in_y < 0,
+ extrapolation_value,
+ tvm.tir.if_then_else(in_y > image_height - 1,
+ extrapolation_value,
+ value))
# use extrapolation_value if in_x is out of boundary
- value = tvm.if_then_else(in_x < 0,
- extrapolation_value,
- tvm.if_then_else(in_x > image_width - 1,
- extrapolation_value,
- out))
+ value = tvm.tir.if_then_else(in_x < 0,
+ extrapolation_value,
+ tvm.tir.if_then_else(in_x > image_width - 1,
+ extrapolation_value,
+ out))
return _cast_output(value, data.dtype, out_dtype=out_dtype)
indices : tuple
The indices of input data
- data : tvm.Tensor
+ data : tvm.te.Tensor
inputs is a 4-D tensor with shape
[batch, channel, in_height, in_width]
or [batch, in_height, in_width, channel]
target_width : integer
The target resized image width
- boxes : tvm.Tensor, optional
+ boxes : tvm.te.Tensor, optional
A 2-D tensor of shape [num_boxes, 4]. Each row of the tensor specifies
the coordinates of a box.
- box_indices : tvm.Tensor, optional
+ box_indices : tvm.te.Tensor, optional
A 1-D tensor of shape [num_boxes], box_indices[i] specifies the data that
the i-th box refers to.
in_y = h_scale * y
in_x = w_scale * x
- top_y_index = tvm.floor(in_y).astype('int32')
- bottom_y_index = tvm.ceil(in_y).astype('int32')
+ top_y_index = te.floor(in_y).astype('int32')
+ bottom_y_index = te.ceil(in_y).astype('int32')
y_lerp = in_y - top_y_index
- left_x_index = tvm.floor(in_x).astype('int32')
- right_x_index = tvm.ceil(in_x).astype('int32')
+ left_x_index = te.floor(in_x).astype('int32')
+ right_x_index = te.ceil(in_x).astype('int32')
x_lerp = in_x - left_x_index
top_left = get_2d_pixel(data, layout, boxes, image_height, image_width,
# use extrapolation_value if in_y/in_x is out of boundary
if extrapolation_value is not None:
- out = tvm.if_then_else(in_y < 0,
- extrapolation_value,
- tvm.if_then_else(in_y > image_height - 1,
- extrapolation_value,
- value))
- value = tvm.if_then_else(in_x < 0,
- extrapolation_value,
- tvm.if_then_else(in_x > image_width - 1,
- extrapolation_value,
- out))
+ out = tvm.tir.if_then_else(in_y < 0,
+ extrapolation_value,
+ tvm.tir.if_then_else(in_y > image_height - 1,
+ extrapolation_value,
+ value))
+ value = tvm.tir.if_then_else(in_x < 0,
+ extrapolation_value,
+ tvm.tir.if_then_else(in_x > image_width - 1,
+ extrapolation_value,
+ out))
return _cast_output(value, data.dtype, out_dtype=out_dtype)
indices : tuple
The indices of input data
- data : tvm.Tensor
+ data : tvm.te.Tensor
inputs is a 4-D tensor with shape
[batch, channel, in_height, in_width]
or [batch, in_height, in_width, channel]
target_width : integer
The target resized image width
- boxes : tvm.Tensor, optional
+ boxes : tvm.te.Tensor, optional
A 2-D tensor of shape [num_boxes, 4]. Each row of the tensor specifies
the coordinates of a box.
- box_indices : tvm.Tensor, optional
+ box_indices : tvm.te.Tensor, optional
A 1-D tensor of shape [num_boxes], box_indices[i] specifies the data that
the i-th box refers to.
in_y = h_scale * y
in_x = w_scale * x
- xint = tvm.floor(in_x).astype('int32')
- xfract = in_x - tvm.floor(in_x)
+ xint = te.floor(in_x).astype('int32')
+ xfract = in_x - te.floor(in_x)
- yint = tvm.floor(in_y).astype('int32')
- yfract = in_y - tvm.floor(in_y)
+ yint = te.floor(in_y).astype('int32')
+ yfract = in_y - te.floor(in_y)
# 1st row
p00 = _get_pixel(data, layout, boxes, image_height, image_width,
# use extrapolation_value if in_y/in_x is out of boundary
if extrapolation_value is not None:
- out = tvm.if_then_else(in_y < 0,
- extrapolation_value,
- tvm.if_then_else(in_y > image_height - 1,
- extrapolation_value,
- value))
- value = tvm.if_then_else(in_x < 0,
- extrapolation_value,
- tvm.if_then_else(in_x > image_width - 1,
- extrapolation_value,
- out))
+ out = tvm.tir.if_then_else(in_y < 0,
+ extrapolation_value,
+ tvm.tir.if_then_else(in_y > image_height - 1,
+ extrapolation_value,
+ value))
+ value = tvm.tir.if_then_else(in_x < 0,
+ extrapolation_value,
+ tvm.tir.if_then_else(in_x > image_width - 1,
+ extrapolation_value,
+ out))
return _cast_output(value, data.dtype, out_dtype=out_dtype)
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
inputs is a 4-D tensor with shape
[batch, channel, in_height, in_width]
or [batch, in_height, in_width, channel]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
4-D with shape [batch, channel, in_height*scale, in_width*scale]
or [batch, in_height*scale, in_width*scale, channel]
or 5-D with shape [batch, channel-major, in_height*scale, in_width*scale, channel-minor]
return resize_nearest_neighbor(indices, data, in_h, in_w,
size[0], size[1], layout=layout,
coordinate_transformation_mode= \
- coordinate_transformation_mode,
+ coordinate_transformation_mode,
out_dtype=out_dtype)
def _bilinear(*indices):
return resize_bilinear(indices, data, in_h, in_w,
size[0], size[1], layout=layout,
coordinate_transformation_mode= \
- coordinate_transformation_mode,
+ coordinate_transformation_mode,
out_dtype=out_dtype)
def _bicubic(*indices):
return resize_bicubic(indices, data, in_h, in_w,
size[0], size[1], layout,
coordinate_transformation_mode= \
- coordinate_transformation_mode,
+ coordinate_transformation_mode,
out_dtype=out_dtype)
# Determine which interpolation method to use then run it.
else:
raise ValueError('%s method is not supported.' % method)
- return tvm.compute(output_shape, compute_func, name='resize', tag=tag.INJECTIVE)
+ return te.compute(output_shape, compute_func, name='resize', tag=tag.INJECTIVE)
def crop_and_resize(data, boxes, box_indices, crop_size, layout="NCHW",
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
inputs is a 4-D tensor with shape
[batch, channel, in_height, in_width]
or [batch, in_height, in_width, channel]
- boxes : tvm.Tensor
+ boxes : tvm.te.Tensor
A 2-D tensor of shape [num_boxes, 4]. Each row of the tensor specifies
the coordinates of a box.
- box_indices : tvm.Tensor
+ box_indices : tvm.te.Tensor
A 1-D tensor of shape [num_boxes], box_indices[i] specifies the data that
the i-th box refers to.
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
4-D with shape [num_boxes, channel, crop_height, crop_width]
or [num_boxes, crop_height, crop_width, channel]
"""
else:
raise ValueError('%s method is not supported.' % method)
- return tvm.compute(output_shape, compute_func, name='crop_and_resize', tag=tag.INJECTIVE)
+ return te.compute(output_shape, compute_func, name='crop_and_resize', tag=tag.INJECTIVE)
"""Perform resize operation on the data.
Parameters
----------
- inputs: tvm.Tensor
+ inputs: tvm.te.Tensor
inputs is a 5-D tensor with shape
[batch, channel, in_depth, in_height, in_width]
or [batch, in_depth, in_height, in_width, channel]
Type to return. If left None will be same as input type.
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
5-D with shape [batch, channel, in_depth*scale, in_height*scale, in_width*scale]
or [batch, in_depth*scale, in_height*scale, in_width*scale, channel]
or 6-D with shape [batch, channel-major, in_depth*scale, in_height*scale, in_width*scale,
coordinate_transformation_mode))
def _get_pixel(n, c, z, y, x, cc):
- z = tvm.max(tvm.min(z, in_d - 1), 0)
- y = tvm.max(tvm.min(y, in_h - 1), 0)
- x = tvm.max(tvm.min(x, in_w - 1), 0)
+ z = tvm.te.max(tvm.te.min(z, in_d - 1), 0)
+ y = tvm.te.max(tvm.te.min(y, in_h - 1), 0)
+ x = tvm.te.max(tvm.te.min(x, in_w - 1), 0)
if layout == 'NDHWC':
return data(n, z, y, x, c).astype('float')
if layout == 'NCDHW':
in_x = x_ratio * x
if coordinate_transformation_mode == "align_corners":
- zint = tvm.round(in_z).astype('int32')
- yint = tvm.round(in_y).astype('int32')
- xint = tvm.round(in_x).astype('int32')
+ zint = te.round(in_z).astype('int32')
+ yint = te.round(in_y).astype('int32')
+ xint = te.round(in_x).astype('int32')
elif coordinate_transformation_mode in ["asymmetric", "half_pixel"]:
# Add epsilon to floor to prevent gpu rounding errors.
epsilon = 1e-5
- zint = tvm.floor(in_z + epsilon).astype('int32')
- yint = tvm.floor(in_y + epsilon).astype('int32')
- xint = tvm.floor(in_x + epsilon).astype('int32')
+ zint = te.floor(in_z + epsilon).astype('int32')
+ yint = te.floor(in_y + epsilon).astype('int32')
+ xint = te.floor(in_x + epsilon).astype('int32')
else:
raise ValueError("Unsupported coordinate_transformation_mode: {}".format(
coordinate_transformation_mode))
in_y = y_ratio * y
in_x = x_ratio * x
- zint = tvm.floor(in_z).astype('int32')
- zfract = in_z - tvm.floor(in_z)
+ zint = te.floor(in_z).astype('int32')
+ zfract = in_z - te.floor(in_z)
- xint = tvm.floor(in_x).astype('int32')
- xfract = in_x - tvm.floor(in_x)
+ xint = te.floor(in_x).astype('int32')
+ xfract = in_x - te.floor(in_x)
- yint = tvm.floor(in_y).astype('int32')
- yfract = in_y - tvm.floor(in_y)
+ yint = te.floor(in_y).astype('int32')
+ yfract = in_y - te.floor(in_y)
p000 = _get_pixel(n, c, zint, yint, xint, cc)
p001 = _get_pixel(n, c, zint, yint, xint + 1, cc)
else:
raise ValueError('%s method is not supported.' % method)
- return tvm.compute(output_shape, compute_func, name='resize3d', tag=tag.INJECTIVE)
+ return te.compute(output_shape, compute_func, name='resize3d', tag=tag.INJECTIVE)
from __future__ import absolute_import as _abs
import tvm
+from tvm import te
from tvm import autotvm
from tvm.autotvm.task.space import SplitEntity, OtherOptionEntity
xo, xi = s[tensor].split(x, x_factor)
s[tensor].reorder(zo, yo, xo, zi, yi, xi)
- thread_z = tvm.thread_axis((0, z_factor), "threadIdx.z")
- thread_y = tvm.thread_axis((0, y_factor), "threadIdx.y")
- thread_x = tvm.thread_axis((0, x_factor), "threadIdx.x")
- s[tensor].bind(zo, tvm.thread_axis("blockIdx.z"))
+ thread_z = te.thread_axis((0, z_factor), "threadIdx.z")
+ thread_y = te.thread_axis((0, y_factor), "threadIdx.y")
+ thread_x = te.thread_axis((0, x_factor), "threadIdx.x")
+ s[tensor].bind(zo, te.thread_axis("blockIdx.z"))
s[tensor].bind(zi, thread_z)
- s[tensor].bind(yo, tvm.thread_axis("blockIdx.y"))
+ s[tensor].bind(yo, te.thread_axis("blockIdx.y"))
s[tensor].bind(yi, thread_y)
- s[tensor].bind(xo, tvm.thread_axis("blockIdx.x"))
+ s[tensor].bind(xo, te.thread_axis("blockIdx.x"))
s[tensor].bind(xi, thread_x)
return xi, thread_z, thread_y, thread_x
ic_chunk = ic // ic_bn
oc_chunk = oc // oc_bn
- data = tvm.compute((n, ic_chunk, ih, iw, ic_bn),
- lambda bs, c, h, w, vc: data[bs, c*ic_bn + vc, h, w],
- name="data_vec")
+ data = te.compute((n, ic_chunk, ih, iw, ic_bn),
+ lambda bs, c, h, w, vc: data[bs, c*ic_bn + vc, h, w],
+ name="data_vec")
- kernel = tvm.compute(
+ kernel = te.compute(
(oc_chunk, ic_chunk, kh, kw, ic_bn, oc_bn),
lambda occ, icc, k_h, k_w, icb, ocb:
kernel[occ * oc_bn + ocb,
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
4-D with shape [batch, in_channel, in_height, in_width]
- kernel : tvm.Tensor
+ kernel : tvm.te.Tensor
5-D with shape [num_filter, in_channel, filter_height, filter_width, num_filter_vec]
stride : int or a list/tuple of two ints
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
4-D with shape [batch, out_channel, out_height, out_width]
"""
if len(data.shape) == 5:
_create_schedule_template(cfg, data_shape, kernel_shape, strides, padding, dilation)
if cfg.is_fallback:
- _get_default_config(cfg, tvm.placeholder((batch, in_channel, ih, iw), dtype=data.dtype),
- tvm.placeholder((num_filter, in_channel, kernel_height, kernel_width),
- dtype=kernel.dtype),
+ _get_default_config(cfg, te.placeholder((batch, in_channel, ih, iw), dtype=data.dtype),
+ te.placeholder((num_filter, in_channel, kernel_height, kernel_width),
+ dtype=kernel.dtype),
strides, padding, out_dtype)
ic_bn = cfg["tile_ic"].val if hasattr(cfg["tile_ic"], "val") else cfg["tile_ic"].size[-1]
out_width = simplify((iw - kernel_width + pad_left + pad_right) // stride_w + 1)
oshape = (batch, out_channel // oc_bn, out_height, out_width, oc_bn)
- rc = tvm.reduce_axis((0, in_channel), name='rc')
- ry = tvm.reduce_axis((0, kernel_height), name='ry')
- rx = tvm.reduce_axis((0, kernel_width), name='rx')
+ rc = te.reduce_axis((0, in_channel), name='rc')
+ ry = te.reduce_axis((0, kernel_height), name='ry')
+ rx = te.reduce_axis((0, kernel_width), name='rx')
block_h = cfg["block_oh"].val
block_w = cfg["block_ow"].val
else:
temp = data
- conv = tvm.compute(
+ conv = te.compute(
cshape,
lambda nn, ff, yy, xx, ff_v: \
- tvm.sum(
- temp[nn, rc//ic_bn, yy * stride_h + ry, xx * stride_w + rx, rc%ic_bn]. \
- astype(out_dtype) *
- kernel[ff, rc//ic_bn, ry, rx, rc%ic_bn, ff_v].astype(out_dtype),
- axis=[rc, ry, rx]), tag="conv2d_NCHWc", name='conv2d_NCHWc')
+ te.sum(
+ temp[nn, rc//ic_bn, yy * stride_h + ry, xx * stride_w + rx, rc%ic_bn]. \
+ astype(out_dtype) *
+ kernel[ff, rc//ic_bn, ry, rx, rc%ic_bn, ff_v].astype(out_dtype),
+ axis=[rc, ry, rx]), tag="conv2d_NCHWc", name='conv2d_NCHWc')
if DOUNPACK:
- output = tvm.compute(
+ output = te.compute(
oshape,
lambda nn, ff, yy, xx, ff_v:
conv[nn][ff][yy][xx][ff_v],
s: Schedule
The computation schedule for conv2d_nchw.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
"""inline all one-to-one-mapping operators except the last stage (output)"""
# this part will be folded during Relay fold_constant pass.
s[data].pragma(s[data].op.axis[0], "debug_skip_region")
s[kernel].pragma(s[kernel].op.axis[0], "debug_skip_region")
- elif isinstance(kernel.op, tvm.tensor.ComputeOp) and kernel.name == "kernel_vec":
+ elif isinstance(kernel.op, tvm.te.ComputeOp) and kernel.name == "kernel_vec":
# data and kernel are not pre-computed, schedule layout transform here.
# TODO(@Laurawly): Add schedule for data and kernel pack
pass
z_factor = 1
y_factor = 1
x_factor = 16
- thread_z = tvm.thread_axis((0, z_factor), "threadIdx.z")
- thread_y = tvm.thread_axis((0, y_factor), "threadIdx.y")
- thread_x = tvm.thread_axis((0, x_factor), "threadIdx.x")
+ thread_z = te.thread_axis((0, z_factor), "threadIdx.z")
+ thread_y = te.thread_axis((0, y_factor), "threadIdx.y")
+ thread_x = te.thread_axis((0, x_factor), "threadIdx.x")
_, co, oh, ow, vc = s[conv].op.axis
ooh, ioh = s[conv].split(oh, factor=OUTPUT_BLOCK_HEIGHT)
oow, iow = s[conv].split(ow, factor=OUTPUT_BLOCK_WIDTH)
s[conv].bind(oohi, thread_z)
s[conv].bind(oowi, thread_y)
s[conv].bind(vci, thread_x)
- s[conv].bind(ooho, tvm.thread_axis("blockIdx.z"))
- s[conv].bind(oowo, tvm.thread_axis("blockIdx.y"))
- s[conv].bind(coi, tvm.thread_axis("blockIdx.x"))
+ s[conv].bind(ooho, te.thread_axis("blockIdx.z"))
+ s[conv].bind(oowo, te.thread_axis("blockIdx.y"))
+ s[conv].bind(coi, te.thread_axis("blockIdx.x"))
# schedule conv_L
s[conv_L].compute_at(s[conv], vci)
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
4-D with shape [batch, in_channel, in_height, in_width]
- kernel : tvm.Tensor
+ kernel : tvm.te.Tensor
4-D with shape [num_filter, in_channel, filter_height, filter_width]
stride : int or a list/tuple of two ints
stride size, or [stride_height, stride_width]
padding size, or [pad_height, pad_width]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
4-D with shape [batch, out_channel, out_height, out_width]
"""
assert data.shape[0].value == 1, "only support batch size=1 convolution on intel gpu"
s: Schedule
The computation schedule for conv2d_nchw.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
"""inline all one-to-one-mapping operators except the last stage (output)"""
out_width = simplify((in_width - kernel_w + pad_left + pad_right) // stride_w + 1)
oshape = (batch, out_channel, out_height, out_width)
- rc = tvm.reduce_axis((0, in_channel), name='rc')
- ry = tvm.reduce_axis((0, kernel_h), name='ry')
- rx = tvm.reduce_axis((0, kernel_w), name='rx')
+ rc = te.reduce_axis((0, in_channel), name='rc')
+ ry = te.reduce_axis((0, kernel_h), name='ry')
+ rx = te.reduce_axis((0, kernel_w), name='rx')
if stride_h == 2:
if num_filter + kernel_h == 515:
cshape = (batch, out_channel // nv, c_h, c_w, nv)
kvshape = (num_filter // nv, channel, kernel_h, kernel_w, nv)
- kernel_vec = tvm.compute(
+ kernel_vec = te.compute(
kvshape,
lambda co, ci, kh, kw, vc:
kernel[co*nv + vc][ci][kh][kw], name='kernel_vec')
- conv = tvm.compute(
+ conv = te.compute(
cshape,
lambda nn, ff, yy, xx, vc: \
- tvm.sum(
- temp[nn, rc, yy * stride_h + ry, xx * stride_w + rx].astype(out_dtype) *
- kernel_vec[ff, rc, ry, rx, vc].astype(out_dtype),
- axis=[rc, ry, rx]), name='conv', attrs=attrs)
+ te.sum(
+ temp[nn, rc, yy * stride_h + ry, xx * stride_w + rx].astype(out_dtype) *
+ kernel_vec[ff, rc, ry, rx, vc].astype(out_dtype),
+ axis=[rc, ry, rx]), name='conv', attrs=attrs)
- output = tvm.compute(
+ output = te.compute(
oshape,
lambda nn, ff, yy, xx:
conv[nn][ff//nv][yy][xx][ff%nv],
z_factor = 1
y_factor = 1
x_factor = 16
- thread_z = tvm.thread_axis((0, z_factor), "threadIdx.z")
- thread_y = tvm.thread_axis((0, y_factor), "threadIdx.y")
- thread_x = tvm.thread_axis((0, x_factor), "threadIdx.x")
+ thread_z = te.thread_axis((0, z_factor), "threadIdx.z")
+ thread_y = te.thread_axis((0, y_factor), "threadIdx.y")
+ thread_x = te.thread_axis((0, x_factor), "threadIdx.x")
_, co, oh, ow, vc = s[conv].op.axis
ooh, ioh = s[conv].split(oh, factor=OUTPUT_BLOCK_HEIGHT)
oow, iow = s[conv].split(ow, factor=OUTPUT_BLOCK_WIDTH)
s[conv].bind(oohi, thread_z)
s[conv].bind(oowi, thread_y)
s[conv].bind(vci, thread_x)
- s[conv].bind(ooho, tvm.thread_axis("blockIdx.z"))
- s[conv].bind(oowo, tvm.thread_axis("blockIdx.y"))
- s[conv].bind(coi, tvm.thread_axis("blockIdx.x"))
+ s[conv].bind(ooho, te.thread_axis("blockIdx.z"))
+ s[conv].bind(oowo, te.thread_axis("blockIdx.y"))
+ s[conv].bind(coi, te.thread_axis("blockIdx.x"))
# schedule conv_L
s[conv_L].compute_at(s[conv], vci)
"""Conv2D alter op and legalize functions for x86"""
import tvm
+from tvm import te
from tvm import relay
from tvm import autotvm
new_attrs['out_layout'] = 'NCHW%dc' % oc_bn
# Store altered operator's config
- new_data = tvm.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn),
- dtype=data_dtype)
- new_kernel = tvm.placeholder((out_channel//oc_bn, in_channel//ic_bn,
- kh, kw, ic_bn, oc_bn), dtype=kernel_dtype)
+ new_data = te.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn),
+ dtype=data_dtype)
+ new_kernel = te.placeholder((out_channel//oc_bn, in_channel//ic_bn,
+ kh, kw, ic_bn, oc_bn), dtype=kernel_dtype)
new_workload = autotvm.task.args_to_workload(
[new_data, new_kernel, strides, padding, dilation, new_attrs["data_layout"],
new_attrs["out_layout"], out_dtype], "conv2d_NCHWc.intel_graphics")
# pylint: disable=invalid-name
"""Schedule for depthwise_conv2d with auto fusion"""
import tvm
+from tvm import te
from tvm import autotvm
from ..util import traverse_inline
from .. import tag
s: Schedule
The computation schedule for depthwise_conv2d nchw.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if op.tag == 'depthwise_conv2d_nchw':
##### space definition end #####
s[pad_data].compute_inline()
- if isinstance(kernel.op, tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag:
+ if isinstance(kernel.op, tvm.te.ComputeOp) and 'dilate' in kernel.op.tag:
s[kernel].compute_inline()
if conv.op in s.outputs:
kernel_scope, n = s[output].split(n, nparts=1)
bf = s[output].fuse(n, bf)
- s[output].bind(bf, tvm.thread_axis("blockIdx.z"))
- s[output].bind(by, tvm.thread_axis("blockIdx.y"))
- s[output].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[output].bind(vf, tvm.thread_axis("vthread"))
- s[output].bind(vy, tvm.thread_axis("vthread"))
- s[output].bind(vx, tvm.thread_axis("vthread"))
- s[output].bind(tf, tvm.thread_axis("threadIdx.z"))
- s[output].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[output].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[output].bind(bf, te.thread_axis("blockIdx.z"))
+ s[output].bind(by, te.thread_axis("blockIdx.y"))
+ s[output].bind(bx, te.thread_axis("blockIdx.x"))
+ s[output].bind(vf, te.thread_axis("vthread"))
+ s[output].bind(vy, te.thread_axis("vthread"))
+ s[output].bind(vx, te.thread_axis("vthread"))
+ s[output].bind(tf, te.thread_axis("threadIdx.z"))
+ s[output].bind(ty, te.thread_axis("threadIdx.y"))
+ s[output].bind(tx, te.thread_axis("threadIdx.x"))
s[output].reorder(bf, by, bx, vf, vy, vx, tf, ty, tx, fi, yi, xi)
s[OL].compute_at(s[output], tx)
fused, tx = s[load].split(fused, cfg["tile_x"].size[2])
fused, ty = s[load].split(fused, cfg["tile_y"].size[2])
fused, tz = s[load].split(fused, cfg["tile_f"].size[2])
- s[load].bind(tz, tvm.thread_axis("threadIdx.z"))
- s[load].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[load].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[load].bind(tz, te.thread_axis("threadIdx.z"))
+ s[load].bind(ty, te.thread_axis("threadIdx.y"))
+ s[load].bind(tx, te.thread_axis("threadIdx.x"))
s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val)
s[output].pragma(kernel_scope, 'unroll_explicit', cfg['unroll_explicit'].val)
s: Schedule
The computation schedule for depthwise_conv2d nhwc.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _schedule(temp, Filter, DepthwiseConv2d):
s[temp].compute_inline()
Output = outs[0].op.output(0)
s[DepthwiseConv2d].set_scope("local")
- block_x = tvm.thread_axis("blockIdx.x")
- thread_x = tvm.thread_axis("threadIdx.x")
+ block_x = te.thread_axis("blockIdx.x")
+ thread_x = te.thread_axis("threadIdx.x")
b, h, w, c = s[Output].op.axis
# num_thread here could be 728, which is larger than cuda.max_num_threads
- num_thread = tvm.ir_pass.Simplify(temp.shape[3]).value
+ num_thread = tvm.tir.ir_pass.Simplify(temp.shape[3]).value
target = tvm.target.Target.current()
if target and (target.target_name not in ["cuda", "nvptx"]):
num_thread = target.max_num_threads
if OP.tag == 'depthwise_conv2d_nhwc':
PaddedInput = OP.input_tensors[0]
Filter = OP.input_tensors[1]
- if isinstance(Filter.op, tvm.tensor.ComputeOp) and 'dilate' in Filter.op.tag:
+ if isinstance(Filter.op, tvm.te.ComputeOp) and 'dilate' in Filter.op.tag:
s[Filter].compute_inline()
DepthwiseConv2d = OP.output(0)
_schedule(PaddedInput, Filter, DepthwiseConv2d)
The computation schedule for depthwise_conv2d backward
wrt input with layout nhwc.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _schedule(Padded_out_grad, In_grad):
s[Padded_out_grad].compute_inline()
- block_x = tvm.thread_axis("blockIdx.x")
- thread_x = tvm.thread_axis("threadIdx.x")
+ block_x = te.thread_axis("blockIdx.x")
+ thread_x = te.thread_axis("threadIdx.x")
_, h, w, c = In_grad.op.axis
fused_hwc = s[In_grad].fuse(h, w, c)
The computation schedule for depthwise_conv2d backward
wrt weight with layout nhwc.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _schedule(Weight_grad):
- block_x = tvm.thread_axis("blockIdx.x")
- thread_y = tvm.thread_axis("threadIdx.y")
- thread_x = tvm.thread_axis("threadIdx.x")
+ block_x = te.thread_axis("blockIdx.x")
+ thread_y = te.thread_axis("threadIdx.y")
+ thread_x = te.thread_axis("threadIdx.x")
db, dh, dw = Weight_grad.op.reduce_axis
# pylint: disable=invalid-name,unused-variable,unused-argument,no-else-return
"""conv2d schedule on ARM Mali GPU"""
import tvm
+from tvm import te
from tvm import relay
from tvm import autotvm
from tvm.autotvm.task.space import get_factors
cfg: ConfigEntity
The config for this template
- data : tvm.Tensor
+ data : tvm.te.Tensor
4-D with shape [batch, in_channel, in_height, in_width]
- kernel : tvm.Tensor
+ kernel : tvm.te.Tensor
4-D with shape [num_filter, in_channel, filter_height, filter_width] or
pre-packed 5-D with shape [num_filter_chunk, in_channel, filter_height,
filter_width, num_filter_block]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
4-D with shape [batch, out_channel, out_height, out_width]
"""
return conv2d_spatial_pack_nchw(cfg, data, kernel, strides, padding,
s: Schedule
The computation schedule for conv2d
"""
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
# schedule conv2d
kernel = kernel_vec.op.input_tensors[0]
else:
kernel = kernel_vec
- if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag:
+ if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag:
s[kernel].compute_inline()
_schedule_spatial_pack(cfg, s, output, conv, data_vec, kernel_vec)
BW, TW, VW = cfg["tile_ow"].size
# schedule padding
- if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag:
+ if isinstance(data.op, tvm.te.ComputeOp) and "pad" in data.op.tag:
data_pad = data
s[data_pad].compute_inline()
# schedule data packing
- if isinstance(data_vec.op, tvm.tensor.ComputeOp) and data_vec.op.name == 'data_vec_undilated':
+ if isinstance(data_vec.op, tvm.te.ComputeOp) and data_vec.op.name == 'data_vec_undilated':
_, h, w, ci, _, _, vh, vw = s[data_vec].op.axis
else:
_, h, w, ci, vh, vw = s[data_vec].op.axis
if vw.dom.extent.value < max_unroll:
s[data_vec].unroll(vw)
- if isinstance(kernel_vec.op, tvm.tensor.ComputeOp) and kernel_vec.name == 'kernel_vec':
+ if isinstance(kernel_vec.op, tvm.te.ComputeOp) and kernel_vec.name == 'kernel_vec':
if autotvm.GLOBAL_SCOPE.in_tuning:
# kernel packing will be pre-computed during compilation, so we skip
# this part to make tuning records correct
fused = s[kernel_vec].fuse(co, ci, kh, kw, vc)
fused, vec = s[kernel_vec].split(fused, VC)
bb, tt = s[kernel_vec].split(fused, max_threads)
- s[kernel_vec].bind(bb, tvm.thread_axis("blockIdx.x"))
- s[kernel_vec].bind(tt, tvm.thread_axis("threadIdx.x"))
+ s[kernel_vec].bind(bb, te.thread_axis("blockIdx.x"))
+ s[kernel_vec].bind(tt, te.thread_axis("threadIdx.x"))
if VC in vec_size:
s[kernel_vec].vectorize(vec)
@autotvm.register_topi_schedule("conv2d_nchw_winograd.mali")
def schedule_conv2d_nchw_winograd(cfg, outs):
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if 'winograd_conv2d_output' in op.tag:
assert CO % bna == 0 and P_round % bnb == 0
# pack input tile
- input_tile = tvm.compute((CI, P_round // bnb, alpha, alpha, bnb), lambda ci, b, eps, nu, bb: \
- tvm.if_then_else(
- b * bnb + bb < P,
- data_pad[(b*bnb+bb) // (nH*nW)][ci][(b*bnb+bb) // nW % nH * m + eps]
- [(b*bnb+bb) % nW * m + nu], tvm.const(0, data_pad.dtype)), name='d')
+ input_tile = te.compute(
+ (CI, P_round // bnb, alpha, alpha, bnb), lambda ci, b, eps, nu, bb: \
+ tvm.tir.if_then_else(
+ b * bnb + bb < P,
+ data_pad[(b*bnb+bb) // (nH*nW)][ci][(b*bnb+bb) // nW % nH * m + eps]
+ [(b*bnb+bb) % nW * m + nu], tvm.tir.const(0, data_pad.dtype)), name='d')
# transform kernel
if pre_computed:
U = kernel
else:
- r_kh = tvm.reduce_axis((0, KH), 'r_kh')
- r_kw = tvm.reduce_axis((0, KW), 'r_kw')
- U = tvm.compute((alpha, alpha, CO // bna, CI, bna), lambda eps, nu, co, ci, vco:
- tvm.sum(kernel[co * bna + vco][ci][r_kh][r_kw] * G[eps][r_kh] * G[nu][r_kw],
- axis=[r_kh, r_kw]), name='U')
+ r_kh = te.reduce_axis((0, KH), 'r_kh')
+ r_kw = te.reduce_axis((0, KW), 'r_kw')
+ U = te.compute((alpha, alpha, CO // bna, CI, bna), lambda eps, nu, co, ci, vco:
+ te.sum(kernel[co * bna + vco][ci][r_kh][r_kw] * G[eps][r_kh] * G[nu][r_kw],
+ axis=[r_kh, r_kw]), name='U')
# transform image
- r_a = tvm.reduce_axis((0, alpha), 'r_a')
- r_b = tvm.reduce_axis((0, alpha), 'r_b')
- V = tvm.compute((alpha, alpha, P_round // bnb, CI, bnb), lambda eps, nu, p, ci, vp:
- tvm.sum(input_tile[ci][p][r_a][r_b][vp] * B[r_a][eps] * B[r_b][nu],
- axis=[r_a, r_b]), name='V')
+ r_a = te.reduce_axis((0, alpha), 'r_a')
+ r_b = te.reduce_axis((0, alpha), 'r_b')
+ V = te.compute((alpha, alpha, P_round // bnb, CI, bnb), lambda eps, nu, p, ci, vp:
+ te.sum(input_tile[ci][p][r_a][r_b][vp] * B[r_a][eps] * B[r_b][nu],
+ axis=[r_a, r_b]), name='V')
- idxdiv = tvm.indexdiv
- idxmod = tvm.indexmod
+ idxdiv = tvm.tir.indexdiv
+ idxmod = tvm.tir.indexmod
# batch gemm
- ci = tvm.reduce_axis((0, CI), name='c')
- M = tvm.compute((alpha, alpha, CO, P_round), lambda eps, nu, co, p:
- tvm.sum(U[eps][nu][idxdiv(co, bna)][ci][idxmod(co, bna)] *
- V[eps][nu][idxdiv(p, bnb)][ci][idxmod(p, bnb)], axis=ci), name='M')
+ ci = te.reduce_axis((0, CI), name='c')
+ M = te.compute((alpha, alpha, CO, P_round), lambda eps, nu, co, p:
+ te.sum(U[eps][nu][idxdiv(co, bna)][ci][idxmod(co, bna)] *
+ V[eps][nu][idxdiv(p, bnb)][ci][idxmod(p, bnb)], axis=ci), name='M')
- r_a = tvm.reduce_axis((0, alpha), 'r_a')
- r_b = tvm.reduce_axis((0, alpha), 'r_b')
- Y = tvm.compute((CO, P, m, m), lambda co, p, vh, vw:
- tvm.sum(M[r_a][r_b][co][p] * A[r_a][vh] * A[r_b][vw],
- axis=[r_a, r_b]), name='Y')
+ r_a = te.reduce_axis((0, alpha), 'r_a')
+ r_b = te.reduce_axis((0, alpha), 'r_b')
+ Y = te.compute((CO, P, m, m), lambda co, p, vh, vw:
+ te.sum(M[r_a][r_b][co][p] * A[r_a][vh] * A[r_b][vw],
+ axis=[r_a, r_b]), name='Y')
# unpack output
- output = tvm.compute((N, CO, H, W), lambda n, co, h, w:
- Y[co, n * nH * nW + idxdiv(h, m) * nW + idxdiv(w, m),
- idxmod(h, m), idxmod(w, m)]
- # The following hack term is used to make the padding in batch gemm ("M")
- # effective, otherwise the padding will be eliminated by bound inference.
- # Use `tvm.expr.Mul` instead of `*` to avoid issues in const folding.
- + tvm.expr.Mul(tvm.const(0, out_dtype),
- M[alpha-1][alpha-1][CO-1][P_round-1]),
- name='output', tag='winograd_conv2d_output')
+ output = te.compute((N, CO, H, W), lambda n, co, h, w:
+ Y[co, n * nH * nW + idxdiv(h, m) * nW + idxdiv(w, m),
+ idxmod(h, m), idxmod(w, m)]
+ # The following hack term is used to make the padding in batch gemm ("M")
+ # effective, otherwise the padding will be eliminated by bound inference.
+ # Use `tvm.tir.Mul` instead of `*` to avoid issues in const folding.
+ + tvm.tir.Mul(tvm.tir.const(0, out_dtype),
+ M[alpha-1][alpha-1][CO-1][P_round-1]),
+ name='output', tag='winograd_conv2d_output')
# we have to manually assign effective GFLOP for winograd
cfg.add_flop(2 * N * CO * H * W * KH * KW * CI)
s[data_pad].compute_inline()
# transform kernel
- if isinstance(U.op, tvm.tensor.ComputeOp):
+ if isinstance(U.op, tvm.te.ComputeOp):
kernel, G = s[U].op.input_tensors
s[G].compute_inline()
eps, nu, co, ci, vco, = s[U].op.axis
tile_and_bind(s, U, co, ci, 1, 256)
# dilation
- if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag:
+ if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag:
s[kernel].compute_inline()
# transform image
fused = s[V].fuse(p, ci)
bb, tt = cfg['tile_t1'].apply(s, V, fused)
- s[V].bind(bb, tvm.thread_axis('blockIdx.x'))
- s[V].bind(tt, tvm.thread_axis('threadIdx.x'))
+ s[V].bind(bb, te.thread_axis('blockIdx.x'))
+ s[V].bind(tt, te.thread_axis('threadIdx.x'))
eps, nu, p, ci, vp = s[VL].op.axis
r_a, r_b = s[VL].op.reduce_axis
s[output].unroll(wi)
fused = s[output].fuse(n, co, h, w)
bb, tt = cfg['tile_t2'].apply(s, output, fused)
- s[output].bind(bb, tvm.thread_axis('blockIdx.x'))
- s[output].bind(tt, tvm.thread_axis('threadIdx.x'))
+ s[output].bind(bb, te.thread_axis('blockIdx.x'))
+ s[output].bind(tt, te.thread_axis('threadIdx.x'))
s[Y].compute_at(s[output], tt)
data, kernel = tinfos
out_dtype = out_type.dtype
- idxd = tvm.indexdiv
+ idxd = tvm.tir.indexdiv
if topi_tmpl == "conv2d_nchw_spatial_pack.mali":
assert data_layout == "NCHW" and kernel_layout == "OIHW"
new_attrs['kernel_layout'] = 'OIHW%do' % VC
new_data = data
- new_kernel = tvm.placeholder((idxd(CO, VC), CI, KH, KW, VC), dtype=kernel.dtype)
+ new_kernel = te.placeholder((idxd(CO, VC), CI, KH, KW, VC), dtype=kernel.dtype)
new_workload = autotvm.task.args_to_workload(
[new_data, new_kernel, strides, padding, dilation, out_dtype],
"conv2d_nchw_spatial_pack.mali")
new_attrs['tile_size'] = tile_size
new_data = data
- new_kernel = tvm.placeholder((KH + tile_size - 1,
- KW + tile_size -1,
- idxd(CO, VC), CI, VC),
- kernel.dtype)
+ new_kernel = te.placeholder((KH + tile_size - 1,
+ KW + tile_size -1,
+ idxd(CO, VC), CI, VC),
+ kernel.dtype)
new_workload = autotvm.task.args_to_workload(
[new_data, new_kernel, strides, padding, dilation, out_dtype],
'conv2d_nchw_winograd.mali')
""" tile and bind to GPU threads """
x_factor = x_factor or y_factor
yo, xo, yi, xi = s[tensor].tile(y, x, y_factor, x_factor)
- s[tensor].bind(xo, tvm.thread_axis("blockIdx.x"))
- s[tensor].bind(xi, tvm.thread_axis("threadIdx.x"))
- s[tensor].bind(yo, tvm.thread_axis("blockIdx.y"))
- s[tensor].bind(yi, tvm.thread_axis("threadIdx.y"))
+ s[tensor].bind(xo, te.thread_axis("blockIdx.x"))
+ s[tensor].bind(xi, te.thread_axis("threadIdx.x"))
+ s[tensor].bind(yo, te.thread_axis("blockIdx.y"))
+ s[tensor].bind(yi, te.thread_axis("threadIdx.y"))
return yo, xo, yi, xi
zo, zi = s[tensor].split(z, z_factor)
yo, yi = s[tensor].split(y, y_factor)
xo, xi = s[tensor].split(x, x_factor)
- s[tensor].bind(zo, tvm.thread_axis("blockIdx.z"))
- s[tensor].bind(zi, tvm.thread_axis("threadIdx.z"))
- s[tensor].bind(yo, tvm.thread_axis("blockIdx.y"))
- s[tensor].bind(yi, tvm.thread_axis("threadIdx.y"))
- s[tensor].bind(xo, tvm.thread_axis("blockIdx.x"))
- s[tensor].bind(xi, tvm.thread_axis("threadIdx.x"))
+ s[tensor].bind(zo, te.thread_axis("blockIdx.z"))
+ s[tensor].bind(zi, te.thread_axis("threadIdx.z"))
+ s[tensor].bind(yo, te.thread_axis("blockIdx.y"))
+ s[tensor].bind(yi, te.thread_axis("threadIdx.y"))
+ s[tensor].bind(xo, te.thread_axis("blockIdx.x"))
+ s[tensor].bind(xi, te.thread_axis("threadIdx.x"))
s[tensor].reorder(zo, yo, xo, zi, yi, xi)
return zo, yo, xo, zi, yi, xi
# under the License.
# pylint: disable=invalid-name,unused-variable
"""dense schedule on ARM Mali GPU"""
-
-from __future__ import absolute_import as _abs
-
-import tvm
+from tvm import te
from tvm import autotvm
from .. import nn
s: Schedule
The computation schedule for dense.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if op.tag == 'dense':
by, ty, yi = cfg['tile_y'].apply(s, output, y)
bx, tx, xi = cfg['tile_x'].apply(s, output, x)
- s[output].bind(by, tvm.thread_axis('blockIdx.y'))
- s[output].bind(bx, tvm.thread_axis('blockIdx.x'))
- s[output].bind(ty, tvm.thread_axis('threadIdx.y'))
- s[output].bind(tx, tvm.thread_axis('threadIdx.x'))
+ s[output].bind(by, te.thread_axis('blockIdx.y'))
+ s[output].bind(bx, te.thread_axis('blockIdx.x'))
+ s[output].bind(ty, te.thread_axis('threadIdx.y'))
+ s[output].bind(tx, te.thread_axis('threadIdx.x'))
if cfg['tile_y'].size[-1] < max_unroll:
s[output].unroll(yi)
axis = axis or s[tensor].op.axis
fused = s[tensor].fuse(*axis)
bx, tx = s[tensor].split(fused, num_thread)
- s[tensor].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[tensor].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[tensor].bind(bx, te.thread_axis("blockIdx.x"))
+ s[tensor].bind(tx, te.thread_axis("threadIdx.x"))
return bx, tx
"""depthwise_conv2d schedule on ARM Mali GPU"""
import tvm
+from tvm import te
from tvm import autotvm
from .. import nn
s: Schedule
The computation schedule for depthwise_conv2d nchw.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _schedule(pad_data, kernel, conv):
"""schedule depthwise_conv2d"""
tile_and_bind3d(s, pad_data, c, y, x, cfg["tile_c"].size[1], 1, 1)
# schedule dilation
- if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag:
+ if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag:
s[kernel].compute_inline()
# schedule conv
bx, tx, xi = cfg['tile_x'].apply(s, output, x)
bc = s[output].fuse(n, bc)
- s[output].bind(bc, tvm.thread_axis("blockIdx.z"))
- s[output].bind(tc, tvm.thread_axis("threadIdx.z"))
- s[output].bind(by, tvm.thread_axis("blockIdx.y"))
- s[output].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[output].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[output].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[output].bind(bc, te.thread_axis("blockIdx.z"))
+ s[output].bind(tc, te.thread_axis("threadIdx.z"))
+ s[output].bind(by, te.thread_axis("blockIdx.y"))
+ s[output].bind(ty, te.thread_axis("threadIdx.y"))
+ s[output].bind(bx, te.thread_axis("blockIdx.x"))
+ s[output].bind(tx, te.thread_axis("threadIdx.x"))
di, dj = s[OL].op.reduce_axis
s[OL].unroll(di)
zo, zi = s[tensor].split(z, z_factor)
yo, yi = s[tensor].split(y, y_factor)
xo, xi = s[tensor].split(x, x_factor)
- s[tensor].bind(zo, tvm.thread_axis("blockIdx.z"))
- s[tensor].bind(zi, tvm.thread_axis("threadIdx.z"))
- s[tensor].bind(yo, tvm.thread_axis("blockIdx.y"))
- s[tensor].bind(yi, tvm.thread_axis("threadIdx.y"))
- s[tensor].bind(xo, tvm.thread_axis("blockIdx.x"))
- s[tensor].bind(xi, tvm.thread_axis("threadIdx.x"))
+ s[tensor].bind(zo, te.thread_axis("blockIdx.z"))
+ s[tensor].bind(zi, te.thread_axis("threadIdx.z"))
+ s[tensor].bind(yo, te.thread_axis("blockIdx.y"))
+ s[tensor].bind(yi, te.thread_axis("threadIdx.y"))
+ s[tensor].bind(xo, te.thread_axis("blockIdx.x"))
+ s[tensor].bind(xi, te.thread_axis("threadIdx.x"))
return zo, zi, yo, yi, xo, xi
# under the License.
"""Elementwise operators"""
# pylint: disable=redefined-builtin
-from __future__ import absolute_import as _abs
import tvm
+from tvm import te
from . import tag
from . import cpp
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
def identity(x):
"""Take identity of input x.
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
# pylint: disable=unnecessary-lambda
- return tvm.compute(x.shape, lambda *i: x(*i))
+ return te.compute(x.shape, lambda *i: x(*i))
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
def negative(x):
"""Take negation of input x.
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
# pylint: disable=unnecessary-lambda
- return tvm.compute(x.shape, lambda *i: -x(*i))
+ return te.compute(x.shape, lambda *i: -x(*i))
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
def exp(x):
"""Take exponential of input x.
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
- return tvm.compute(x.shape, lambda *i: tvm.exp(x(*i)))
+ return te.compute(x.shape, lambda *i: te.exp(x(*i)))
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
def erf(x):
"""Take Gauss error function of input x.
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
- return tvm.compute(x.shape, lambda *i: tvm.erf(x(*i)))
+ return te.compute(x.shape, lambda *i: te.erf(x(*i)))
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
def tanh(x):
"""Take hyperbolic tanh of input x.
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
- return tvm.compute(x.shape, lambda *i: tvm.tanh(x(*i)))
+ return te.compute(x.shape, lambda *i: te.tanh(x(*i)))
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
def cos(x):
"""Take cos of input x.
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
- return tvm.compute(x.shape, lambda *i: tvm.cos(x(*i)))
+ return te.compute(x.shape, lambda *i: te.cos(x(*i)))
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
def sin(x):
"""Take sin of input x.
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
- return tvm.compute(x.shape, lambda *i: tvm.sin(x(*i)))
+ return te.compute(x.shape, lambda *i: te.sin(x(*i)))
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
def atan(x):
"""Take atan of input x.
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
- return tvm.compute(x.shape, lambda *i: tvm.atan(x(*i)))
+ return te.compute(x.shape, lambda *i: te.atan(x(*i)))
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
def floor(x):
"""Take floor of input x.
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
- return tvm.compute(x.shape, lambda *i: tvm.floor(x(*i)))
+ return te.compute(x.shape, lambda *i: te.floor(x(*i)))
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
def ceil(x):
"""Take ceil of input x.
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
- return tvm.compute(x.shape, lambda *i: tvm.ceil(x(*i)))
+ return te.compute(x.shape, lambda *i: te.ceil(x(*i)))
def sign(x):
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
return cpp.sign(x)
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
def trunc(x):
"""Take truncated value of the input of x, element-wise.
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
- return tvm.compute(x.shape, lambda *i: tvm.trunc(x(*i)))
+ return te.compute(x.shape, lambda *i: te.trunc(x(*i)))
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
def abs(x):
"""Take absolute value of the input of x, element-wise.
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
- return tvm.compute(x.shape, lambda *i: tvm.abs(x(*i)))
+ return te.compute(x.shape, lambda *i: te.abs(x(*i)))
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
def isnan(x):
"""Check if value of x is NaN, element-wise.
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
- return tvm.compute(x.shape, lambda *i: tvm.isnan(x(*i)))
+ return te.compute(x.shape, lambda *i: te.isnan(x(*i)))
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
def round(x):
"""Round elements of x to nearest integer.
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
- return tvm.compute(x.shape, lambda *i: tvm.round(x(*i)))
+ return te.compute(x.shape, lambda *i: te.round(x(*i)))
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
def log(x):
"""Take logarithm of input x.
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
- return tvm.compute(x.shape, lambda *i: tvm.log(x(*i)))
+ return te.compute(x.shape, lambda *i: te.log(x(*i)))
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
def sqrt(x):
"""Take square root of input x.
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
- return tvm.compute(x.shape, lambda *i: tvm.sqrt(x(*i)))
+ return te.compute(x.shape, lambda *i: te.sqrt(x(*i)))
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
def rsqrt(x):
"""Take inverse square root of input x.
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
- return tvm.compute(x.shape, lambda *i: tvm.rsqrt(x(*i)))
+ return te.compute(x.shape, lambda *i: te.rsqrt(x(*i)))
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
def sigmoid(x):
"""Take sigmoid of input x.
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
- return tvm.compute(x.shape, lambda *i: tvm.sigmoid(x(*i)))
+ return te.compute(x.shape, lambda *i: te.sigmoid(x(*i)))
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
def left_shift(x, n):
"""Take n bits left shift of input x.
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
n : int
Number of bits.
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
- return tvm.compute(x.shape, lambda *i: x(*i) << n)
+ return te.compute(x.shape, lambda *i: x(*i) << n)
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
def right_shift(x, n):
"""Take n bits right shift of input x.
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
n : int
Number of bits.
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
- return tvm.compute(x.shape, lambda *i: x(*i) >> n)
+ return te.compute(x.shape, lambda *i: x(*i) >> n)
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
def clip(x, a_min, a_max):
"""Clip (limit) the values in an array. Given an interval, values
outside the interval are clipped to the interval edges.
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
a_min : int or float
Minimum value.
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
def _compute(*indices):
value = x(*indices)
- const_min = tvm.const(a_min, value.dtype)
- const_max = tvm.const(a_max, value.dtype)
- return tvm.max(tvm.min(value, const_max), const_min)
- return tvm.compute(x.shape, _compute)
+ const_min = tvm.tir.const(a_min, value.dtype)
+ const_max = tvm.tir.const(a_max, value.dtype)
+ return tvm.te.max(tvm.te.min(value, const_max), const_min)
+ return te.compute(x.shape, _compute)
def cast(x, dtype):
Parameters
----------
- x : tvm.Tensor or Expr
+ x : tvm.te.Tensor or Expr
Input argument.
dtype : str
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
- if isinstance(x, tvm.tensor.Tensor):
- return tvm.compute(
+ if isinstance(x, te.tensor.Tensor):
+ return te.compute(
x.shape, lambda *i: x(*i).astype(dtype), tag=tag.ELEMWISE)
# pylint: disable=import-outside-toplevel
from tvm.tir import _ffi_api
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
dtype : str
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
return cpp.reinterpret(x, dtype)
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
return cpp.fast_exp(x, x.dtype, tag.ELEMWISE)
# under the License.
"""Batch matrix multiplication operators"""
# pylint: disable=invalid-name
-from __future__ import absolute_import as _abs
-import tvm
+from tvm import te
from ..util import get_const_tuple
def batch_matmul(x, y):
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
3-D with shape [batch, M, K]
- y : tvm.Tensor
+ y : tvm.te.Tensor
3-D with shape [batch, N, K]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
3-D with shape [batch, M, N]
"""
assert len(x.shape) == 3 and len(y.shape) == 3, "only support 3-dim batch_matmul"
assert x_shape[2] == y_shape[2], "shapes of x and y is inconsistant"
batch, M, K = x.shape
N = y.shape[1]
- k = tvm.reduce_axis((0, K), name='k')
- return tvm.compute((batch, M, N),
- lambda b, i, j: tvm.sum(x[b, i, k] * y[b, j, k], axis=k),
- tag='batch_matmul')
+ k = te.reduce_axis((0, K), name='k')
+ return te.compute((batch, M, N),
+ lambda b, i, j: te.sum(x[b, i, k] * y[b, j, k], axis=k),
+ tag='batch_matmul')
# pylint: disable=invalid-name, too-many-locals, too-many-arguments
# pylint: disable=unused-argument, redefined-builtin
"""Bitserial Conv2D operators"""
-from __future__ import absolute_import as _abs
import tvm
+from tvm import te
from .pad import pad
from .util import get_pad_tuple
from .bitserial_util import bitpack
Parameters
----------
- input : tvm.Tensor
+ input : tvm.te.Tensor
4-D with shape [batch, in_channel, in_height, in_width]
- filter : tvm.Tensor
+ filter : tvm.te.Tensor
4-D with shape [num_filter, in_channel, filter_height, filter_width]
stride : int or a list/tuple of two ints
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
4-D with shape [batch, out_channel, out_height, out_width]
"""
assert isinstance(stride, int) or len(stride) == 2
out_height = (in_height - kernel_h + TPAD + DPAD) // stride_h + 1
out_width = (in_width - kernel_w + LPAD + RPAD) // stride_w + 1
- rc = tvm.reduce_axis((0, in_channel), name='rc')
- ry = tvm.reduce_axis((0, kernel_h), name='ry')
- rx = tvm.reduce_axis((0, kernel_w), name='rx')
- b1 = tvm.reduce_axis((0, activation_bits), name='b1')
- b2 = tvm.reduce_axis((0, weight_bits), name='b2')
+ rc = te.reduce_axis((0, in_channel), name='rc')
+ ry = te.reduce_axis((0, kernel_h), name='ry')
+ rx = te.reduce_axis((0, kernel_w), name='rx')
+ b1 = te.reduce_axis((0, activation_bits), name='b1')
+ b2 = te.reduce_axis((0, weight_bits), name='b2')
if unipolar:
def _conv(nn, ff, yy, xx):
b1b2 = (b1+b2).astype(out_dtype)
- return tvm.sum(
- ((tvm.popcount(PadInput_q[nn, rc, b1, yy * stride_h + ry, xx * stride_w + rx] &
- Filter_q[ff, rc, ry, rx, b2]) -
- tvm.popcount(PadInput_q[nn, rc, b1, yy * stride_h + ry, xx * stride_w + rx] &
- ~Filter_q[ff, rc, ry, rx, b2]))
+ return te.sum(
+ ((tvm.tir.popcount(PadInput_q[nn, rc, b1, yy * stride_h + ry, xx * stride_w + rx] &
+ Filter_q[ff, rc, ry, rx, b2]) -
+ tvm.tir.popcount(PadInput_q[nn, rc, b1, yy * stride_h + ry, xx * stride_w + rx] &
+ ~Filter_q[ff, rc, ry, rx, b2]))
<< (b1b2)).astype(out_dtype),
axis=[rc, ry, rx, b2, b1]).astype(out_dtype)
else:
def _conv(nn, ff, yy, xx):
b1b2 = (b1+b2).astype(out_dtype)
- return tvm.sum((tvm.popcount(
+ return te.sum((tvm.tir.popcount(
PadInput_q[nn, rc, b1, yy * stride_h + ry, xx * stride_w + rx] &
Filter_q[ff, rc, ry, rx, b2])<< (b1b2)).astype(out_dtype),
- axis=[rc, ry, rx, b2, b1]).astype(out_dtype)
+ axis=[rc, ry, rx, b2, b1]).astype(out_dtype)
- return tvm.compute((batch, out_channel, out_height, out_width), _conv,
- name="Conv2dOutput", tag="bitserial_conv2d_nchw")
+ return te.compute((batch, out_channel, out_height, out_width), _conv,
+ name="Conv2dOutput", tag="bitserial_conv2d_nchw")
def bitserial_conv2d_nhwc(data, kernel, stride, padding, activation_bits, weight_bits,
pack_dtype='uint32', out_dtype='int16', unipolar=True):
Parameters
----------
- input : tvm.Tensor
+ input : tvm.te.Tensor
4-D with shape [batch, in_height, in_width, in_channel]
- filter : tvm.Tensor
+ filter : tvm.te.Tensor
4-D with shape [filter_height, filter_width, in_channel, num_filter]
stride : int or a list/tuple of two ints
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
4-D with shape [batch, out_height, out_width, out_channel]
"""
assert isinstance(stride, int) or len(stride) == 2
out_width = (in_width - kernel_w + LPAD + RPAD) // stride_w + 1
PadInput_q = pad(Input_q, pad_before, pad_after, name="PaddedInput")
- rc = tvm.reduce_axis((0, in_channel_q), name='rc')
- ry = tvm.reduce_axis((0, kernel_h), name='ry')
- rx = tvm.reduce_axis((0, kernel_w), name='rx')
- b1 = tvm.reduce_axis((0, activation_bits), name='b1')
- b2 = tvm.reduce_axis((0, weight_bits), name='b2')
+ rc = te.reduce_axis((0, in_channel_q), name='rc')
+ ry = te.reduce_axis((0, kernel_h), name='ry')
+ rx = te.reduce_axis((0, kernel_w), name='rx')
+ b1 = te.reduce_axis((0, activation_bits), name='b1')
+ b2 = te.reduce_axis((0, weight_bits), name='b2')
if unipolar:
def _conv(nn, yy, xx, ff):
b1b2 = (b1+b2).astype(out_dtype)
- return tvm.sum(
- ((tvm.popcount(PadInput_q[nn, yy * stride_h + ry, xx * stride_w + rx, rc, b1] &
- Filter_q[ry, rx, rc, ff, b2]) -
- tvm.popcount(PadInput_q[nn, yy * stride_h + ry, xx * stride_w + rx, rc, b1] &
- ~Filter_q[ry, rx, rc, ff, b2]))
+ return te.sum(
+ ((tvm.tir.popcount(PadInput_q[nn, yy * stride_h + ry, xx * stride_w + rx, rc, b1] &
+ Filter_q[ry, rx, rc, ff, b2]) -
+ tvm.tir.popcount(PadInput_q[nn, yy * stride_h + ry, xx * stride_w + rx, rc, b1] &
+ ~Filter_q[ry, rx, rc, ff, b2]))
<< b1b2).astype(out_dtype),
axis=[rc, ry, rx, b2, b1])
else:
def _conv(nn, yy, xx, ff):
b1b2 = (b1+b2).astype(out_dtype)
- return tvm.sum((tvm.popcount(
+ return te.sum((tvm.tir.popcount(
PadInput_q[nn, yy * stride_h + ry, xx * stride_w + rx, rc, b1] &
Filter_q[ry, rx, rc, ff, b2]) << b1b2).astype(out_dtype),
- axis=[rc, ry, rx, b2, b1])
+ axis=[rc, ry, rx, b2, b1])
- conv = tvm.compute((batch, out_height, out_width, out_channel), _conv,
- name="Conv2dOutput", tag="bitserial_conv2d_nhwc")
+ conv = te.compute((batch, out_height, out_width, out_channel), _conv,
+ name="Conv2dOutput", tag="bitserial_conv2d_nhwc")
return conv
"""Bitserial Dense operator."""
from __future__ import absolute_import
import tvm
+from tvm import te
from topi.util import get_const_tuple
from .bitserial_util import bitpack
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
2-D with shape [batch, in_dim]
- weight : tvm.Tensor
+ weight : tvm.te.Tensor
2-D with shape [out_dim, in_dim] or
3-D with shape [out_dim, weight_bits, in_dim]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
2-D with shape [batch, out_dim]
"""
data_packed = bitpack(data, data_bits, pack_axis=1, bit_axis=1, pack_type=pack_dtype)
X, WB, _ = get_const_tuple(weight_packed.shape)
oshape = (Y, X)
- k = tvm.reduce_axis((0, K), name='k')
- db = tvm.reduce_axis((0, DB), name='db')
- wb = tvm.reduce_axis((0, WB), name='wb')
+ k = te.reduce_axis((0, K), name='k')
+ db = te.reduce_axis((0, DB), name='db')
+ wb = te.reduce_axis((0, WB), name='wb')
- matmul_unipolar = tvm.compute(oshape, lambda i, j: tvm.sum(
- (tvm.popcount(weight_packed[j, wb, k] & data_packed[i, db, k]) -
- tvm.popcount(~weight_packed[j, wb, k] & data_packed[i, db, k])).astype(out_dtype)
+ matmul_unipolar = te.compute(oshape, lambda i, j: te.sum(
+ (tvm.tir.popcount(weight_packed[j, wb, k] & data_packed[i, db, k]) -
+ tvm.tir.popcount(~weight_packed[j, wb, k] & data_packed[i, db, k])).astype(out_dtype)
<< (db+wb).astype(out_dtype), axis=[wb, db, k]),
- tag='bitserial_dense_unipolar')
+ tag='bitserial_dense_unipolar')
- matmul = tvm.compute(oshape, lambda i, j: tvm.sum(
- tvm.popcount(weight_packed[j, wb, k] & data_packed[i, db, k]).astype(out_dtype)
+ matmul = te.compute(oshape, lambda i, j: te.sum(
+ tvm.tir.popcount(weight_packed[j, wb, k] & data_packed[i, db, k]).astype(out_dtype)
<< (db+wb).astype(out_dtype), axis=[wb, db, k]), tag='bitserial_dense')
"""Utility functions for bitserial operators"""
import numpy as np
import tvm
+from tvm import te
from topi.transform import concatenate
from ..util import get_const_int
pack_axis += 1
def _bitpack(*indices):
- packed_data = [tvm.const(0, pack_type)] * bits
+ packed_data = [tvm.tir.const(0, pack_type)] * bits
for k in range(data_width):
# Translate indices for packed data back to original
idx = [0] * n
element = data(*idx)
for b in range(bits):
- extracted_bit = ((element & tvm.const(masks[b], "int32")) >> b).astype(pack_type)
+ extracted_bit = (
+ (element & tvm.tir.const(masks[b], "int32")) >> b).astype(pack_type)
packed_data[b] = (packed_data[b] | extracted_bit)
if k < data_width - 1:
packed_data[b] = packed_data[b] << 1
return tuple(packed_data)
return tuple(packed_data)
- output_tuple = tvm.compute(bitserial_oshape, _bitpack, name=name, tag='bitpack')
+ output_tuple = te.compute(bitserial_oshape, _bitpack, name=name, tag='bitpack')
if bits > 1:
return concatenate(output_tuple, axis=bit_axis)
"""Binary Neural Network (BNN) Operators"""
from __future__ import absolute_import as _abs
import tvm
+from tvm import te
from .. import tag
from ..util import simplify, get_const_int
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
n-D input, can be any layout.
axis : None or int
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
n-D, the same layout as input, dtype is uint32.
"""
ishape = data.shape
assert get_const_int(ishape[axis]) % 32 == 0
n = len(ishape)
oshape = tuple(simplify(ishape[i] // 32) if i == axis \
- else ishape[i] for i in range(n))
+ else ishape[i] for i in range(n))
def _binarize_pack(*indices):
start_idx = [indices[i] * 32 if i == axis else indices[i] for i in range(n)]
- packed = tvm.const(0, 'uint32')
+ packed = tvm.tir.const(0, 'uint32')
for j in range(32):
idx = [start_idx[i] + j if i == axis else start_idx[i] for i in range(n)]
sign = (data(*idx) >= 0).astype("uint32")
packed = packed << 1
raise RuntimeError("not resach")
- return tvm.compute(oshape, _binarize_pack, name=name, tag='binarize_pack')
+ return te.compute(oshape, _binarize_pack, name=name, tag='binarize_pack')
def binary_dense(data, weight):
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
2-D with shape [batch, in_dim], dtype is uint32.
- weight : tvm.Tensor
+ weight : tvm.te.Tensor
2-D with shape [out_dim, in_dim], dtype is uint32.
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
2-D with shape [batch, out_dim], dtype is float32.
"""
assert data.dtype == 'uint32' and weight.dtype == 'uint32', \
"only support 2-dim binary dense"
batch, in_dim = data.shape
out_dim, _ = weight.shape
- k = tvm.reduce_axis((0, in_dim), name='k')
- matmul = tvm.compute((batch, out_dim), lambda i, j: \
- tvm.sum(tvm.popcount(data[i, k] ^ weight[j, k]), axis=k), \
- tag='binary_dense')
+ k = te.reduce_axis((0, in_dim), name='k')
+ matmul = te.compute((batch, out_dim), lambda i, j: \
+ te.sum(tvm.tir.popcount(data[i, k] ^ weight[j, k]), axis=k), \
+ tag='binary_dense')
- return tvm.compute((batch, out_dim), lambda i, j: \
- 32 * in_dim - 2. * matmul(i, j), \
- tag=tag.ELEMWISE)
+ return te.compute((batch, out_dim), lambda i, j: \
+ 32 * in_dim - 2. * matmul(i, j), \
+ tag=tag.ELEMWISE)
# under the License.
# pylint: disable=invalid-name, unused-variable, unused-argument
"""1D convolution operators."""
-from __future__ import absolute_import as _abs
-import tvm
+from tvm import te
from .pad import pad
from ..util import simplify
from .util import get_pad_tuple1d
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
3-D input shape [batch, in_channel, in_width] for layout == 'NCW'
and [batch, in_width, in_channel] for layout == 'NWC'
- kernel : tvm.Tensor
+ kernel : tvm.te.Tensor
3-D kernel with shape [num_filter, in_channel, filter_size] for layout == 'NCW'
and [filter_size, in_channel, num_filter] for layout == 'NWC'
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
3-D with shape [batch, in_channel, in_width]
- kernel : tvm.Tensor
+ kernel : tvm.te.Tensor
3-D with shape [num_filter, in_channel, filter_size]
strides : int or tuple
temp = pad(data, pad_before, pad_after, name='pad_temp')
# Compute graph
- rc = tvm.reduce_axis((0, in_channels), name='rc')
- rw = tvm.reduce_axis((0, kernel_size), name='rw')
+ rc = te.reduce_axis((0, in_channels), name='rc')
+ rw = te.reduce_axis((0, kernel_size), name='rw')
- return tvm.compute(
+ return te.compute(
(batch, out_channels, out_width),
- lambda b, c, w: tvm.sum(
+ lambda b, c, w: te.sum(
temp[b, rc, w * strides + rw * dilation].astype(out_dtype)
* kernel[c, rc, rw].astype(out_dtype),
axis=[rc, rw]),
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
3-D with shape [batch, in_width, in_channel]
- kernel : tvm.Tensor
+ kernel : tvm.te.Tensor
3-D with shape [filter_size, in_channel, num_filter]
strides : int or tuple
temp = pad(data, pad_before, pad_after, name='pad_temp')
# Compute graph
- rc = tvm.reduce_axis((0, in_channels), name='rc')
- rw = tvm.reduce_axis((0, kernel_size), name='rw')
+ rc = te.reduce_axis((0, in_channels), name='rc')
+ rw = te.reduce_axis((0, kernel_size), name='rw')
- return tvm.compute(
+ return te.compute(
(batch, out_width, out_channels),
- lambda b, w, c: tvm.sum(
+ lambda b, w, c: te.sum(
temp[b, w * strides + rw * dilation, rc].astype(out_dtype)
* kernel[rw, rc, c].astype(out_dtype),
axis=[rc, rw]),
# under the License.
# pylint: disable=invalid-name, unused-variable, unused-argument
"""Transposed 1D convolution operators (sometimes called Deconvolution)."""
-from __future__ import absolute_import as _abs
-import tvm
+from tvm import te
from .dilate import dilate
from .pad import pad
from ..util import simplify
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
3-D with shape [batch, in_channel, in_width]
- kernel : tvm.Tensor
+ kernel : tvm.te.Tensor
3-D with shape [in_channel, num_filter, filter_width]
stride : ints
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
3-D with shape [batch, out_channel, out_width]
"""
data = pad(data, [0, 0, pad_left], [0, 0, pad_right], name='data_pad')
# transpose kernel, switch kernel layout to IOW
- kernel = tvm.compute((channels_out, channels_in, kernel_width), \
- lambda o, i, w: kernel[i][o][kernel_width-1-w],\
- name='kernel')
+ kernel = te.compute((channels_out, channels_in, kernel_width), \
+ lambda o, i, w: kernel[i][o][kernel_width-1-w],\
+ name='kernel')
# convolution
_, _, data_width = data.shape
out_w = simplify(data_width - kernel_width + 1)
- dc = tvm.reduce_axis((0, channels_in), name='dc')
- dw = tvm.reduce_axis((0, kernel_width), name='dw')
- output = tvm.compute(
+ dc = te.reduce_axis((0, channels_in), name='dc')
+ dw = te.reduce_axis((0, kernel_width), name='dw')
+ output = te.compute(
(batch, channels_out, out_w),
- lambda b, c, w: tvm.sum(
+ lambda b, c, w: te.sum(
data[b, dc, w+dw].astype(out_dtype) *
kernel[c, dc, dw].astype(out_dtype),
axis=[dc, dw]), tag="conv1d_transpose_ncw")
from __future__ import absolute_import as _abs
from collections import namedtuple
import tvm
+from tvm import te
from .pad import pad
from .util import get_pad_tuple
Parameters
----------
- input : tvm.Tensor
+ input : tvm.te.Tensor
4-D with shape [batch, in_channel, in_height, in_width]
- filter : tvm.Tensor
+ filter : tvm.te.Tensor
4-D with shape [num_filter, in_channel, filter_height, filter_width]
strides : int or a list/tuple of two ints
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
4-D with shape [batch, out_channel, out_height, out_width]
"""
# search platform specific declaration first
Parameters
----------
- Input : tvm.Tensor
+ Input : tvm.te.Tensor
4-D with shape [batch, in_channel, in_height, in_width]
- Filter : tvm.Tensor
+ Filter : tvm.te.Tensor
4-D with shape [num_filter, in_channel, filter_height, filter_width]
stride : int or a list/tuple of two ints
Returns
-------
- Output : tvm.Tensor
+ Output : tvm.te.Tensor
4-D with shape [batch, out_channel, out_height, out_width]
"""
if out_dtype is None:
pad_before = [0, 0, pad_top, pad_left]
pad_after = [0, 0, pad_down, pad_right]
temp = pad(Input, pad_before, pad_after, name="pad_temp")
- rc = tvm.reduce_axis((0, in_channel), name='rc')
- ry = tvm.reduce_axis((0, kernel_h), name='ry')
- rx = tvm.reduce_axis((0, kernel_w), name='rx')
- return tvm.compute(
+ rc = te.reduce_axis((0, in_channel), name='rc')
+ ry = te.reduce_axis((0, kernel_h), name='ry')
+ rx = te.reduce_axis((0, kernel_w), name='rx')
+ return te.compute(
(batch, out_channel, out_height, out_width),
- lambda nn, ff, yy, xx: tvm.sum(
+ lambda nn, ff, yy, xx: te.sum(
temp[nn, rc, yy * stride_h + ry * dilation_h,
xx * stride_w + rx * dilation_w].astype(out_dtype) *
Filter[ff, rc, ry, rx].astype(out_dtype),
Parameters
----------
- Input : tvm.Tensor
+ Input : tvm.te.Tensor
4-D with shape [in_height, in_width, in_channel, batch]
- Filter : tvm.Tensor
+ Filter : tvm.te.Tensor
4-D with shape [filter_height, filter_width, in_channel, num_filter]
stride : int or a list/tuple of two ints
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
4-D with shape [out_height, out_width, out_channel, batch]
"""
if out_dtype is None:
pad_before = [pad_top, pad_left, 0, 0]
pad_after = [pad_down, pad_right, 0, 0]
PaddedInput = pad(Input, pad_before, pad_after, name="PaddedInput")
- rc = tvm.reduce_axis((0, in_channel), name='rc')
- ry = tvm.reduce_axis((0, kernel_h), name='ry')
- rx = tvm.reduce_axis((0, kernel_w), name='rx')
- Output = tvm.compute(
+ rc = te.reduce_axis((0, in_channel), name='rc')
+ ry = te.reduce_axis((0, kernel_h), name='ry')
+ rx = te.reduce_axis((0, kernel_w), name='rx')
+ Output = te.compute(
(out_height, out_width, out_channel, batch),
- lambda yy, xx, ff, nn: tvm.sum(
+ lambda yy, xx, ff, nn: te.sum(
PaddedInput[yy * stride_h + ry * dilation_h, xx * stride_w + rx * dilation_w,
rc, nn].astype(out_dtype) *
Filter[ry, rx, rc, ff].astype(out_dtype), axis=[ry, rx, rc]),
Parameters
----------
- Input : tvm.Tensor
+ Input : tvm.te.Tensor
4-D with shape [batch, in_height, in_width, in_channel]
- Filter : tvm.Tensor
+ Filter : tvm.te.Tensor
4-D with shape [filter_height, filter_width, in_channel, num_filter]
stride : int or a list/tuple of two ints
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
4-D with shape [batch, out_height, out_width, out_channel]
"""
assert isinstance(stride, int) or len(stride) == 2
pad_before = [0, pad_top, pad_left, 0]
pad_after = [0, pad_down, pad_right, 0]
PaddedInput = pad(Input, pad_before, pad_after, name="PaddedInput")
- rc = tvm.reduce_axis((0, in_channel), name='rc')
- ry = tvm.reduce_axis((0, kernel_h), name='ry')
- rx = tvm.reduce_axis((0, kernel_w), name='rx')
- Output = tvm.compute(
+ rc = te.reduce_axis((0, in_channel), name='rc')
+ ry = te.reduce_axis((0, kernel_h), name='ry')
+ rx = te.reduce_axis((0, kernel_w), name='rx')
+ Output = te.compute(
(batch, out_height, out_width, out_channel),
- lambda nn, yy, xx, ff: tvm.sum(
+ lambda nn, yy, xx, ff: te.sum(
PaddedInput[nn, yy * stride_h + ry * dilation_h,
xx * stride_w + rx * dilation_w, rc].astype(out_dtype) *
Filter[ry, rx, rc, ff].astype(out_dtype), axis=[ry, rx, rc]),
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
5-D with shape [batch, in_channel_chunk, in_height, in_width, in_channel_block]
- kernel : tvm.Tensor
+ kernel : tvm.te.Tensor
6-D with shape
[num_filter_chunk, in_channel_chunk, filter_height, filter_width,
in_channel_block, num_filter_block]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
5-D with shape [batch, out_channel_chunk, out_height, out_width, out_channel_block]
"""
else:
data_pad = data
- ic = tvm.reduce_axis((0, in_channel), name='ic')
- kh = tvm.reduce_axis((0, kernel_height), name='kh')
- kw = tvm.reduce_axis((0, kernel_width), name='kw')
-
- idxdiv = tvm.indexdiv
- idxmod = tvm.indexmod
-
- return tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block:
- tvm.sum(data_pad[n,
- idxdiv(ic, ic_bn),
- oh * HSTR + kh * dilation_h,
- ow * WSTR + kw * dilation_w,
- idxmod(ic, ic_bn)].astype(out_dtype)
- * kernel[oc_chunk,
- idxdiv(ic, ic_bn),
- kh,
- kw,
- idxmod(ic, ic_bn),
- oc_block],
- axis=[ic, kh, kw]),
- name='conv2d_NCHWc', tag="conv2d_NCHWc")
+ ic = te.reduce_axis((0, in_channel), name='ic')
+ kh = te.reduce_axis((0, kernel_height), name='kh')
+ kw = te.reduce_axis((0, kernel_width), name='kw')
+
+ idxdiv = tvm.tir.indexdiv
+ idxmod = tvm.tir.indexmod
+
+ return te.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block:
+ te.sum(data_pad[n,
+ idxdiv(ic, ic_bn),
+ oh * HSTR + kh * dilation_h,
+ ow * WSTR + kw * dilation_w,
+ idxmod(ic, ic_bn)].astype(out_dtype)
+ * kernel[oc_chunk,
+ idxdiv(ic, ic_bn),
+ kh,
+ kw,
+ idxmod(ic, ic_bn),
+ oc_block],
+ axis=[ic, kh, kw]),
+ name='conv2d_NCHWc', tag="conv2d_NCHWc")
def conv2d_NCHWc_int8(data, kernel, stride, padding, dilation, layout, out_layout,
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
5-D with shape [batch, in_channel_chunk, in_height, in_width, in_channel_block]
- kernel : tvm.Tensor
+ kernel : tvm.te.Tensor
7-D with shape
[num_filter_chunk, in_channel_chunk, filter_height, filter_width, in_channel_block/4,
num_filter_block, 4]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
5-D with shape [batch, out_channel_chunk, out_height, out_width, out_channel_block]
"""
else:
data_pad = data
- ic = tvm.reduce_axis((0, in_channel), name='ic')
- kh = tvm.reduce_axis((0, kernel_height), name='kh')
- kw = tvm.reduce_axis((0, kernel_width), name='kw')
+ ic = te.reduce_axis((0, in_channel), name='ic')
+ kh = te.reduce_axis((0, kernel_height), name='kh')
+ kw = te.reduce_axis((0, kernel_width), name='kw')
if groups == 1:
n_elems = 4
- ic_outer = tvm.reduce_axis((0, in_channel//ic_bn), name='ic_outer')
- ic_f_inner = tvm.reduce_axis((0, ic_bn//n_elems), name='ic_f_inner')
- ic_s_inner = tvm.reduce_axis((0, n_elems), name='ic_s_inner')
- return tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block:
- tvm.sum(data_pad[n,
- ic_outer,
- oh * HSTR + kh * dilation_h,
- ow * WSTR + kw * dilation_w,
- ic_f_inner * n_elems + ic_s_inner].astype(out_dtype)
- * kernel[oc_chunk,
- ic_outer,
- kh,
- kw,
- ic_f_inner,
- oc_block,
- ic_s_inner].astype(out_dtype),
- axis=[kh, kw, ic_outer, ic_f_inner, ic_s_inner]),
- name='conv2d_NCHWc_int8', tag="conv2d_NCHWc_int8")
+ ic_outer = te.reduce_axis((0, in_channel//ic_bn), name='ic_outer')
+ ic_f_inner = te.reduce_axis((0, ic_bn//n_elems), name='ic_f_inner')
+ ic_s_inner = te.reduce_axis((0, n_elems), name='ic_s_inner')
+ return te.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block:
+ te.sum(data_pad[n,
+ ic_outer,
+ oh * HSTR + kh * dilation_h,
+ ow * WSTR + kw * dilation_w,
+ ic_f_inner * n_elems + ic_s_inner].astype(out_dtype)
+ * kernel[oc_chunk,
+ ic_outer,
+ kh,
+ kw,
+ ic_f_inner,
+ oc_block,
+ ic_s_inner].astype(out_dtype),
+ axis=[kh, kw, ic_outer, ic_f_inner, ic_s_inner]),
+ name='conv2d_NCHWc_int8', tag="conv2d_NCHWc_int8")
# for int8 group conv support
n_elems = 4
ic_chunk = in_channel//ic_bn
- ic_outer = tvm.reduce_axis((0, ic_chunk//groups), name='ic_outer')
- ic_f_inner = tvm.reduce_axis((0, ic_bn//n_elems), name='ic_f_inner')
- ic_s_inner = tvm.reduce_axis((0, n_elems), name='ic_s_inner')
+ ic_outer = te.reduce_axis((0, ic_chunk//groups), name='ic_outer')
+ ic_f_inner = te.reduce_axis((0, ic_bn//n_elems), name='ic_f_inner')
+ ic_s_inner = te.reduce_axis((0, n_elems), name='ic_s_inner')
oshape = (n, oc_chunk, out_height, out_width, oc_bn)
- return tvm.compute(oshape, lambda n, occ, oh, ow, oc_block:
- tvm.sum(data_pad[n,
- (occ * oc_bn // (oc_chunk * oc_bn // groups))
- * (ic_chunk // groups) + ic_outer,
- oh * HSTR + kh,
- ow * WSTR + kw,
- ic_f_inner * n_elems + ic_s_inner].astype(out_dtype)
- * kernel[occ,
- ic_outer,
- kh,
- kw,
- ic_f_inner,
- oc_block,
- ic_s_inner].astype(out_dtype),
- axis=[kh, kw, ic_outer, ic_f_inner, ic_s_inner]),
- name='conv2d_NCHWc_int8', tag="conv2d_NCHWc_int8")
+ return te.compute(oshape, lambda n, occ, oh, ow, oc_block:
+ te.sum(data_pad[n,
+ (occ * oc_bn // (oc_chunk * oc_bn // groups))
+ * (ic_chunk // groups) + ic_outer,
+ oh * HSTR + kh,
+ ow * WSTR + kw,
+ ic_f_inner * n_elems + ic_s_inner].astype(out_dtype)
+ * kernel[occ,
+ ic_outer,
+ kh,
+ kw,
+ ic_f_inner,
+ oc_block,
+ ic_s_inner].astype(out_dtype),
+ axis=[kh, kw, ic_outer, ic_f_inner, ic_s_inner]),
+ name='conv2d_NCHWc_int8', tag="conv2d_NCHWc_int8")
def conv2d_winograd_weight_transform(kernel, tile_size):
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
4-D with shape [alpha, alpha, CO, CI]
"""
shape = get_const_tuple(kernel.shape)
_, _, G = winograd_transform_matrices(tile_size, K, kernel.dtype)
- r_kh = tvm.reduce_axis((0, K), name='r_kh')
- r_kw = tvm.reduce_axis((0, K), name='r_kw')
- return tvm.compute(shape, lambda eps, nu, co, ci:
- tvm.sum(kernel[co][ci][r_kh][r_kw] *
- G[eps][r_kh] * G[nu][r_kw],
- axis=[r_kh, r_kw]), name='transform_weight')
+ r_kh = te.reduce_axis((0, K), name='r_kh')
+ r_kw = te.reduce_axis((0, K), name='r_kw')
+ return te.compute(shape, lambda eps, nu, co, ci:
+ te.sum(kernel[co][ci][r_kh][r_kw] *
+ G[eps][r_kh] * G[nu][r_kw],
+ axis=[r_kh, r_kw]), name='transform_weight')
def conv2d_winograd_nnpack_weight_transform(kernel, convolution_algorithm, out_dtype):
The convolution algorithm for Winograd NNPACK.
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
4-D with shape [alpha, alpha, CO, CI]
"""
# pylint: disable=import-outside-toplevel
Parameters
----------
- Input : tvm.Tensor
+ Input : tvm.te.Tensor
4-D with shape [batch, in_channel, in_height, in_width]
- Filter : tvm.Tensor
+ Filter : tvm.te.Tensor
4-D with shape [num_filter, in_channel // groups, filter_height, filter_width]
stride : int or a list/tuple of two ints
Returns
-------
- Output : tvm.Tensor
+ Output : tvm.te.Tensor
4-D with shape [batch, out_channel, out_height, out_width]
"""
if out_dtype is None:
pad_before = [0, 0, pad_top, pad_left]
pad_after = [0, 0, pad_down, pad_right]
temp = pad(Input, pad_before, pad_after, name="pad_temp")
- rc = tvm.reduce_axis((0, in_channel // groups), name='rc')
- ry = tvm.reduce_axis((0, kernel_h), name='ry')
- rx = tvm.reduce_axis((0, kernel_w), name='rx')
- return tvm.compute(
+ rc = te.reduce_axis((0, in_channel // groups), name='rc')
+ ry = te.reduce_axis((0, kernel_h), name='ry')
+ rx = te.reduce_axis((0, kernel_w), name='rx')
+ return te.compute(
(batch, out_channel, out_height, out_width),
- lambda nn, ff, yy, xx: tvm.sum(
+ lambda nn, ff, yy, xx: te.sum(
temp[nn, ff // (num_filter//groups) * (in_channel//groups) + rc,
yy * stride_h + ry * dilation_h,
xx * stride_w + rx * dilation_w].astype(out_dtype) *
Parameters
-----------
- packed_out : tvm.Tensor
+ packed_out : tvm.te.Tensor
The output tensor of conv2d_NCHWc.
out_dtype : str
Returns
-------
- unpacked_out : tvm.Tensor
+ unpacked_out : tvm.te.Tensor
The unpacked output tensor in NCHW layout.
"""
n, oc_chunk, oh, ow, oc_bn = get_const_tuple(packed_out.shape)
- idxmod = tvm.indexmod
- idxdiv = tvm.indexdiv
+ idxmod = tvm.tir.indexmod
+ idxdiv = tvm.tir.indexdiv
oshape = (n, oc_chunk * oc_bn, oh, ow)
unpacked_out = \
- tvm.compute(oshape,
- lambda n, c, h, w:
- packed_out[n, idxdiv(c, oc_bn), h, w, idxmod(c, oc_bn)]
- .astype(out_dtype),
- name='output_unpack',
- tag=tag.INJECTIVE+",unpack_nchwc")
+ te.compute(oshape,
+ lambda n, c, h, w:
+ packed_out[n, idxdiv(c, oc_bn), h, w, idxmod(c, oc_bn)]
+ .astype(out_dtype),
+ name='output_unpack',
+ tag=tag.INJECTIVE+",unpack_nchwc")
return unpacked_out
# under the License.
# pylint: disable=invalid-name, unused-variable, unused-argument
"""Transposed 2D convolution operators (sometimes called Deconvolution)."""
-from __future__ import absolute_import as _abs
import tvm
+from tvm import te
from tvm import relay
from .dilate import dilate
from .pad import pad
Parameters
----------
- Input : tvm.Tensor
+ Input : tvm.te.Tensor
4-D with shape [batch, in_channel, in_height, in_width]
- Filter : tvm.Tensor
+ Filter : tvm.te.Tensor
4-D with shape [in_channel, num_filter, filter_height, filter_width]
strides : tuple of two ints
Returns
-------
- Output : tvm.Tensor
+ Output : tvm.te.Tensor
4-D with shape [batch, out_channel, out_height, out_width]
"""
return declaration_conv2d_transpose_impl(Input, Filter, strides, padding, out_dtype)
[0, 0, bpad_bottom, bpad_right], \
name='data_pad')
# transform kernel layout from IOHW to OIHW, and rotate kernel by 180 degrees
- kernel_transform = tvm.compute((out_c, in_c, filter_h, filter_w), \
- lambda o, i, h, w: kernel[i][o][filter_h-1-h][filter_w-1-w], \
- name='kernel_transform')
+ kernel_transform = te.compute((out_c, in_c, filter_h, filter_w), \
+ lambda o, i, h, w: kernel[i][o][filter_h-1-h][filter_w-1-w], \
+ name='kernel_transform')
return data_pad, kernel_transform
out_c = simplify(out_c)
out_h = simplify(in_h - filter_h + 1)
out_w = simplify(in_w - filter_w + 1)
- dc = tvm.reduce_axis((0, in_c), name='dc')
- dh = tvm.reduce_axis((0, filter_h), name='dh')
- dw = tvm.reduce_axis((0, filter_w), name='dw')
+ dc = te.reduce_axis((0, in_c), name='dc')
+ dh = te.reduce_axis((0, filter_h), name='dh')
+ dw = te.reduce_axis((0, filter_w), name='dw')
- Output = tvm.compute(
+ Output = te.compute(
(batch, out_c, out_h, out_w),
- lambda b, c, h, w: tvm.sum(
+ lambda b, c, h, w: te.sum(
data_pad[b, dc, h+dh, w+dw].astype(out_dtype) *
kernel_transform[c, dc, dh, dw].astype(out_dtype),
axis=[dc, dh, dw]), tag="conv2d_transpose_nchw")
# pylint: disable=invalid-name, unused-variable, too-many-locals
# pylint: disable=unused-argument, redefined-builtin, no-else-return
"""Conv3D operators"""
-from __future__ import absolute_import as _abs
-import tvm
+from tvm import te
from .pad import pad
from .util import get_pad_tuple3d
Parameters
----------
- Input : tvm.Tensor
+ Input : tvm.te.Tensor
5-D with shape [batch, in_channel, in_depth, in_height, in_width]
- Filter : tvm.Tensor
+ Filter : tvm.te.Tensor
5-D with shape [num_filter, in_channel, filter_depth, filter_height, filter_width]
stride : int or a list/tuple of three ints
Returns
-------
- Output : tvm.Tensor
+ Output : tvm.te.Tensor
5-D with shape [batch, out_channel, out_depth, out_height, out_width]
"""
if out_dtype is None:
pad_before = [0, 0, pad_front, pad_top, pad_left]
pad_after = [0, 0, pad_back, pad_down, pad_right]
temp = pad(Input, pad_before, pad_after, name="pad_temp")
- rc = tvm.reduce_axis((0, in_channel), name='rc')
- rz = tvm.reduce_axis((0, kernel_d), name='rz')
- ry = tvm.reduce_axis((0, kernel_h), name='ry')
- rx = tvm.reduce_axis((0, kernel_w), name='rx')
+ rc = te.reduce_axis((0, in_channel), name='rc')
+ rz = te.reduce_axis((0, kernel_d), name='rz')
+ ry = te.reduce_axis((0, kernel_h), name='ry')
+ rx = te.reduce_axis((0, kernel_w), name='rx')
- return tvm.compute(
+ return te.compute(
(batch, out_channel, out_depth, out_height, out_width),
- lambda nn, ff, zz, yy, xx: tvm.sum(
+ lambda nn, ff, zz, yy, xx: te.sum(
temp[nn, rc, zz * stride_d + rz * dilation_d, yy * stride_h + ry * dilation_h,
xx * stride_w + rx * dilation_w].astype(out_dtype) *
Filter[ff, rc, rz, ry, rx].astype(out_dtype),
Parameters
----------
- Input : tvm.Tensor
+ Input : tvm.te.Tensor
5-D with shape [batch, in_depth, in_height, in_width, in_channel]
- Filter : tvm.Tensor
+ Filter : tvm.te.Tensor
5-D with shape [filter_depth, filter_height, filter_width, in_channel, num_filter]
stride : int or a list/tuple of three ints
Returns
-------
- Output : tvm.Tensor
+ Output : tvm.te.Tensor
5-D with shape [batch, out_depth, out_height, out_width, out_channel]
"""
assert isinstance(stride, int) or len(stride) == 3
pad_before = [0, pad_front, pad_top, pad_left, 0]
pad_after = [0, pad_back, pad_down, pad_right, 0]
PaddedInput = pad(Input, pad_before, pad_after, name="PaddedInput")
- rd = tvm.reduce_axis((0, kernel_d), name='rd')
- rh = tvm.reduce_axis((0, kernel_h), name='rh')
- rw = tvm.reduce_axis((0, kernel_w), name='rw')
- rc = tvm.reduce_axis((0, in_channel), name='rc')
- Output = tvm.compute(
+ rd = te.reduce_axis((0, kernel_d), name='rd')
+ rh = te.reduce_axis((0, kernel_h), name='rh')
+ rw = te.reduce_axis((0, kernel_w), name='rw')
+ rc = te.reduce_axis((0, in_channel), name='rc')
+ Output = te.compute(
(batch, out_depth, out_height, out_width, out_channel),
- lambda nn, dd, hh, ww, cc: tvm.sum(
+ lambda nn, dd, hh, ww, cc: te.sum(
PaddedInput[nn, dd * stride_d + rd * dilation_d, hh * stride_h + rh * dilation_h,
ww * stride_w + rw * dilation_w, rc].astype(out_dtype) *
Filter[rd, rh, rw, rc, cc].astype(out_dtype), axis=[rd, rh, rw, rc]),
# pylint: disable=invalid-name, too-many-locals, too-many-arguments
"""Deformable Conv2D operators"""
import tvm
+from tvm import te
from .util import get_pad_tuple
from ..util import get_const_tuple
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
4-D with shape [batch, in_channel, in_height, in_width]
- offset : tvm.Tensor
+ offset : tvm.te.Tensor
4-D with shape [batch, deformable_groups * filter_height * filter_width * 2,
out_height, out_width].
- kernel : tvm.Tensor
+ kernel : tvm.te.Tensor
4-D with shape [num_filter, in_channel, filter_height, filter_width]
strides : int or a list/tuple of two ints
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
4-D with shape [batch, out_channel, out_height, out_width]
"""
if out_dtype is None:
dilated_kernel_w = (kernel_w - 1) * dilation_w + 1
pad_top, pad_left, _, _ = get_pad_tuple(
padding, (dilated_kernel_h, dilated_kernel_w))
- rc = tvm.reduce_axis((0, in_channel), name='rc')
- ry = tvm.reduce_axis((0, kernel_h), name='ry')
- rx = tvm.reduce_axis((0, kernel_w), name='rx')
+ rc = te.reduce_axis((0, in_channel), name='rc')
+ ry = te.reduce_axis((0, kernel_h), name='ry')
+ rx = te.reduce_axis((0, kernel_w), name='rx')
- zero = tvm.const(0.0, data.dtype)
+ zero = tvm.tir.const(0.0, data.dtype)
def _bilinear(n, c, h, w):
- outside = tvm.any(h < 0, w < 0, h >= in_height, w >= in_width)
+ outside = tvm.tir.any(h < 0, w < 0, h >= in_height, w >= in_width)
val = bilinear_sample_nchw(data, (n, c, h, w), in_height - 1, in_width - 1)
- return tvm.if_then_else(outside, zero, val)
+ return tvm.tir.if_then_else(outside, zero, val)
data_deform = \
- tvm.compute((batch, in_channel, kernel_h, kernel_w, out_height, out_width),
- lambda n, c, kh, kw, y, x:
- _bilinear(n, c,
- y * stride_h - pad_top + kh * dilation_h +
- offset[n, c // ic_per_dgroup * (kernel_w*kernel_h*2) +
- (kh * kernel_w + kw) * 2, y, x],
- x * stride_w - pad_left + kw * dilation_w +
- offset[n, c // ic_per_dgroup * (kernel_w*kernel_h*2) +
- (kh * kernel_w + kw) * 2 + 1, y, x]))
- return tvm.compute(
+ te.compute((batch, in_channel, kernel_h, kernel_w, out_height, out_width),
+ lambda n, c, kh, kw, y, x:
+ _bilinear(n, c,
+ y * stride_h - pad_top + kh * dilation_h +
+ offset[n, c // ic_per_dgroup * (kernel_w*kernel_h*2) +
+ (kh * kernel_w + kw) * 2, y, x],
+ x * stride_w - pad_left + kw * dilation_w +
+ offset[n, c // ic_per_dgroup * (kernel_w*kernel_h*2) +
+ (kh * kernel_w + kw) * 2 + 1, y, x]))
+ return te.compute(
(batch, out_channel, out_height, out_width),
- lambda n, f, y, x: tvm.sum(
+ lambda n, f, y, x: te.sum(
data_deform[n, rc, ry, rx, y, x].astype(out_dtype) *
kernel[f, rc, ry, rx].astype(out_dtype),
axis=[rc, ry, rx]), tag="deformable_conv2d_nchw")
# specific language governing permissions and limitations
# under the License.
"""TVM operator fully connected compute."""
-from __future__ import absolute_import
-import tvm
+from tvm import te
from .. import tag
def dense(data, weight, bias=None, out_dtype=None):
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
2-D with shape [batch, in_dim]
- weight : tvm.Tensor
+ weight : tvm.te.Tensor
2-D with shape [out_dim, in_dim]
- bias : tvm.Tensor, optional
+ bias : tvm.te.Tensor, optional
1-D with shape [out_dim]
out_dtype : str
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
2-D with shape [batch, out_dim]
"""
assert len(data.shape) == 2 and len(weight.shape) == 2, \
out_dtype = data.dtype
batch, in_dim = data.shape
out_dim, _ = weight.shape
- k = tvm.reduce_axis((0, in_dim), name='k')
- matmul = tvm.compute((batch, out_dim), \
- lambda i, j: tvm.sum(data[i, k].astype(out_dtype) * \
- weight[j, k].astype(out_dtype), axis=k), \
- name='T_dense', tag='dense')
+ k = te.reduce_axis((0, in_dim), name='k')
+ matmul = te.compute((batch, out_dim), \
+ lambda i, j: te.sum(data[i, k].astype(out_dtype) * \
+ weight[j, k].astype(out_dtype), axis=k), \
+ name='T_dense', tag='dense')
if bias is not None:
- matmul = tvm.compute((batch, out_dim), \
- lambda i, j: matmul[i, j] + bias[j].astype(out_dtype), \
- tag=tag.BROADCAST)
+ matmul = te.compute((batch, out_dim), \
+ lambda i, j: matmul[i, j] + bias[j].astype(out_dtype), \
+ tag=tag.BROADCAST)
return matmul
"""TVM operator depth_to_space compute."""
from __future__ import absolute_import
import tvm
+from tvm import te
from .. import tag
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
4-D tensor in either NCHW or NHWC layout.
block_size : int
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
Output of shape [N, C / block_size**2, H * block_size, W * block_size]
"""
if layout == 'NCHW':
in_n, in_c, in_h, in_w = data.shape
- channel_factor = tvm.truncdiv(in_c, (block_size * block_size))
+ channel_factor = tvm.tir.truncdiv(in_c, (block_size * block_size))
output_shape = [in_n, channel_factor,
in_h * block_size, in_w * block_size]
elif layout == 'NHWC':
in_n, in_h, in_w, in_c = data.shape
- channel_factor = tvm.truncdiv(in_c, (block_size * block_size))
+ channel_factor = tvm.tir.truncdiv(in_c, (block_size * block_size))
output_shape = [in_n, in_h * block_size,
in_w * block_size, channel_factor]
else:
return n, c, y, x
def _get_pixel(n, c, y, x):
- block_x = tvm.truncdiv(x, block_size)
- block_y = tvm.truncdiv(y, block_size)
- idx_x = tvm.truncmod(x, block_size)
- idx_y = tvm.truncmod(y, block_size)
+ block_x = tvm.tir.truncdiv(x, block_size)
+ block_y = tvm.tir.truncdiv(y, block_size)
+ idx_x = tvm.tir.truncmod(x, block_size)
+ idx_y = tvm.tir.truncmod(y, block_size)
if mode == "DCR":
channel_idx = channel_factor * ((block_size * idx_y) + idx_x) + c
else:
n, c, y, x = _get_indices(*indices)
return _get_pixel(n, c, y, x)
- return tvm.compute(output_shape, _compute, name='depth_to_space', tag=tag.INJECTIVE)
+ return te.compute(output_shape, _compute, name='depth_to_space', tag=tag.INJECTIVE)
from __future__ import absolute_import as _abs
from collections import namedtuple
import tvm
+from tvm import te
from .dilate import dilate
from .pad import pad
Parameters
----------
- Input : tvm.Tensor
+ Input : tvm.te.Tensor
4-D with shape [batch, in_channel, in_height, in_width]
- Filter : tvm.Tensor
+ Filter : tvm.te.Tensor
4-D with shape [in_channel, channel_multiplier, filter_height, filter_width]
stride : tuple of two ints
Returns
-------
- Output : tvm.Tensor
+ Output : tvm.te.Tensor
4-D with shape [batch, out_channel, out_height, out_width]
"""
out_dtype = Input.dtype if out_dtype is None else out_dtype
pad_after = [0, 0, pad_down, pad_right]
PaddedInput = pad(Input, pad_before, pad_after, name="PaddedInput")
# depthconv stage
- idxdiv = tvm.indexdiv
- idxmod = tvm.indexmod
- di = tvm.reduce_axis((0, filter_height), name='di')
- dj = tvm.reduce_axis((0, filter_width), name='dj')
- Output = tvm.compute(
+ idxdiv = tvm.tir.indexdiv
+ idxmod = tvm.tir.indexmod
+ di = te.reduce_axis((0, filter_height), name='di')
+ dj = te.reduce_axis((0, filter_width), name='dj')
+ Output = te.compute(
(batch, out_channel, out_height, out_width),
- lambda b, c, i, j: tvm.sum(
+ lambda b, c, i, j: te.sum(
(PaddedInput[b, idxdiv(c, channel_multiplier), i*stride_h+di*dilation_h,
j*stride_w+dj*dilation_w].astype(out_dtype) *
Filter[idxdiv(c, channel_multiplier),
Parameters
----------
- Input : tvm.Tensor
+ Input : tvm.te.Tensor
4-D with shape [batch, in_height, in_width, in_channel]
- Filter : tvm.Tensor
+ Filter : tvm.te.Tensor
4-D with shape [filter_height, filter_width, in_channel, channel_multiplier]
stride : tuple of two ints
Returns
-------
- Output : tvm.Tensor
+ Output : tvm.te.Tensor
4-D with shape [batch, out_height, out_width, out_channel]
"""
out_dtype = Input.dtype if out_dtype is None else out_dtype
pad_after = [0, pad_down, pad_right, 0]
PaddedInput = pad(Input, pad_before, pad_after, name="PaddedInput")
# depthconv stage
- idxdiv = tvm.indexdiv
- idxmod = tvm.indexmod
+ idxdiv = tvm.tir.indexdiv
+ idxmod = tvm.tir.indexmod
- di = tvm.reduce_axis((0, filter_height), name='di')
- dj = tvm.reduce_axis((0, filter_width), name='dj')
- Output = tvm.compute(
+ di = te.reduce_axis((0, filter_height), name='di')
+ dj = te.reduce_axis((0, filter_width), name='dj')
+ Output = te.compute(
(batch, out_height, out_width, out_channel),
- lambda b, i, j, c: tvm.sum(
+ lambda b, i, j, c: te.sum(
(PaddedInput[b, i*stride_h + di*dilation_h, j*stride_w + dj*dilation_w,
idxdiv(c, channel_multiplier)].astype(out_dtype) *
Filter[di, dj,
Parameters
----------
- Filter : tvm.Tensor
+ Filter : tvm.te.Tensor
4-D with shape [filter_height, filter_width, in_channel, channel_multiplier]
- Out_grad : tvm.Tensor
+ Out_grad : tvm.te.Tensor
4-D with shape [batch, out_height, out_width, out_channel]
stride : tuple of two ints
Returns
-------
- Output : tvm.Tensor
+ Output : tvm.te.Tensor
4-D with shape [batch, in_height, in_width, in_channel]
"""
batch, in_h, in_w, in_c = ishape
bpad_right = (filter_w - 1 - fpad_right) + (stride_w - 1)
padded_out_grad = pad(dilated_out_grad, \
- [0, bpad_top, bpad_left, 0], \
- [0, bpad_bottom, bpad_right, 0], \
- name='padded_out_grad')
+ [0, bpad_top, bpad_left, 0], \
+ [0, bpad_bottom, bpad_right, 0], \
+ name='padded_out_grad')
- dh = tvm.reduce_axis((0, filter_h), name='dh')
- dw = tvm.reduce_axis((0, filter_w), name='dw')
- dc = tvm.reduce_axis((0, channel_multiplier), name='dc')
+ dh = te.reduce_axis((0, filter_h), name='dh')
+ dw = te.reduce_axis((0, filter_w), name='dw')
+ dc = te.reduce_axis((0, channel_multiplier), name='dc')
- In_grad = tvm.compute(
+ In_grad = te.compute(
(batch, in_h, in_w, in_c),
- lambda b, h, w, c: tvm.sum(padded_out_grad[b, h+dh, w+dw, c*channel_multiplier + dc] * \
- Filter[filter_h-1-dh, filter_w-1-dw, c, dc],
- axis=[dh, dw, dc]), tag='depthwise_conv2d_backward_input_nhwc')
+ lambda b, h, w, c: te.sum(padded_out_grad[b, h+dh, w+dw, c*channel_multiplier + dc] * \
+ Filter[filter_h-1-dh, filter_w-1-dw, c, dc],
+ axis=[dh, dw, dc]), tag='depthwise_conv2d_backward_input_nhwc')
return In_grad
Parameters
----------
- Input : tvm.Tensor
+ Input : tvm.te.Tensor
4-D with shape [batch, in_height, in_width, in_channel]
- Out_grad : tvm.Tensor
+ Out_grad : tvm.te.Tensor
4-D with shape [batch, out_height, out_width, out_channel]
stride : tuple of two ints
Returns
-------
- Output : tvm.Tensor
+ Output : tvm.te.Tensor
4-D with shape [filter_height, filter_width, in_channel, channel_multiplier]
"""
batch, out_h, out_w, out_c = oshape
pad_top, pad_left, pad_bottom, pad_right = get_pad_tuple(padding, (filter_h, filter_w))
padded_in = pad(Input, \
- [0, pad_top, pad_left, 0], \
- [0, pad_bottom, pad_right, 0], \
- name='padded_in')
+ [0, pad_top, pad_left, 0], \
+ [0, pad_bottom, pad_right, 0], \
+ name='padded_in')
- dh = tvm.reduce_axis((0, Out_grad.shape[1].value), name='dh')
- dw = tvm.reduce_axis((0, Out_grad.shape[2].value), name='dw')
- db = tvm.reduce_axis((0, batch), name='db')
- idxdiv = tvm.indexdiv
- idxmod = tvm.indexmod
+ dh = te.reduce_axis((0, Out_grad.shape[1].value), name='dh')
+ dw = te.reduce_axis((0, Out_grad.shape[2].value), name='dw')
+ db = te.reduce_axis((0, batch), name='db')
+ idxdiv = tvm.tir.indexdiv
+ idxmod = tvm.tir.indexmod
- Weight_grad = tvm.compute(
+ Weight_grad = te.compute(
(filter_h, filter_w, in_c, channel_multiplier),
- lambda fh, fw, c, m: tvm.sum(
+ lambda fh, fw, c, m: te.sum(
Out_grad[db, dh, dw, c*channel_multiplier+idxmod(m, channel_multiplier)] *
padded_in[db, fh+dh*stride_h, fw+dw*stride_w, c], axis=[db, dh, dw]),
tag='depthwise_conv2d_backward_weight_nhwc')
Parameters
----------
- Input : tvm.Tensor
+ Input : tvm.te.Tensor
5-D with shape [batch, in_channel_chunk, in_height, in_width, in_channel_block]
- Filter : tvm.Tensor
+ Filter : tvm.te.Tensor
6-D with shape [out_channel_chunk, 1, filter_height, filter_width, 1, out_channel_block]
In NCHWc depthwise convolution,
we group kernel's in_channel and channel_multiplier together then do the tiling.
Returns
-------
- Output : tvm.Tensor
+ Output : tvm.te.Tensor
5-D with shape [batch, out_channel_chunk, out_height, out_width, out_channel_block]
"""
raise ValueError("missing register for topi.nn.depthwise_conv2d_NCHWc")
# under the License.
# pylint: disable=invalid-name
"""Dilation operators"""
-from __future__ import absolute_import as _abs
import tvm
+from tvm import te
from .. import util
from .. import tag
-@tvm.tag_scope(tag=tag.INJECTIVE+",dilate")
+@te.tag_scope(tag=tag.INJECTIVE+",dilate")
def dilate(data, strides, name="DilatedInput"):
"""Dilate data with zeros.
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
n-D, can be any layout.
strides : list / tuple of n ints
Returns
-------
- Output : tvm.Tensor
+ Output : tvm.te.Tensor
n-D, the same layout as data.
"""
n = len(data.shape)
n, len(strides)))
out_shape = tuple(
- tvm.ir_pass.Simplify((data.shape[i] - 1) * strides[i] + 1) for i in range(n))
+ tvm.tir.ir_pass.Simplify((data.shape[i] - 1) * strides[i] + 1) for i in range(n))
def _dilate(*indices):
not_zero = []
index_tuple = []
- idxdiv = tvm.indexdiv
- idxmod = tvm.indexmod
+ idxdiv = tvm.tir.indexdiv
+ idxmod = tvm.tir.indexmod
for i in range(n):
if not util.equal_const_int(strides[i], 1):
index_tuple.append(idxdiv(indices[i], strides[i]))
else:
index_tuple.append(indices[i])
if not_zero:
- not_zero = tvm.all(*not_zero)
- return tvm.if_then_else(not_zero, data(*index_tuple), tvm.const(0.0, data.dtype))
+ not_zero = tvm.tir.all(*not_zero)
+ return tvm.tir.if_then_else(
+ not_zero, data(*index_tuple), tvm.tir.const(0.0, data.dtype))
return data(*index_tuple)
- return tvm.compute(out_shape, _dilate, name=name)
+ return te.compute(out_shape, _dilate, name=name)
"""Elementwise operators"""
from __future__ import absolute_import as _abs
import tvm
+from tvm import te
from .. import tag
from ..util import get_const_int
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
def relu(x):
"""Take relu of input x.
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
- return tvm.compute(x.shape, lambda *i: tvm.max(x(*i), tvm.const(0, x.dtype)))
+ return te.compute(x.shape, lambda *i: tvm.te.max(x(*i), tvm.tir.const(0, x.dtype)))
-@tvm.tag_scope(tag=tag.ELEMWISE)
+@tvm.te.tag_scope(tag=tag.ELEMWISE)
def leaky_relu(x, alpha):
"""Take leaky relu of input x.
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
alpha : float
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
def _compute(*indices):
value = x(*indices)
- calpha = tvm.const(alpha, value.dtype)
- return tvm.expr.Select(value > 0, value, value * calpha)
- return tvm.compute(x.shape, _compute)
+ calpha = tvm.tir.const(alpha, value.dtype)
+ return tvm.tir.Select(value > 0, value, value * calpha)
+ return te.compute(x.shape, _compute)
-@tvm.tag_scope(tag=tag.BROADCAST)
+@tvm.te.tag_scope(tag=tag.BROADCAST)
def prelu(x, slope, axis=1):
""" PReLU.
It accepts two arguments: an input ``x`` and a weight array ``W``
where :math:`*` is an elementwise multiplication for each sample in the
batch.
Arguments:
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
- slope : tvm.Tensor
+ slope : tvm.te.Tensor
Channelised slope tensor for prelu
axis : int
The axis where the channel data needs to be applied
Returns:
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
Links:
def _compute_channelwise(*indices):
xval = x(*indices)
- return tvm.expr.Select(xval > 0, xval, xval * slope(indices[axis]))
- return tvm.compute(x.shape, _compute_channelwise)
+ return tvm.tir.Select(xval > 0, xval, xval * slope(indices[axis]))
+ return te.compute(x.shape, _compute_channelwise)
"""FIFO buffer op"""
from __future__ import absolute_import as _abs
import tvm
+from tvm import te
from .. import tag
from ..transform import concatenate, strided_slice
-@tvm.tag_scope(tag=tag.INJECTIVE+",fifo_buffer")
+@tvm.te.tag_scope(tag=tag.INJECTIVE+",fifo_buffer")
def fifo_buffer(data, buffer, axis):
"""
FIFO buffer to enable computation reuse in CNNs with sliding window input
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
The input data
- buffer : tvm.Tensor
+ buffer : tvm.te.Tensor
Previous value of the FIFO buffer
axis : int
Specify which axis should be used for buffering
Returns
-------
- result : tvm.Tensor
+ result : tvm.te.Tensor
Updated value for the buffer
"""
assert len(data.shape) == len(buffer.shape), \
# Explicitly write out formula up to 4D, and then use concat+slice combo for 5D and higher
if len(buffer.shape) == 1:
- return tvm.compute(buffer.shape,
- lambda i:
- tvm.if_then_else(i < buflen - data_size,
- buffer[i + data_size],
- data[i - buflen + data_size]),
- name='new_buffer')
+ return te.compute(buffer.shape,
+ lambda i:
+ tvm.tir.if_then_else(i < buflen - data_size,
+ buffer[i + data_size],
+ data[i - buflen + data_size]),
+ name='new_buffer')
if len(buffer.shape) == 2:
if axis == 0:
- return tvm.compute(buffer.shape,
- lambda i, j:
- tvm.if_then_else(i < buflen - data_size,
- buffer[i + data_size, j],
- data[i - buflen + data_size, j]),
- name='new_buffer')
+ return te.compute(buffer.shape,
+ lambda i, j:
+ tvm.tir.if_then_else(i < buflen - data_size,
+ buffer[i + data_size, j],
+ data[i - buflen + data_size, j]),
+ name='new_buffer')
if axis == 1:
- return tvm.compute(buffer.shape,
- lambda i, j:
- tvm.if_then_else(j < buflen - data_size,
- buffer[i, j + data_size],
- data[i, j - buflen + data_size]),
- name='new_buffer')
+ return te.compute(buffer.shape,
+ lambda i, j:
+ tvm.tir.if_then_else(j < buflen - data_size,
+ buffer[i, j + data_size],
+ data[i, j - buflen + data_size]),
+ name='new_buffer')
assert False, 'Invalid value for axis; it should be at most {}'.format(len(buffer.shape))
elif len(buffer.shape) == 3:
if axis == 0:
- return tvm.compute(buffer.shape,
- lambda i, j, k:
- tvm.if_then_else(i < buflen - data_size,
- buffer[i + data_size, j, k],
- data[i - buflen + data_size, j, k]),
- name='new_buffer')
+ return te.compute(buffer.shape,
+ lambda i, j, k:
+ tvm.tir.if_then_else(i < buflen - data_size,
+ buffer[i + data_size, j, k],
+ data[i - buflen + data_size, j, k]),
+ name='new_buffer')
if axis == 1:
- return tvm.compute(buffer.shape,
- lambda i, j, k:
- tvm.if_then_else(j < buflen - data_size,
- buffer[i, j + data_size, k],
- data[i, j - buflen + data_size, k]),
- name='new_buffer')
+ return te.compute(buffer.shape,
+ lambda i, j, k:
+ tvm.tir.if_then_else(j < buflen - data_size,
+ buffer[i, j + data_size, k],
+ data[i, j - buflen + data_size, k]),
+ name='new_buffer')
if axis == 2:
- return tvm.compute(buffer.shape,
- lambda i, j, k:
- tvm.if_then_else(k < buflen - data_size,
- buffer[i, j, k + data_size],
- data[i, j, k - buflen + data_size]),
- name='new_buffer')
+ return te.compute(buffer.shape,
+ lambda i, j, k:
+ tvm.tir.if_then_else(k < buflen - data_size,
+ buffer[i, j, k + data_size],
+ data[i, j, k - buflen + data_size]),
+ name='new_buffer')
assert False, 'Invalid value for axis; it should be at most {}'.format(len(buffer.shape))
elif len(buffer.shape) == 4:
if axis == 0:
- return tvm.compute(buffer.shape,
- lambda i, j, k, l:
- tvm.if_then_else(i < buflen - data_size,
- buffer[i + data_size, j, k, l],
- data[i - buflen + data_size, j, k, l]),
- name='new_buffer')
+ return te.compute(buffer.shape,
+ lambda i, j, k, l:
+ tvm.tir.if_then_else(i < buflen - data_size,
+ buffer[i + data_size, j, k, l],
+ data[i - buflen + data_size, j, k, l]),
+ name='new_buffer')
if axis == 1:
- return tvm.compute(buffer.shape,
- lambda i, j, k, l:
- tvm.if_then_else(j < buflen - data_size,
- buffer[i, j + data_size, k, l],
- data[i, j - buflen + data_size, k, l]),
- name='new_buffer')
+ return te.compute(buffer.shape,
+ lambda i, j, k, l:
+ tvm.tir.if_then_else(j < buflen - data_size,
+ buffer[i, j + data_size, k, l],
+ data[i, j - buflen + data_size, k, l]),
+ name='new_buffer')
if axis == 2:
- return tvm.compute(buffer.shape,
- lambda i, j, k, l:
- tvm.if_then_else(k < buflen - data_size,
- buffer[i, j, k + data_size, l],
- data[i, j, k - buflen + data_size, l]),
- name='new_buffer')
+ return te.compute(buffer.shape,
+ lambda i, j, k, l:
+ tvm.tir.if_then_else(k < buflen - data_size,
+ buffer[i, j, k + data_size, l],
+ data[i, j, k - buflen + data_size, l]),
+ name='new_buffer')
if axis == 3:
- return tvm.compute(buffer.shape,
- lambda i, j, k, l:
- tvm.if_then_else(l < buflen - data_size,
- buffer[i, j, k, l + data_size],
- data[i, j, k, l - buflen + data_size]),
- name='new_buffer')
+ return te.compute(buffer.shape,
+ lambda i, j, k, l:
+ tvm.tir.if_then_else(l < buflen - data_size,
+ buffer[i, j, k, l + data_size],
+ data[i, j, k, l - buflen + data_size]),
+ name='new_buffer')
assert False, 'Invalid value for axis; it should be at most {}'.format(len(buffer.shape))
else:
# Implement FIFO buffer as combination of concat and slice
"""TVM operator flatten compute."""
from __future__ import absolute_import
import tvm
+from tvm import te
from .. import tag
-@tvm.tag_scope(tag=tag.INJECTIVE)
+@tvm.te.tag_scope(tag=tag.INJECTIVE)
def flatten(data):
"""Flattens the input array into a 2-D array by collapsing the higher dimensions.
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
Input array.
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
2-D array with collapsed higher dimensions.
"""
ishape = data.shape
for i in range(1, len(ishape)):
dim = dim * ishape[i]
oshape = [ishape[0], dim]
- idxdiv = tvm.indexdiv
- idxmod = tvm.indexmod
+ idxdiv = tvm.tir.indexdiv
+ idxmod = tvm.tir.indexmod
def unwrap(idx, shape):
index = []
idx = idxdiv(idx, s)
return list(reversed(index))
- return tvm.compute(oshape, lambda i, j: data(i, *unwrap(j, ishape[1:])))
+ return te.compute(oshape, lambda i, j: data(i, *unwrap(j, ishape[1:])))
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
4-D with shape [batch, channel, height, width]
size : int
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
4-D output with same shape
"""
return cpp.nn.lrn(data, size, axis, alpha, beta, bias)
"""Operators of one-to-one-mapping on the first input"""
from __future__ import absolute_import as _abs
import tvm
+from tvm import te
from .. import tag
-@tvm.tag_scope(tag=tag.BROADCAST)
+@tvm.te.tag_scope(tag=tag.BROADCAST)
def scale_shift_nchw(Input, Scale, Shift):
"""Batch normalization operator in inference.
Parameters
----------
- Input : tvm.Tensor
+ Input : tvm.te.Tensor
Input tensor, layout is NCHW
- Scale : tvm.Tensor
+ Scale : tvm.te.Tensor
Scale tensor, 1-D of size channel number
- Shift : tvm.Tensor
+ Shift : tvm.te.Tensor
Shift tensor, 1-D of size channel number
Returns
-------
- Output : tvm.Tensor
+ Output : tvm.te.Tensor
Output tensor, layout is NCHW
"""
- return tvm.compute(Input.shape, lambda b, c, i, j: Input[b, c, i, j] * Scale[c] + Shift[c], name='ScaleShift')
+ return te.compute(Input.shape, lambda b, c, i, j: Input[b, c, i, j] * Scale[c] + Shift[c], name='ScaleShift')
-@tvm.tag_scope(tag=tag.BROADCAST)
+@tvm.te.tag_scope(tag=tag.BROADCAST)
def scale_shift_nhwc(Input, Scale, Shift):
"""Batch normalization operator in inference.
Parameters
----------
- Input : tvm.Tensor
+ Input : tvm.te.Tensor
Input tensor, layout is NHWC
- Scale : tvm.Tensor
+ Scale : tvm.te.Tensor
Scale tensor, 1-D of size channel number
- Shift : tvm.Tensor
+ Shift : tvm.te.Tensor
Shift tensor, 1-D of size channel number
Returns
-------
- Output : tvm.Tensor
+ Output : tvm.te.Tensor
Output tensor, layout is NHWC
"""
- return tvm.compute(Input.shape, lambda b, i, j, c: Input[b, i, j, c] * Scale[c] + Shift[c], name='ScaleShift')
+ return te.compute(Input.shape, lambda b, i, j, c: Input[b, i, j, c] * Scale[c] + Shift[c], name='ScaleShift')
"""Pad the data by constant value """
from __future__ import absolute_import as _abs
import tvm
+from tvm import te
from ..util import equal_const_int
from .. import tag
-@tvm.tag_scope(tag=tag.INJECTIVE+",pad")
+@tvm.te.tag_scope(tag=tag.INJECTIVE+",pad")
def pad(data, pad_before, pad_after=None, pad_value=0.0, name="PadInput"):
"""Pad Input with zeros.
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
n-D input, can be any layout.
pad_before : list / tuple of n ints
Returns
-------
- Output : tvm.Tensor
+ Output : tvm.te.Tensor
n-D, the same layout as Input.
"""
n = len(data.shape)
raise ValueError("Input dimension and pad_after dismatch : %d vs %d" % (
n, len(pad_before)))
out_shape = tuple(
- tvm.ir_pass.Simplify(
+ tvm.tir.ir_pass.Simplify(
(data.shape[i] + pad_before[i] + pad_after[i])) for i in range(n))
- pad_value = (pad_value if isinstance(pad_value, tvm.expr.PrimExpr)
- else tvm.const(pad_value, data.dtype))
+ pad_value = (pad_value if isinstance(pad_value, tvm.tir.PrimExpr)
+ else tvm.tir.const(pad_value, data.dtype))
def _pad(*indices):
not_zero = []
index_tuple = []
not_zero.append(indices[i] >= pad_before[i])
not_zero.append(indices[i] < data.shape[i] + pad_before[i])
if not_zero:
- not_zero = tvm.all(*not_zero)
- return tvm.if_then_else(not_zero, data(*index_tuple), pad_value)
+ not_zero = tvm.tir.all(*not_zero)
+ return tvm.tir.if_then_else(not_zero, data(*index_tuple), pad_value)
return data(*index_tuple)
- return tvm.compute(out_shape, _pad, name=name)
+ return te.compute(out_shape, _pad, name=name)
-@tvm.tag_scope(tag=tag.INJECTIVE + ",pad")
+@tvm.te.tag_scope(tag=tag.INJECTIVE + ",pad")
def mirror_pad(data,
pad_before,
pad_after=None,
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
n-D input, can be any layout.
pad_before : list / tuple of n ints
Returns
-------
- Output : tvm.Tensor
+ Output : tvm.te.Tensor
n-D, the same layout as Input.
"""
n = len(data.shape)
raise ValueError("Input dimension and pad_after dismatch : %d vs %d" %
(n, len(pad_before)))
out_shape = tuple(
- tvm.ir_pass.Simplify((data.shape[i] + pad_before[i] + pad_after[i]))
+ tvm.tir.ir_pass.Simplify((data.shape[i] + pad_before[i] + pad_after[i]))
for i in range(n))
assert mode in ('SYMMETRIC', 'REFLECT')
mode = int(mode == 'SYMMETRIC')
below.append(indices[i] < pad_before[i])
mapped_tuple = []
for i, axis in enumerate(index_tuple):
- mapped_axis = tvm.if_then_else(below[i], -axis - mode, axis)
- mapped_axis = tvm.if_then_else(
+ mapped_axis = tvm.tir.if_then_else(below[i], -axis - mode, axis)
+ mapped_axis = tvm.tir.if_then_else(
above[i], (2 * (data.shape[i] - 1)) - axis + mode, mapped_axis)
mapped_tuple.append(mapped_axis)
return data(*mapped_tuple)
- return tvm.compute(out_shape, _pad, name=name)
+ return te.compute(out_shape, _pad, name=name)
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
n-D with shape of layout
pool_type : str
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
n-D in same layout with height and width dimension size of 1.
e.g., for NCHW, the output shape will be [batch, channel, 1, 1]
"""
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
n-D with shape of layout
kernel : list/tuple of two ints
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
n-D in the same layout
"""
return cpp.nn.pool(data, kernel, stride, padding,
Parameters
----------
- grads : tvm.Tensor
+ grads : tvm.te.Tensor
n-D with shape of layout
- data : tvm.Tensor
+ data : tvm.te.Tensor
n-D with shape of layout
kernel : list/tuple of two ints
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
n-D in the same layout
"""
return cpp.nn.pool_grad(grads, data, kernel,
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
n-D with shape of layout
output_size : tuple of int
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
n-D in the same layout
"""
return cpp.nn.adaptive_pool(data, output_size, POOL_TYPE_CODE[pool_type], layout)
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
n-D with shape of layout
kernel : list/tuple of one int or int
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
n-D in the same layout
"""
if isinstance(kernel, int):
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
n-D with shape of layout
kernel : list/tuple of three ints
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
n-D in the same layout
"""
return cpp.nn.pool3d(data, kernel, stride, padding,
"""TVM operator for softmax and log_softmax compute."""
from __future__ import absolute_import
import tvm
+from tvm import te
-@tvm.tag_scope(tag='softmax_output')
+@tvm.te.tag_scope(tag='softmax_output')
def softmax(x, axis=-1):
"""Perform softmax activation on the data
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
can be any dimension
axis : int
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
output shape is the same as input
"""
shape = x.shape
if axis >= len(shape):
ValueError("axis parameter should be less than input dim")
- k1 = tvm.reduce_axis((0, shape[axis]), name='k')
- k2 = tvm.reduce_axis((0, shape[axis]), name='k')
+ k1 = te.reduce_axis((0, shape[axis]), name='k')
+ k2 = te.reduce_axis((0, shape[axis]), name='k')
def insert_reduce_index(indices, reduce_index):
return indices[:axis] + (reduce_index,) + indices[axis:]
def _compute_max(*indices):
eval_range = insert_reduce_index(indices, k1)
- return tvm.max(x[eval_range], axis=k1)
+ return tvm.te.max(x[eval_range], axis=k1)
def _compute_exp(max_elem, *indices):
non_reduce_indices = get_non_reduce_indices(indices)
- return tvm.exp(x[indices] - max_elem[non_reduce_indices])
+ return te.exp(x[indices] - max_elem[non_reduce_indices])
def _compute_expsum(exp, *indices):
eval_range = insert_reduce_index(indices, k2)
- return tvm.sum(exp[eval_range], axis=k2)
+ return te.sum(exp[eval_range], axis=k2)
def _normalize(exp, expsum, *indices):
non_reduce_indices = get_non_reduce_indices(indices)
return exp[indices] / expsum[non_reduce_indices]
reduced_shape = tuple([dim for (i, dim) in enumerate(shape) if i != axis])
- max_elem = tvm.compute(reduced_shape, _compute_max, name='T_softmax_maxelem')
- exp = tvm.compute(shape, lambda *indices: _compute_exp(max_elem, *indices),
- name='T_softmax_exp')
- expsum = tvm.compute(reduced_shape, lambda *indices: _compute_expsum(exp, *indices),
- name='T_softmax_expsum')
- return tvm.compute(shape, lambda *indices: _normalize(exp, expsum, *indices),
- name='T_softmax_norm', attrs={"axis" : axis})
+ max_elem = te.compute(reduced_shape, _compute_max, name='T_softmax_maxelem')
+ exp = te.compute(shape, lambda *indices: _compute_exp(max_elem, *indices),
+ name='T_softmax_exp')
+ expsum = te.compute(reduced_shape, lambda *indices: _compute_expsum(exp, *indices),
+ name='T_softmax_expsum')
+ return te.compute(shape, lambda *indices: _normalize(exp, expsum, *indices),
+ name='T_softmax_norm', attrs={"axis" : axis})
-@tvm.tag_scope(tag='log_softmax_output')
+@tvm.te.tag_scope(tag='log_softmax_output')
def log_softmax(x):
"""Perform log softmax activation on the data
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
2-D input data
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
2-D output with same shape
"""
assert len(x.shape) == 2, "only support 2-dim log softmax"
m, n = x.shape
- k = tvm.reduce_axis((0, n), name='k')
- max_elem = tvm.compute((m, ), lambda i: tvm.max(x[i, k], axis=k))
- k = tvm.reduce_axis((0, n), name='k')
- expsum = tvm.compute(
- (m, ), lambda i: tvm.sum(tvm.exp(x[i, k] - max_elem[i]), axis=k))
- return tvm.compute(
- x.shape, lambda i, j: x[i, j] - max_elem[i] - tvm.log(expsum[i]))
+ k = te.reduce_axis((0, n), name='k')
+ max_elem = te.compute((m, ), lambda i: tvm.te.max(x[i, k], axis=k))
+ k = te.reduce_axis((0, n), name='k')
+ expsum = te.compute(
+ (m, ), lambda i: te.sum(te.exp(x[i, k] - max_elem[i]), axis=k))
+ return te.compute(
+ x.shape, lambda i, j: x[i, j] - max_elem[i] - te.log(expsum[i]))
"""TVM operator space_to_depth compute."""
from __future__ import absolute_import
import tvm
+from tvm import te
from .. import tag
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
4-D tensor in either NCHW or NHWC layout.
block_size : int
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
Output of shape [N, C * block_size**2, H / block_size, W / block_size]
"""
if layout == 'NCHW':
in_n, in_c, in_h, in_w = data.shape
output_shape = [in_n, in_c * block_size * block_size,
- tvm.truncdiv(in_h, block_size), tvm.truncdiv(in_w, block_size)]
+ tvm.tir.truncdiv(in_h, block_size), tvm.tir.truncdiv(in_w, block_size)]
elif layout == 'NHWC':
in_n, in_h, in_w, in_c = data.shape
- output_shape = [in_n, tvm.truncdiv(in_h, block_size), tvm.truncdiv(
+ output_shape = [in_n, tvm.tir.truncdiv(in_h, block_size), tvm.tir.truncdiv(
in_w, block_size), in_c * block_size * block_size]
else:
raise ValueError("Only NCHW and NHWC layouts are currently supported.")
return n, c, y, x
def _get_pixel(n, c, y, x):
- block_offset = tvm.truncdiv(c, in_c)
- channel_idx = tvm.truncmod(c, in_c)
- x_idx = tvm.truncmod(block_offset, block_size)
- y_idx = tvm.truncdiv(block_offset, block_size)
+ block_offset = tvm.tir.truncdiv(c, in_c)
+ channel_idx = tvm.tir.truncmod(c, in_c)
+ x_idx = tvm.tir.truncmod(block_offset, block_size)
+ y_idx = tvm.tir.truncdiv(block_offset, block_size)
if layout == 'NCHW':
output = data(n, channel_idx, y_idx +
n, c, y, x = _get_indices(*indices)
return _get_pixel(n, c, y, x)
- return tvm.compute(output_shape, _compute, name='space_to_depth', tag=tag.INJECTIVE)
+ return te.compute(output_shape, _compute, name='space_to_depth', tag=tag.INJECTIVE)
"""Sparse operators"""
from __future__ import absolute_import
import tvm
+from tvm import te
from ..util import get_const_tuple
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
2-D with shape [M, K], float32
- weight_data : tvm.Tensor
+ weight_data : tvm.te.Tensor
1-D with shape [nnz] (CSR) or
3-D with shape [num_blocks, bs_r, bs_c] (BSR)
- weight_indices : tvm.Tensor
+ weight_indices : tvm.te.Tensor
1-D with shape [nnz] (CSR) or
1-D with shape [num_blocks] (BSR)
- weight_indptr : tvm.Tensor
+ weight_indptr : tvm.te.Tensor
1-D with shape [N + 1] (CSR) or
1-D with shape [(N + 1) // bs_r] (BSR)
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
2-D with shape [M, N]
"""
assert len(weight_data.shape) in (1, 3)
row_start = weight_indptr[row]
row_end = weight_indptr[row + 1]
row_elems = row_end - row_start
- elem_idx = tvm.reduce_axis((0, row_elems), name="elem_idx")
+ elem_idx = te.reduce_axis((0, row_elems), name="elem_idx")
elem = row_start + elem_idx
a_val = weight_data[elem]
weight_val = data[i, weight_indices[elem]]
- return tvm.sum(a_val * weight_val, axis=elem_idx)
- return tvm.compute(oshape, f, tag="sparse_dense_csrmm")
+ return te.sum(a_val * weight_val, axis=elem_idx)
+ return te.compute(oshape, f, tag="sparse_dense_csrmm")
def _sparse_dense_bsrmm(data, weight_data, weight_indices, weight_indptr):
row_start = weight_indptr[nb_j]
row_end = weight_indptr[nb_j + 1]
row_elems = row_end - row_start
- elem_idx = tvm.reduce_axis(
+ elem_idx = te.reduce_axis(
(0, row_elems), name="elem_idx")
block_offset = row_start + elem_idx
- c = tvm.reduce_axis((0, bs_c), name="c")
+ c = te.reduce_axis((0, bs_c), name="c")
block_j = weight_indices[block_offset]
block_ij_val = weight_data[block_offset][j][c]
x_val = data[i, bs_c * block_j + c]
- return tvm.sum(block_ij_val * x_val, axis=[elem_idx, c])
+ return te.sum(block_ij_val * x_val, axis=[elem_idx, c])
- idxd = tvm.indexdiv
- idxm = tvm.indexmod
+ idxd = tvm.tir.indexdiv
+ idxm = tvm.tir.indexmod
- bsrmm_block = tvm.compute(
+ bsrmm_block = te.compute(
(m, num_blocks, bs_r), _compute_block,
tag="sparse_dense_bsrmm_block")
- return tvm.compute(
+ return te.compute(
(m, num_blocks * bs_r),
lambda m, n: bsrmm_block[m, idxd(n, bs_r), idxm(n, bs_r)],
tag="sparse_dense_bsrmm")
Parameters
----------
- sparse_data : tvm.Tensor
+ sparse_data : tvm.te.Tensor
1-D with shape [nonzeros], dtype of 'float32'
- sparse_indices : tvm.Tensor
+ sparse_indices : tvm.te.Tensor
1-D with shape [nonzeros], dtype of 'int32'
- sparse_indptr : tvm.Tensor
+ sparse_indptr : tvm.te.Tensor
1-D with shape [n+1], dtype of 'int32'
Returns
-------
- out_data : tvm.Tensor
+ out_data : tvm.te.Tensor
1-D with shape [nonzeros], dtype of 'float32'
- out_indices : tvm.Tensor
+ out_indices : tvm.te.Tensor
1-D with shape [nonzeros], dtype of 'int32'
- out_indptr : tvm.Tensor
+ out_indptr : tvm.te.Tensor
1-D with shape [n+1], dtype of 'int32'
"""
assert len(sparse_data.shape) == 1, "error in data dimension"
# TODO: Add BSR transpose support
- output_data, output_indices, output_indptr = tvm.extern(
+ output_data, output_indices, output_indptr = te.extern(
shape=output_shape,
inputs=[sparse_data, sparse_indices, sparse_indptr],
fcompute=lambda ins, outs:
def _csr_transpose_ir(data, indices, indptr, out_data, out_indices, out_indptr):
"""define ir for csr_transpose"""
- irb = tvm.ir_builder.create()
+ irb = tvm.tir.ir_builder.create()
data_ptr = irb.buffer_ptr(data)
indices_ptr = irb.buffer_ptr(indices)
# specific language governing permissions and limitations
# under the License.
"""TVM operator upsampling compute."""
-from __future__ import absolute_import
import topi
-import tvm
+from tvm import te
from ..util import simplify
Parameters
----------
- inputs : tvm.Tensor
+ inputs : tvm.te.Tensor
inputs is a 4-D tensor with shape
[batch, channel, in_height, in_width]
or [batch, in_height, in_width, channel]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
4-D with shape [batch, channel, in_height*scale_h, in_width*scale_w]
or [batch, in_height*scale, in_width*scale, channel]
"""
base_layout = layout[0:4]
if base_layout == "NCHW":
- out_shape = (simplify(topi.cast(tvm.round(data.shape[2] * scale_h), data.shape[2].dtype)),
- simplify(topi.cast(tvm.round(data.shape[3] * scale_w), data.shape[3].dtype)))
+ out_shape = (simplify(topi.cast(te.round(data.shape[2] * scale_h), data.shape[2].dtype)),
+ simplify(topi.cast(te.round(data.shape[3] * scale_w), data.shape[3].dtype)))
elif layout == "NHWC":
- out_shape = (simplify(topi.cast(tvm.round(data.shape[1] * scale_h), data.shape[1].dtype)),
- simplify(topi.cast(tvm.round(data.shape[2] * scale_w), data.shape[2].dtype)))
+ out_shape = (simplify(topi.cast(te.round(data.shape[1] * scale_h), data.shape[1].dtype)),
+ simplify(topi.cast(te.round(data.shape[2] * scale_w), data.shape[2].dtype)))
else:
raise ValueError("not support this layout {} yet".format(layout))
Parameters
----------
- inputs : tvm.Tensor
+ inputs : tvm.te.Tensor
inputs is a 5-D tensor with shape
[batch, channel, in_depth, in_height, in_width]
or [batch, in_depth, in_height, in_width, channel]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
5-D with shape [batch, channel, in_depth*scale, in_height*scale, in_width*scale]
or [batch, in_depth*scale, in_height*scale, in_width*scale, channel]
"""
base_layout = layout[0:5]
if base_layout == "NCDHW":
- out_shape = (simplify(topi.cast(tvm.round(data.shape[2] * scale_d), data.shape[2].dtype)),
- simplify(topi.cast(tvm.round(data.shape[3] * scale_h), data.shape[3].dtype)),
- simplify(topi.cast(tvm.round(data.shape[4] * scale_w), data.shape[4].dtype)))
+ out_shape = (simplify(topi.cast(te.round(data.shape[2] * scale_d), data.shape[2].dtype)),
+ simplify(topi.cast(te.round(data.shape[3] * scale_h), data.shape[3].dtype)),
+ simplify(topi.cast(te.round(data.shape[4] * scale_w), data.shape[4].dtype)))
elif layout == "NDHWC":
- out_shape = (simplify(topi.cast(tvm.round(data.shape[1] * scale_d), data.shape[1].dtype)),
- simplify(topi.cast(tvm.round(data.shape[2] * scale_h), data.shape[2].dtype)),
- simplify(topi.cast(tvm.round(data.shape[3] * scale_w), data.shape[3].dtype)))
+ out_shape = (simplify(topi.cast(te.round(data.shape[1] * scale_d), data.shape[1].dtype)),
+ simplify(topi.cast(te.round(data.shape[2] * scale_h), data.shape[2].dtype)),
+ simplify(topi.cast(te.round(data.shape[3] * scale_w), data.shape[3].dtype)))
else:
raise ValueError("not support this layout {} yet".format(layout))
_, _, IH, IW = data.shape
_, _, KH, KW = kernel.shape
_, _, OH, OW = out.shape
- hstride = (IH - KH) // tvm.make.Max(OH - 1, 1) + tvm.expr.Select(OH == 1, 1, 0)
- wstride = (IW - KW) // tvm.make.Max(OW - 1, 1) + tvm.expr.Select(OW == 1, 1, 0)
+ hstride = (IH - KH) // tvm.te.max(OH - 1, 1) + tvm.tir.Select(OH == 1, 1, 0)
+ wstride = (IW - KW) // tvm.te.max(OW - 1, 1) + tvm.tir.Select(OW == 1, 1, 0)
return get_const_int(hstride), get_const_int(wstride)
f = lambda j, i: reduce(mul, ((a[i]-a[k] if k != i else 1) for k in range(0, n-1)), 1)
Ff = np.fromfunction(np.vectorize(f), (1, n-1), dtype=int)
f = lambda i, nth: (reduce(mul, [(np.poly1d([1, -a[k]]) if k != i else 1) \
- for k in range(0, n-1)], 1)).coef[n-1-nth-1]/Ff[0, i]
+ for k in range(0, n-1)], 1)).coef[n-1-nth-1]/Ff[0, i]
F = np.fromfunction(np.vectorize(f), (n-1, n-1), dtype=int)
f = lambda i, j: -a[i]**(n-1)
t = np.fromfunction(np.vectorize(f), (n-1, 1), dtype=int)
#pylint: disable=invalid-name, no-member, too-many-locals, too-many-statements, too-many-arguments, too-many-branches, line-too-long
"""Schedule for conv2d_nchw with auto fusion"""
import tvm
+from tvm import te
from .. import tag
def schedule_conv2d_nchw(outs):
s: Schedule
The computation schedule for conv2d_nchw.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
scheduled_ops = []
def _schedule(conv2d, data):
if OP not in s.outputs:
s[OP].opengl()
for tensor in OP.input_tensors:
- if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops:
+ if isinstance(tensor.op, tvm.te.ComputeOp) and tensor.op not in scheduled_ops:
traverse(tensor.op)
# schedule conv2d_nchw
elif OP.tag.startswith('conv2d_nchw'):
conv2d = OP.output(0)
data = OP.input_tensors[0]
kernel = OP.input_tensors[1]
- if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag:
+ if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag:
s[kernel].compute_inline()
_schedule(conv2d, data)
else:
# under the License.
# pylint: disable=invalid-name, unused-variable
"""Schedule for dense operator"""
-from __future__ import absolute_import as _abs
-import tvm
+from tvm import te
from .. import tag
def schedule_dense(outs):
s: Schedule
The computation schedule for dense.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
scheduled_ops = []
def _schedule(Dense):
if OP not in s.outputs:
s[OP].compute_inline()
for tensor in OP.input_tensors:
- if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops:
+ if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops:
traverse(tensor.op)
# schedule dense
elif OP.tag == 'dense':
# under the License.
# pylint: disable=invalid-name, unused-variable,
"""Schedule for composition of injective operator"""
-import tvm
+from tvm import te
def schedule_injective_from_existing(sch, out):
"""Schedule for injective op from existing schedule.
sch: Schedule
The computation schedule for the op.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
- tvm.schedule.AutoInlineInjective(s)
+ te.schedule.AutoInlineInjective(s)
for out in outs:
schedule_injective_from_existing(s, out)
return s
# under the License.
# pylint: disable=invalid-name, unused-variable, unused-argument
"""Schedule for pooling operators"""
-import tvm
+from tvm import te
from .. import tag
def schedule_adaptive_pool(outs):
s: Schedule
The computation schedule for adaptive pool.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
scheduled_ops = []
def _schedule(Pool):
if OP not in s.outputs:
s[OP].opengl()
for tensor in OP.input_tensors:
- if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops:
+ if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops:
traverse(tensor.op)
# schedule global_pool
elif OP.tag.startswith('adaptive_pool'):
s: Schedule
The computation schedule for pool.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
scheduled_ops = []
def _schedule(PaddedInput, Pool):
- if isinstance(PaddedInput.op, tvm.tensor.ComputeOp):
+ if isinstance(PaddedInput.op, te.tensor.ComputeOp):
s[PaddedInput].opengl()
if Pool.op in s.outputs:
Out = Pool
if OP not in s.outputs:
s[OP].compute_inline()
for tensor in OP.input_tensors:
- if tensor.op not in scheduled_ops and isinstance(tensor.op, tvm.tensor.ComputeOp):
+ if tensor.op not in scheduled_ops and isinstance(tensor.op, te.tensor.ComputeOp):
traverse(tensor.op)
# schedule pool
elif OP.tag.startswith('pool'):
# under the License.
# pylint: disable=invalid-name, unused-variable, trailing-whitespace
"""Schedule for softmax operator"""
-import tvm
+from tvm import te
def schedule_softmax(outs):
"""Schedule for softmax op.
sch: Schedule
The computation schedule for the op.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
softmax = outs[0]
op_tag = softmax.op.tag
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
The input tvm tensor
axis : None or int or tuple of int
Returns
-------
- ret : tvm.Tensor
+ ret : tvm.te.Tensor
"""
return cpp.sum(data, axis, keepdims)
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
The input tvm boolean tensor
axis : None or int or tuple of int
Returns
-------
- ret : tvm.Tensor
+ ret : tvm.te.Tensor
"""
return cpp.all(data, axis, keepdims)
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
The input tvm boolean tensor
axis : None or int or tuple of int
Returns
-------
- ret : tvm.Tensor
+ ret : tvm.te.Tensor
"""
return cpp.any(data, axis, keepdims)
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
The input tvm tensor
axis : None or int or tuple of int
Returns
-------
- ret : tvm.Tensor
+ ret : tvm.te.Tensor
"""
return cpp.max(data, axis, keepdims)
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
The input tvm tensor
axis : None or int or tuple of int
Returns
-------
- ret : tvm.Tensor
+ ret : tvm.te.Tensor
"""
return cpp.min(data, axis, keepdims)
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
The input tvm tensor
axis : None or int or tuple of int
Returns
-------
- ret : tvm.Tensor
+ ret : tvm.te.Tensor
"""
return cpp.argmax(data, axis, keepdims)
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
The input tvm tensor
axis : None or int or tuple of int
Returns
-------
- ret : tvm.Tensor
+ ret : tvm.te.Tensor
"""
return cpp.argmin(data, axis, keepdims)
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
The input tvm tensor
axis : None or int or tuple of int
Returns
-------
- ret : tvm.Tensor
+ ret : tvm.te.Tensor
"""
return cpp.prod(data, axis, keepdims)
cfg: ConfigEntity
The config for this template
- input : tvm.Tensor
+ input : tvm.te.Tensor
4-D with shape [batch, in_channel, in_height, in_width]
- filter : tvm.Tensor
+ filter : tvm.te.Tensor
4-D with shape [num_filter, in_channel, filter_height, filter_width]
strides : int or a list/tuple of two ints
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
4-D with shape [batch, out_channel, out_height, out_width]
"""
OH = (H + 2 * pad_h - KH) // stride_h + 1
OW = (W + 2 * pad_w - KW) // stride_w + 1
cfg.add_flop(2 * N * OH * OW * CO * CI * ((KH - 1) * dilation_h + 1) *\
- ((KW - 1) * dilation_w + 1))
+ ((KW - 1) * dilation_w + 1))
return miopen.conv2d_forward(data,
kernel,
# under the License.
# pylint: disable=invalid-name, unused-variable, unused-argument
"""Schedule for dense operator"""
-from __future__ import absolute_import as _abs
-import tvm
+from tvm import te
from tvm import autotvm
from tvm.contrib import rocblas
from .. import generic, nn
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
2-D with shape [batch, in_dim]
- weight : tvm.Tensor
+ weight : tvm.te.Tensor
2-D with shape [out_dim, in_dim]
- bias : tvm.Tensor, optional
+ bias : tvm.te.Tensor, optional
1-D with shape [out_dim]
out_dtype : str
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
2-D with shape [batch, out_dim]
"""
assert len(data.shape) == 2 and len(weight.shape) == 2, \
s: Schedule
The computation schedule for dense.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if op.tag == 'dense':
else:
Out = outs[0].op.output(0)
s[Dense].compute_at(s[Out], s[Out].op.axis[1])
- s[Out].bind(s[Out].op.axis[0], tvm.thread_axis("blockIdx.y"))
- s[Out].bind(s[Out].op.axis[1], tvm.thread_axis("blockIdx.x"))
+ s[Out].bind(s[Out].op.axis[0], te.thread_axis("blockIdx.y"))
+ s[Out].bind(s[Out].op.axis[1], te.thread_axis("blockIdx.x"))
tx = s[Dense].op.reduce_axis[0]
- thread_x = tvm.thread_axis("threadIdx.x")
+ thread_x = te.thread_axis("threadIdx.x")
s[Dense].bind(tx, thread_x)
s[DenseF].compute_at(s[Dense], tx)
s[Dense].set_store_predicate(thread_x.var.equal(0))
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
2-D with shape [batch, in_dim]
- weight : tvm.Tensor
+ weight : tvm.te.Tensor
2-D with shape [out_dim, in_dim]
- bias : tvm.Tensor, optional
+ bias : tvm.te.Tensor, optional
1-D with shape [out_dim]
out_dtype : str
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
2-D with shape [batch, out_dim]
"""
assert out_dtype == data.dtype, "Mixed precision not supported."
out_dim, _ = weight.shape
cfg.add_flop(batch * in_dim * out_dim * 2)
if bias is not None:
- matmul = tvm.compute((batch, out_dim),
- lambda i, j: matmul[i, j] + bias[j],
- tag=tag.BROADCAST)
+ matmul = te.compute((batch, out_dim),
+ lambda i, j: matmul[i, j] + bias[j],
+ tag=tag.BROADCAST)
return matmul
# pylint: disable=too-many-arguments
"""Argsort operator"""
import tvm
-from tvm import api
+from tvm import te
from .util import get_const_tuple
def argsort(data, valid_count=None, axis=-1, is_ascend=1, dtype="float32"):
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
The input tensor.
- valid_count : tvm.Tensor, optional
+ valid_count : tvm.te.Tensor, optional
1-D tensor for valid number of boxes only for ssd.
axis : int, optional
- Axis along which to sort the input tensor.
+ Axis along which to sort the input tensor.
By default the flattened array is used.
is_ascend : boolean, optional
Returns
-------
- out : tvm.Tensor
+ out : tvm.te.Tensor
Sorted index tensor.
Example
# An example to use argsort
dshape = (1, 5, 6)
- data = tvm.placeholder(dshape, name="data")
+ data = te.placeholder(dshape, name="data")
axis = 0
is_ascend = False
out = argsort(data, axis=axis, is_ascend=is_ascend)
tvm_out = tvm.nd.array(np.zeros(dshape, dtype=data.dtype), ctx)
f(tvm_data, tvm_out)
"""
- data_buf = api.decl_buffer(data.shape, data.dtype, "data_buf", data_alignment=8)
+ data_buf = tvm.tir.decl_buffer(data.shape, data.dtype, "data_buf", data_alignment=8)
if valid_count is not None:
- valid_count_buf = api.decl_buffer(valid_count.shape, valid_count.dtype,
- "valid_count_buf", data_alignment=4)
- out_buf = api.decl_buffer(data.shape, "int32", "out_buf", data_alignment=8)
+ valid_count_buf = tvm.tir.decl_buffer(
+ valid_count.shape, valid_count.dtype,
+ "valid_count_buf", data_alignment=4)
+ out_buf = tvm.tir.decl_buffer(data.shape, "int32", "out_buf", data_alignment=8)
out = \
- tvm.extern(data.shape,
- [data, valid_count],
- lambda ins, outs: tvm.call_packed(
- "tvm.contrib.sort.argsort_nms", ins[0], ins[1],
- outs[0], axis, is_ascend),
- dtype="int32",
- in_buffers=[data_buf, valid_count_buf],
- out_buffers=out_buf,
- name="argsort_nms_cpu",
- tag="argsort_nms_cpu")
+ te.extern(data.shape,
+ [data, valid_count],
+ lambda ins, outs: tvm.tir.call_packed(
+ "tvm.contrib.sort.argsort_nms", ins[0], ins[1],
+ outs[0], axis, is_ascend),
+ dtype="int32",
+ in_buffers=[data_buf, valid_count_buf],
+ out_buffers=out_buf,
+ name="argsort_nms_cpu",
+ tag="argsort_nms_cpu")
else:
- out_buf = api.decl_buffer(data.shape, dtype, "out_buf", data_alignment=8)
+ out_buf = tvm.tir.decl_buffer(data.shape, dtype, "out_buf", data_alignment=8)
out = \
- tvm.extern(data.shape,
- [data],
- lambda ins, outs: tvm.call_packed(
- "tvm.contrib.sort.argsort", ins[0],
- outs[0], axis, is_ascend),
- dtype=dtype,
- in_buffers=[data_buf],
- out_buffers=out_buf,
- name="argsort_cpu",
- tag="argsort_cpu")
+ te.extern(data.shape,
+ [data],
+ lambda ins, outs: tvm.tir.call_packed(
+ "tvm.contrib.sort.argsort", ins[0],
+ outs[0], axis, is_ascend),
+ dtype=dtype,
+ in_buffers=[data_buf],
+ out_buffers=out_buf,
+ name="argsort_cpu",
+ tag="argsort_cpu")
return out
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
The input tensor.
k : int, optional
Returns
-------
- out : tvm.Tensor or List[tvm.Tensor]
+ out : tvm.te.Tensor or List[tvm.te.Tensor]
The computed result.
"""
assert ret_type in ["both", "values", "indices"]
- data_buf = api.decl_buffer(data.shape, data.dtype, "data_buf", data_alignment=8)
+ data_buf = tvm.tir.decl_buffer(data.shape, data.dtype, "data_buf", data_alignment=8)
out_shape = list(get_const_tuple(data.shape))
if k >= 1:
out_shape[axis] = k
out_bufs = []
if ret_type in ["both", "values"]:
- out_bufs.append(api.decl_buffer(out_shape, data.dtype, "value_buf", data_alignment=8))
+ out_bufs.append(tvm.tir.decl_buffer(out_shape, data.dtype, "value_buf", data_alignment=8))
if ret_type in ["both", "indices"]:
- out_bufs.append(api.decl_buffer(out_shape, dtype, "indices_buf", data_alignment=8))
+ out_bufs.append(tvm.tir.decl_buffer(out_shape, dtype, "indices_buf", data_alignment=8))
out_shapes = [out_shape] * len(out_bufs)
- out = tvm.extern(out_shapes,
- [data],
- lambda ins, outs: tvm.call_packed(
- "tvm.contrib.sort.topk", ins[0], *outs, k, axis, ret_type, is_ascend),
- in_buffers=[data_buf],
- out_buffers=out_bufs,
- name="topk_cpu",
- tag="topk_cpu")
+ out = te.extern(out_shapes,
+ [data],
+ lambda ins, outs: tvm.tir.call_packed(
+ "tvm.contrib.sort.topk", ins[0], *outs, k, axis, ret_type, is_ascend),
+ in_buffers=[data_buf],
+ out_buffers=out_bufs,
+ name="topk_cpu",
+ tag="topk_cpu")
return out
"""TVM operator compute SpMM in CSR format."""
from __future__ import absolute_import
import tvm
+from tvm import te
from .. import tag
from ..util import simplify
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
1-D with shape [nonzeros]
- indices : tvm.Tensor
+ indices : tvm.te.Tensor
1-D with shape [nonzeros]
- indptr : tvm.Tensor
+ indptr : tvm.te.Tensor
1-D with shape [m+1]
- weight : tvm.Tensor
+ weight : tvm.te.Tensor
2-D with shape [k, n]
- bias : tvm.Tensor, optional
+ bias : tvm.te.Tensor, optional
1-D with shape [m]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
2-D with shape [m, n]
"""
assert len(data.shape) == 1 and len(indices.shape) == 1 and len(indptr.shape) == 1 \
and len(weight.shape) == 2, "only support 2-dim csrmm"
- assert isinstance(weight, tvm.tensor.Tensor), \
- "weight matrix is assumed to be tvm.Tensor, but weight is `%s`" % (type(weight))
+ assert isinstance(weight, te.tensor.Tensor), \
+ "weight matrix is assumed to be tvm.te.Tensor, but weight is `%s`" % (type(weight))
if bias is not None:
assert len(bias.shape) == 1
M = simplify(indptr.shape[0]-1)
_, N = weight.shape
def csrmm_default_ir(data, indices, indptr, weight, out):
"""define ir for csrmm"""
- irb = tvm.ir_builder.create()
+ irb = tvm.tir.ir_builder.create()
data_ptr = irb.buffer_ptr(data)
indices_ptr = irb.buffer_ptr(indices)
indptr_ptr = irb.buffer_ptr(indptr)
out_ptr[row*N+n] += dot[0]
return irb.get()
oshape = (M, N)
- matmul = tvm.extern(oshape, [data, indices, indptr, weight],
- lambda ins, outs: csrmm_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]),
- tag="csrmm", dtype='float32', name='out')
+ matmul = te.extern(oshape, [data, indices, indptr, weight],
+ lambda ins, outs: csrmm_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]),
+ tag="csrmm", dtype='float32', name='out')
if bias is not None:
- matmul = tvm.compute(oshape, lambda i, j: matmul[i, j] + bias[i], \
- tag=tag.BROADCAST)
+ matmul = te.compute(oshape, lambda i, j: matmul[i, j] + bias[i], \
+ tag=tag.BROADCAST)
return matmul
a : tvm.contrib.sparse.CSRNDArray
2-D sparse matrix with shape [m, k]
- b : tvm.Tensor
+ b : tvm.te.Tensor
2-D dense matrix with shape [k, n]
- c : tvm.Tensor, optional
+ c : tvm.te.Tensor, optional
1-D dense vector with shape [n]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
2-D with shape [m, n]
"""
return csrmm_default(a.data, a.indices, a.indptr, b, c)
"""TVM operator compute SpMV in CSR format."""
from __future__ import absolute_import
import tvm
+from tvm import te
from .. import tag
def csrmv_default(data, indices, indptr, weight, bias=None):
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
1-D with shape [nonzeros]
- indices : tvm.Tensor
+ indices : tvm.te.Tensor
1-D with shape [nonzeros]
- indptr : tvm.Tensor
+ indptr : tvm.te.Tensor
1-D with shape [m+1]
- weight : tvm.Tensor
+ weight : tvm.te.Tensor
2-D with shape [k, 1]
- bias : tvm.Tensor, optional
+ bias : tvm.te.Tensor, optional
1-D with shape [1]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
2-D with shape [m, 1]
"""
assert len(data.shape) == 1 and len(weight.shape) == 2, \
"only support 2-dim csrmv"
- assert isinstance(weight, tvm.tensor.Tensor), \
- "weight matrix is assumed to be tvm.Tensor, but weight is `%s`" % (type(weight))
+ assert isinstance(weight, te.tensor.Tensor), \
+ "weight matrix is assumed to be tvm.te.Tensor, but weight is `%s`" % (type(weight))
if bias is not None:
assert len(bias.shape) == 1
batch = indptr.shape[0]-1
def csrmv_default_ir(data, indices, indptr, weight, out):
"""define ir for csrmv"""
- irb = tvm.ir_builder.create()
+ irb = tvm.tir.ir_builder.create()
data_ptr = irb.buffer_ptr(data)
indices_ptr = irb.buffer_ptr(indices)
indptr_ptr = irb.buffer_ptr(indptr)
out_ptr[row] += dot[0]
return irb.get()
oshape = (batch, 1)
- matmul = tvm.extern(oshape, [data, indices, indptr, weight],
- lambda ins, outs: csrmv_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]),
- tag="csrmv", dtype='float32', name='csrmv')
+ matmul = te.extern(oshape, [data, indices, indptr, weight],
+ lambda ins, outs: csrmv_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]),
+ tag="csrmv", dtype='float32', name='csrmv')
if bias is not None:
- matmul = tvm.compute((batch, 1), lambda i, j: matmul[i, 0] + bias[i], \
- tag=tag.BROADCAST)
+ matmul = te.compute((batch, 1), lambda i, j: matmul[i, 0] + bias[i], \
+ tag=tag.BROADCAST)
return matmul
a : tvm.contrib.sparse.CSRNDArray
2-D sparse matrix with shape [m, k]
- x : tvm.Tensor
+ x : tvm.te.Tensor
2-D dense matrix with shape [k, 1]
- y : tvm.Tensor, optional
+ y : tvm.te.Tensor, optional
1-D dense vector with shape [1]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
2-D dense matrix with shape [m, 1]
"""
return csrmv_default(a.data, a.indices, a.indptr, x, y)
"""TVM operator compute Dense in CSR format."""
from __future__ import absolute_import
import tvm
+from tvm import te
from .. import tag
from ..util import simplify
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
1-D with shape [num_nonzeros]
- indices : tvm.Tensor
+ indices : tvm.te.Tensor
1-D with shape [num_nonzeros]
- indptr : tvm.Tensor
+ indptr : tvm.te.Tensor
1-D with shape [m+1]
- weight : tvm.Tensor
+ weight : tvm.te.Tensor
2-D with shape [k, n]
- bias : tvm.Tensor, optional
+ bias : tvm.te.Tensor, optional
1-D with shape [m]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
2-D with shape [m, n]
"""
assert len(data.shape) == 1 and len(indices.shape) == 1 and len(indptr.shape) == 1 \
and len(weight.shape) == 2, "only support 2-dim dense"
- assert isinstance(weight, tvm.tensor.Tensor), \
- "weight matrix is assumed to be tvm.Tensor, but weight is `%s`" % (type(weight))
+ assert isinstance(weight, te.tensor.Tensor), \
+ "weight matrix is assumed to be tvm.te.Tensor, but weight is `%s`" % (type(weight))
if bias is not None:
assert len(bias.shape) == 1
dtype = data.dtype
def dense_default_ir(data, indices, indptr, weight, out):
"""Define IR for Dense"""
dtype = data.dtype
- irb = tvm.ir_builder.create()
+ irb = tvm.tir.ir_builder.create()
data_ptr = irb.buffer_ptr(data)
indices_ptr = irb.buffer_ptr(indices)
indptr_ptr = irb.buffer_ptr(indptr)
with irb.for_range(0, N, for_type="vectorize", name='n') as n:
with irb.for_range(0, M, for_type="parallel", name='m') as m:
dot = irb.allocate(dtype, (1,), name='dot', scope='local')
- out_ptr[m*N+n] = tvm.const(0, dtype)
- dot[0] = tvm.const(0, dtype)
+ out_ptr[m*N+n] = tvm.tir.const(0, dtype)
+ dot[0] = tvm.tir.const(0, dtype)
row_start = indptr_ptr[m]
row_elems = indptr_ptr[m+1]-row_start
with irb.for_range(0, row_elems, name='k') as k:
out_ptr[m*N+n] += dot[0]
return irb.get()
oshape = (M, N)
- matmul = tvm.extern(oshape, [data, indices, indptr, weight],
- lambda ins, outs: dense_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]),
- tag="dense", dtype=dtype, name='out')
+ matmul = te.extern(oshape, [data, indices, indptr, weight],
+ lambda ins, outs: dense_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]),
+ tag="dense", dtype=dtype, name='out')
if bias is not None:
- matmul = tvm.compute(oshape, lambda i, j: matmul[i, j] + bias[j], \
- tag=tag.BROADCAST)
+ matmul = te.compute(oshape, lambda i, j: matmul[i, j] + bias[j], \
+ tag=tag.BROADCAST)
return matmul
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
2-D with shape [m, k]
- w_data : tvm.Tensor
+ w_data : tvm.te.Tensor
1-D with shape [nonzeros]
- w_indices : tvm.Tensor
+ w_indices : tvm.te.Tensor
1-D with shape [nonzeros]
- w_indptr : tvm.Tensor
+ w_indptr : tvm.te.Tensor
1-D with shape [n+1]
- bias : tvm.Tensor, optional
+ bias : tvm.te.Tensor, optional
1-D with shape [n]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
2-D with shape [m, n]
"""
assert len(w_data.shape) == 1 and len(w_indices.shape) == 1 and len(w_indptr.shape) == 1 \
and len(data.shape) == 2, "only support 2-dim dense"
- assert isinstance(data, tvm.tensor.Tensor), \
- "data matrix is assumed to be tvm.Tensor, but weight is `%s`" % (type(data))
+ assert isinstance(data, te.tensor.Tensor), \
+ "data matrix is assumed to be tvm.te.Tensor, but weight is `%s`" % (type(data))
if bias is not None:
assert len(bias.shape) == 1
dtype = data.dtype
def dense_default_ir(data, w_data, w_indices, w_indptr, out):
"""Define IR for Dense"""
dtype = data.dtype
- irb = tvm.ir_builder.create()
+ irb = tvm.tir.ir_builder.create()
data_ptr = irb.buffer_ptr(data)
w_data_ptr = irb.buffer_ptr(w_data)
w_indices_ptr = irb.buffer_ptr(w_indices)
with irb.for_range(0, M, for_type="vectorize", name='m') as m:
with irb.for_range(0, N, for_type="parallel", name='n') as n:
dot = irb.allocate(dtype, (1,), name='dot', scope='local')
- out_ptr[m*N+n] = tvm.const(0, dtype)
- dot[0] = tvm.const(0, dtype)
+ out_ptr[m*N+n] = tvm.tir.const(0, dtype)
+ dot[0] = tvm.tir.const(0, dtype)
row_start = w_indptr_ptr[n]
row_elems = w_indptr_ptr[n+1]-row_start
with irb.for_range(0, row_elems, name='k') as k:
out_ptr[m*N+n] += dot[0]
return irb.get()
oshape = (M, N)
- matmul = tvm.extern(oshape, [data, w_data, w_indices, w_indptr],
- lambda ins, outs: dense_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]),
- tag="dense", dtype=dtype, name='out')
+ matmul = te.extern(oshape, [data, w_data, w_indices, w_indptr],
+ lambda ins, outs: dense_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]),
+ tag="dense", dtype=dtype, name='out')
if bias is not None:
- matmul = tvm.compute(oshape, lambda i, j: matmul[i, j] + bias[j], \
- tag=tag.BROADCAST)
+ matmul = te.compute(oshape, lambda i, j: matmul[i, j] + bias[j], \
+ tag=tag.BROADCAST)
return matmul
Parameters
----------
- data : tvm.contrib.sparse.CSRNDArray or tvm.tensor.Tensor
+ data : tvm.contrib.sparse.CSRNDArray or te.tensor.Tensor
2-D with shape [batch, in_dim]
- weight : tvm.tensor.Tensor or tvm.contrib.sparse.CSRNDArray
+ weight : te.tensor.Tensor or tvm.contrib.sparse.CSRNDArray
2-D with shape [out_dim, in_dim]
- bias : tvm.tensor.Tensor, optional
+ bias : te.tensor.Tensor, optional
1-D with shape [out_dim]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
2-D with shape [batch, out_dim]
"""
ret = None
if isinstance(data, tvm.contrib.sparse.CSRPlaceholderOp) and \
- isinstance(weight, tvm.tensor.Tensor):
+ isinstance(weight, te.tensor.Tensor):
ret = dense_si(data.data, data.indices, data.indptr, weight, bias)
- elif isinstance(data, tvm.tensor.Tensor) and \
- isinstance(weight, tvm.contrib.sparse.CSRPlaceholderOp):
+ elif isinstance(data, te.tensor.Tensor) and \
+ isinstance(weight, tvm.contrib.sparse.CSRPlaceholderOp):
ret = dense_sw(data, weight.data, weight.indices, weight.indptr, bias)
else:
raise NotImplementedError("implementation for %s as data and %s as weights, "
Parameters
----------
- xs : list of tvm.Tensor
+ xs : list of tvm.te.Tensor
Input arguments.
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
return cpp.elemwise_sum(xs)
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
return cpp.full(shape, dtype, fill_value)
Parameters
----------
- x : tvm.Tensor
+ x : tvm.te.Tensor
Input argument.
fill_value : float
Value to be filled
Returns
-------
- y : tvm.Tensor
+ y : tvm.te.Tensor
The result.
"""
return cpp.full_like(x, fill_value)
bpad_left = filter_w - 1 - fpad_left
bpad_right = filter_w - 1 - fpad_right
padded_a_np = np.zeros((batch, in_c, dilated_a_np.shape[2]+bpad_top+bpad_bottom, \
- dilated_a_np.shape[3]+bpad_left+bpad_right))
+ dilated_a_np.shape[3]+bpad_left+bpad_right))
padded_a_np[:, :, bpad_top:dilated_a_np.shape[2]+bpad_top, \
- bpad_left:dilated_a_np.shape[3]+bpad_left] = dilated_a_np
+ bpad_left:dilated_a_np.shape[3]+bpad_left] = dilated_a_np
# convolution stage
out_h = (in_h - 1) * stride_h - fpad_top - fpad_bottom + filter_h
out_w = (in_w - 1) * stride_w - fpad_left - fpad_right + filter_w
if pad_d > 0 or pad_h > 0 or pad_w > 0:
apad = np.zeros((in_depth + pad_d, in_height + pad_h, in_width + pad_w))
apad[pad_front:pad_front + in_depth, pad_top:pad_top + in_height,\
- pad_left:pad_left + in_width] = a_np[n, c]
+ pad_left:pad_left + in_width] = a_np[n, c]
else:
apad = a_np[n, c]
out = scipy.signal.convolve(
if pad_d > 0 or pad_h > 0 or pad_w > 0:
apad = np.zeros((in_depth + pad_d, in_height + pad_h, in_width + pad_w))
apad[pad_front:pad_front + in_depth, pad_top:pad_top + in_height,\
- pad_left:pad_left + in_width] = at[n, c]
+ pad_left:pad_left + in_width] = at[n, c]
else:
apad = at[n, c]
out = scipy.signal.convolve(
for i in range(batch):
for j in range(out_channel):
output_np[i, j, :, :] = signal.convolve2d(input_np[i, j//channel_multiplier, :, :], \
- np.rot90(filter_np[j//channel_multiplier, j%channel_multiplier, :, :], 2), \
- mode='valid')[0:(in_height - filter_height + 1):stride_h, 0:(in_width - filter_height + 1):stride_w]
+ np.rot90(filter_np[j//channel_multiplier, j%channel_multiplier, :, :], 2), \
+ mode='valid')[0:(in_height - filter_height + 1):stride_h, 0:(in_width - filter_height + 1):stride_w]
if padding == 'SAME':
out_channel = in_channel * channel_multiplier
out_height = np.int(np.ceil(float(in_height) / float(stride_h)))
for i in range(batch):
for j in range(out_channel):
output_np[i, j, :, :] = signal.convolve2d(input_np[i, j//channel_multiplier, :, :], \
- np.rot90(filter_np[j//channel_multiplier, j%channel_multiplier, :, :], 2), \
- mode='same')[index_h:in_height:stride_h, index_w:in_width:stride_w]
+ np.rot90(filter_np[j//channel_multiplier, j%channel_multiplier, :, :], 2), \
+ mode='same')[index_h:in_height:stride_h, index_w:in_width:stride_w]
return output_np
for i in range(batch):
for j in range(out_channel):
output_np[i, :, :, j] = signal.convolve2d(input_np[i, :, :, j//channel_multiplier], \
- np.rot90(filter_np[:, :, j//channel_multiplier, j%channel_multiplier], 2), \
- mode='valid')[0:(in_height - filter_height + 1):stride_h, 0:(in_width - filter_height + 1):stride_w]
+ np.rot90(filter_np[:, :, j//channel_multiplier, j%channel_multiplier], 2), \
+ mode='valid')[0:(in_height - filter_height + 1):stride_h, 0:(in_width - filter_height + 1):stride_w]
if padding == 'SAME':
out_channel = in_channel * channel_multiplier
out_height = np.int(np.ceil(float(in_height) / float(stride_h)))
for i in range(batch):
for j in range(out_channel):
output_np[i, :, :, j] = signal.convolve2d(input_np[i, :, :, j//channel_multiplier], \
- np.rot90(filter_np[:, :, j//channel_multiplier, j%channel_multiplier], 2), \
- mode='same')[index_h:in_height:stride_h, index_w:in_width:stride_w]
+ np.rot90(filter_np[:, :, j//channel_multiplier, j%channel_multiplier], 2), \
+ mode='same')[index_h:in_height:stride_h, index_w:in_width:stride_w]
return output_np
assert out_shape[3] == int(math.floor(float(in_shape[3] - k_h + pt + pb) / s_h) + 1)
assert out_shape[4] == int(math.floor(float(in_shape[4] - k_w + pl + pr) / s_w) + 1)
- fill_value = tvm.const(0.0, dtype).value
+ fill_value = tvm.tir.const(0.0, dtype).value
if not(count_include_pad) and pool_type == 'max':
- fill_value = tvm.min_value(dtype).value
+ fill_value = tvm.te.min_value(dtype).value
pad_np = np.full(shape=(in_n, in_c,
in_d + pf + pk,
# take the first element, as they are the same across batch and channel
pad_count = pad_count.ravel()[0]
pad_pool_grad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw] += \
- out_grad_np[:, :, i, j].reshape(n, ic, 1, 1) / np.maximum(pad_count, 1)
+ out_grad_np[:, :, i, j].reshape(n, ic, 1, 1) / np.maximum(pad_count, 1)
elif pool_type == 'max':
for i in range(oh):
for j in range(ow):
lx = x - x_low
return (1 - ly) * (1 - lx) * a_np[b, c, y_low, x_low] + \
(1 - ly) * lx * a_np[b, c, y_low, x_high] + \
- ly * (1 - lx) * a_np[b, c, y_high, x_low] + \
- ly * lx * a_np[b, c, y_high, x_high]
+ ly * (1 - lx) * a_np[b, c, y_high, x_low] + \
+ ly * lx * a_np[b, c, y_high, x_high]
for i in range(num_roi):
roi = rois_np[i]
"""Injective transformation operators"""
from __future__ import absolute_import as _abs
import tvm
+from tvm import te
import topi
from . import cpp
from . import tag
Parameters
----------
- a : tvm.Tensor
+ a : tvm.te.Tensor
The tensor to be expanded.
num_newaxis: int, optional
Returns
-------
- ret : tvm.Tensor
+ ret : tvm.te.Tensor
"""
return cpp.expand_dims(a, axis, num_newaxis)
Parameters
----------
- a : tvm.Tensor
+ a : tvm.te.Tensor
The tensor to be expanded.
- shape_like : tvm.Tensor
+ shape_like : tvm.te.Tensor
The tensor with the target shape.
axis: list of int
axis to be expanded on
Returns
-------
- ret : tvm.Tensor
+ ret : tvm.te.Tensor
"""
odim = len(axis) + len(a.shape)
if odim != len(shape_like.shape):
if len(a.shape) == 1 and len(axis) == len(shape_like.shape):
# A special case: `a` is a scalar represented as a 1-dim tensor
- return tvm.compute(shape_like.shape, lambda *idxs: a(0))
+ return te.compute(shape_like.shape, lambda *idxs: a(0))
raise ValueError("shape inconsistent when expand_like ({}, {}, {})".format(
len(axis), len(a.shape), len(shape_like.shape)))
indices.append(idxs[i])
axis_index += 1
return a(*indices)
- return tvm.compute(shape_like.shape, _compute)
+ return te.compute(shape_like.shape, _compute)
def transpose(a, axes=None):
Parameters
----------
- a : tvm.Tensor
+ a : tvm.te.Tensor
The tensor to be expanded.
axes: tuple of ints, optional
Returns
-------
- ret : tvm.Tensor
+ ret : tvm.te.Tensor
"""
return cpp.transpose(a, axes)
Parameters
----------
- a : tvm.Tensor
+ a : tvm.te.Tensor
The tensor to be expanded.
axis : int, optional
Returns
-------
- ret : tvm.Tensor
+ ret : tvm.te.Tensor
"""
return cpp.flip(a, axis)
Parameters
----------
- a : tvm.Tensor
+ a : tvm.te.Tensor
The tensor to be sliced.
begin: list of int
Returns
-------
- ret : tvm.Tensor
+ ret : tvm.te.Tensor
"""
if strides is None:
strides = []
return cpp.strided_slice(a, begin, end, strides)
-@tvm.tag_scope(tag=tag.INJECTIVE+",strided_set")
+@tvm.te.tag_scope(tag=tag.INJECTIVE+",strided_set")
def strided_set(a, v, begin, end, strides=None):
"""Set slice of an array.
Parameters
----------
- a : tvm.Tensor
+ a : tvm.te.Tensor
The tensor to be sliced.
- v : tvm.Tensor
+ v : tvm.te.Tensor
The values to set
- begin: tvm.Tensor
+ begin: tvm.te.Tensor
The indices to begin with in the slicing.
- end: tvm.Tensor
+ end: tvm.te.Tensor
Indices indicating end of the slice.
- strides: tvm.Tensor, optional
+ strides: tvm.te.Tensor, optional
Specifies the stride values, it can be negative
in that case, the input tensor will be reversed
in that particular axis.
Returns
-------
- ret : tvm.Tensor
+ ret : tvm.te.Tensor
"""
n = len(a.shape)
raise TypeError("strides should be int32")
def _max(a, b):
- return tvm.expr.Select(a > b, a, b)
+ return tvm.tir.Select(a > b, a, b)
if strides is None:
- strides = [tvm.const(1, 'int32')] * n
+ strides = [tvm.tir.const(1, 'int32')] * n
else:
- strides = [tvm.if_then_else(strides.shape[0] > i,
- strides[i],
- tvm.const(1, 'int32'))
+ strides = [tvm.tir.if_then_else(strides.shape[0] > i,
+ strides[i],
+ tvm.tir.const(1, 'int32'))
for i in range(n)]
- begin = [tvm.if_then_else(begin.shape[0] > i,
- begin[i],
- tvm.expr.Select(strides[i] > 0,
- tvm.const(0, 'int32'),
- a.shape[i]))
+ begin = [tvm.tir.if_then_else(begin.shape[0] > i,
+ begin[i],
+ tvm.tir.Select(strides[i] > 0,
+ tvm.tir.const(0, 'int32'),
+ a.shape[i]))
for i in range(n)]
- end = [tvm.if_then_else(end.shape[0] > i,
- end[i],
- tvm.expr.Select(strides[i] > 0,
- a.shape[i] + 1,
- -(a.shape[i] + 1)))
+ end = [tvm.tir.if_then_else(end.shape[0] > i,
+ end[i],
+ tvm.tir.Select(strides[i] > 0,
+ a.shape[i] + 1,
+ -(a.shape[i] + 1)))
for i in range(n)]
# Convert negative indexes
for i in range(n):
- begin[i] = tvm.if_then_else(begin[i] < 0,
- begin[i] + a.shape[i],
- begin[i])
- end[i] = tvm.if_then_else(end[i] < 0,
- end[i] + a.shape[i],
- end[i])
+ begin[i] = tvm.tir.if_then_else(begin[i] < 0,
+ begin[i] + a.shape[i],
+ begin[i])
+ end[i] = tvm.tir.if_then_else(end[i] < 0,
+ end[i] + a.shape[i],
+ end[i])
def _select(*indices):
from_val = []
from_val.append(within_index(begin[i], end[i], strides[i], indices[i]))
index_tuple.append(
make_idx(begin[i], end[i], strides[i], a.shape[i], indices[i]))
- return tvm.if_then_else(tvm.all(*from_val), v(*index_tuple), a(*indices))
+ return tvm.tir.if_then_else(tvm.tir.all(*from_val), v(*index_tuple), a(*indices))
- return tvm.compute(a.shape, _select, name="strided_set")
+ return te.compute(a.shape, _select, name="strided_set")
def reshape(a, newshape):
Parameters
----------
- a : tvm.Tensor
+ a : tvm.te.Tensor
The tensor to be reshaped
newshape : tuple of ints
The new shape
Returns
-------
- ret : tvm.Tensor
+ ret : tvm.te.Tensor
"""
return cpp.reshape(a, newshape)
Parameters
----------
- a : tvm.Tensor
+ a : tvm.te.Tensor
axis : None or int or tuple of ints, optional
Selects a subset of the single-dimensional entries in the shape.
Returns
-------
- squeezed : tvm.Tensor
+ squeezed : tvm.te.Tensor
"""
return cpp.squeeze(a, axis)
Parameters
----------
- a_tuple : tuple of tvm.Tensor
+ a_tuple : tuple of tvm.te.Tensor
The arrays to concatenate
axis : int, optional
Returns
-------
- ret : tvm.Tensor
+ ret : tvm.te.Tensor
"""
return cpp.concatenate(a_tuple, axis)
Parameters
----------
- a : tvm.Tensor
+ a : tvm.te.Tensor
The tensor to be stacked.
axis : int, optional
Returns
-------
- ret : tvm.Tensor
+ ret : tvm.te.Tensor
"""
return cpp.stack(a, axis)
Parameters
----------
- ary : tvm.Tensor
+ ary : tvm.te.Tensor
indices_or_sections : int or 1-D array
Returns
-------
- ret : tuple of tvm.Tensor
+ ret : tuple of tvm.te.Tensor
"""
return cpp.split(ary, indices_or_sections, axis)
Parameters
----------
- a : tvm.Tensor
+ a : tvm.te.Tensor
The source array.
- indices : tvm.Tensor
+ indices : tvm.te.Tensor
The indices of the values to extract.
axis : int, optional
Returns
-------
- ret : tvm.Tensor
+ ret : tvm.te.Tensor
"""
if axis is None:
return cpp.take(a, indices, mode)
Parameters
----------
- a : tvm.Tensor
+ a : tvm.te.Tensor
The source array.
- indices : tvm.Tensor
+ indices : tvm.te.Tensor
The indices of the values to extract.
Returns
-------
- ret : tvm.Tensor
+ ret : tvm.te.Tensor
"""
return cpp.gather_nd(a, indices)
Returns
-------
- result : tvm.Tensor
+ result : tvm.te.Tensor
The resulting tensor.
"""
if stop is None:
Parameters
----------
- a : tvm.Tensor
+ a : tvm.te.Tensor
The tensor to be repeated.
repeats: int, required
Returns
-------
- ret : tvm.Tensor
+ ret : tvm.te.Tensor
"""
return cpp.repeat(a, repeats, axis)
Parameters
----------
- a : tvm.Tensor
+ a : tvm.te.Tensor
The tensor to be tiled.
reps: tuple of ints, required
Returns
-------
- ret : tvm.Tensor
+ ret : tvm.te.Tensor
"""
return cpp.tile(a, reps)
Parameters
----------
- array : tvm.Tensor
+ array : tvm.te.Tensor
The source array.
src_layout : str
Parameters
----------
- array : tvm.Tensor
+ array : tvm.te.Tensor
The source tensor.
dtype : str, optional
Returns
-------
- result : tvm.Tensor
+ result : tvm.te.Tensor
The resulting tensor.
"""
return cpp.shape(array, dtype)
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
N-D with shape [MAX_LENGTH, batch_size, ...] or [batch_size, MAX_LENGTH, ...]
depending on the value of `axis`.
- valid_length : tvm.Tensor
+ valid_length : tvm.te.Tensor
1-D with shape [batch_size,]
mask_value : float, optional
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
N-D with shape [MAX_LENGTH, batch_size, ...] or [batch_size, MAX_LENGTH, ...]
depending on the value of `axis`.
"""
Parameters
----------
- array : tvm.Tensor
+ array : tvm.te.Tensor
The source tensor.
dtype : str, optional
Returns
-------
- result : tvm.Tensor
+ result : tvm.te.Tensor
The resulting tensor.
"""
return cpp.ndarray_size(array, dtype)
Parameters
----------
- condition : tvm.Tensor
+ condition : tvm.te.Tensor
The condition array.
- x : tvm.Tensor
+ x : tvm.te.Tensor
First array to be selected.
- y : tvm.Tensor
+ y : tvm.te.Tensor
Second array to be selected.
Returns
-------
- result : tvm.Tensor
+ result : tvm.te.Tensor
A Tensor selected from x or y depending on condition.
"""
return cpp.where(condition, x, y)
Parameters
----------
- indices : tvm.Tensor
+ indices : tvm.te.Tensor
Locations to set to on_value.
- on_value : tvm.Tensor
+ on_value : tvm.te.Tensor
Value to fill at indices.
- off_value : tvm.Tensor
+ off_value : tvm.te.Tensor
Value to fill at all other positions besides indices.
depth : int
from numbers import Integral
import tvm
-from tvm.api import layout, bijective_layout
+from tvm import te
+from tvm.tir import layout, bijective_layout
from . import tag, cpp
class InvalidShapeError(ValueError):
if op not in s.outputs:
s[op].compute_inline()
for tensor in op.input_tensors:
- if isinstance(tensor.op, tvm.tensor.ComputeOp):
+ if isinstance(tensor.op, tvm.te.ComputeOp):
_traverse(tensor.op)
callback(op)
The result value
"""
if not x:
- return tvm.const(1, "int32")
+ return tvm.tir.const(1, "int32")
res = x[0]
for i in range(1, len(x)):
res = res * x[i]
"""
if isinstance(expr, Integral):
return expr
- if not isinstance(expr, tvm.expr.IntImm):
- expr = tvm.ir_pass.Simplify(expr)
- if not isinstance(expr, tvm.expr.IntImm):
+ if not isinstance(expr, tvm.tir.IntImm):
+ expr = tvm.tir.ir_pass.Simplify(expr)
+ if not isinstance(expr, tvm.tir.IntImm):
raise ValueError("Expect value to be constant int")
return int(expr.value)
"""
if isinstance(expr, float):
return float(expr)
- if not isinstance(expr, tvm.expr.FloatImm):
- expr = tvm.ir_pass.Simplify(expr)
- if not isinstance(expr, tvm.expr.FloatImm):
+ if not isinstance(expr, tvm.tir.FloatImm):
+ expr = tvm.tir.ir_pass.Simplify(expr)
+ if not isinstance(expr, tvm.tir.FloatImm):
raise ValueError("Expect value to be constant float")
return float(expr.value)
"""
if isinstance(expr, Integral):
return expr == value
- if not isinstance(expr, tvm.expr.IntImm):
- expr = tvm.ir_pass.Simplify(expr)
- if not isinstance(expr, tvm.expr.IntImm):
+ if not isinstance(expr, tvm.tir.IntImm):
+ expr = tvm.tir.ir_pass.Simplify(expr)
+ if not isinstance(expr, tvm.tir.IntImm):
return False
return expr.value == value
"""
ret = []
for elem in in_tuple:
- if isinstance(elem, tvm.expr.Var):
+ if isinstance(elem, tvm.tir.Var):
ret.append(elem)
- elif not isinstance(elem, (tvm.expr.IntImm, int)):
- elem = tvm.ir_pass.Simplify(elem)
- if not isinstance(elem, tvm.expr.IntImm):
+ elif not isinstance(elem, (tvm.tir.IntImm, int)):
+ elem = tvm.tir.ir_pass.Simplify(elem)
+ if not isinstance(elem, tvm.tir.IntImm):
ret.append(elem)
else:
ret.append(get_const_int(elem))
out : Expr or int
The simplified output
"""
- return tvm.ir_pass.Simplify(expr) if isinstance(expr, tvm.expr.PrimExpr) else expr
+ return tvm.tir.ir_pass.Simplify(expr) if isinstance(expr, tvm.tir.PrimExpr) else expr
def ravel_index(indices, shape):
Parameters
----------
- indices : tuple of int or tvm.expr.IntImm
+ indices : tuple of int or tvm.tir.IntImm
The input coordinates
shape : tuple of int
Parameters
----------
- idx : int or tvm.expr.IntImm
+ idx : int or tvm.tir.IntImm
The 1D index
shape : tuple of int
Returns
-------
- indices : tuple of int or tvm.expr.IntImm
+ indices : tuple of int or tvm.tir.IntImm
Corresponding coordinate of the 1D index
"""
- idxd = tvm.indexdiv
- idxm = tvm.indexmod
+ idxd = tvm.tir.indexdiv
+ idxm = tvm.tir.indexmod
indices = []
for i in range(len(shape) - 1, -1, -1):
indices.append(idxm(idx, shape[i]))
"""
row, col = matrix.shape
dtype = str(matrix.dtype)
- idxm = tvm.indexmod
+ idxm = tvm.tir.indexmod
def select_array(i, j):
- now = tvm.const(0.0, dtype)
+ now = tvm.tir.const(0.0, dtype)
for ii in range(row):
for jj in range(col):
- now = tvm.expr.Select(tvm.all(idxm(i, row) == ii, idxm(j, col) == jj),
- tvm.const(matrix[ii][jj], dtype),
- now)
+ now = tvm.tir.Select(tvm.tir.all(idxm(i, row) == ii, idxm(j, col) == jj),
+ tvm.tir.const(matrix[ii][jj], dtype),
+ now)
return now
- return tvm.compute(matrix.shape, select_array, name=name)
+ return te.compute(matrix.shape, select_array, name=name)
def get_max_power2_factor(n, max_value=None):
layout_mapping = bijective_layout(src_layout, dst_layout)
dst_indices = layout_mapping.forward_index(
- tvm.convert(list(range(len(src_layout)))))
+ tvm.runtime.convert(list(range(len(src_layout)))))
return get_const_tuple(tuple([src_shape[i.value] for i in dst_indices]))
bool expression that is True is the array position would be selected
by the index and False otherwise
"""
- bc = tvm.expr.Select(s < 0, i <= e, i < b)
- ec = tvm.expr.Select(s < 0, i > b, i >= e)
- ss = tvm.if_then_else(s < 0,
- ((i - e) + (e % tvm.abs(s)) + 1) % tvm.abs(s),
- (i - b) % s)
- return tvm.expr.Select(tvm.expr.Or(bc, ec), tvm.const(False), ss.equal(0))
+ bc = tvm.tir.Select(s < 0, i <= e, i < b)
+ ec = tvm.tir.Select(s < 0, i > b, i >= e)
+ ss = te.if_then_else(s < 0,
+ ((i - e) + (e % te.abs(s)) + 1) % te.abs(s),
+ (i - b) % s)
+ return tvm.tir.Select(tvm.tir.Or(bc, ec), tvm.tir.const(False), ss.equal(0))
def make_idx(b, e, s, z, i):
postion: Expr
int expression that corresponds to an array position in the selection.
"""
- bc = tvm.expr.Select(s < 0, i <= e, i < b)
- ec = tvm.expr.Select(s < 0, i > b, i >= e)
+ bc = tvm.tir.Select(s < 0, i <= e, i < b)
+ ec = tvm.tir.Select(s < 0, i > b, i >= e)
# Clamp to array size
- b = tvm.expr.Select(z < b, z - 1, b)
+ b = tvm.tir.Select(z < b, z - 1, b)
- ss = tvm.if_then_else(s < 0,
- (b - i) // tvm.abs(s),
- (i - b) // s)
- return tvm.if_then_else(tvm.expr.Or(bc, ec), 88, ss)
+ ss = tvm.tir.if_then_else(s < 0,
+ (b - i) // te.abs(s),
+ (i - b) // s)
+ return tvm.tir.if_then_else(tvm.tir.Or(bc, ec), 88, ss)
def is_empty_shape(shape):
# pylint: disable=import-error, invalid-name, no-member, too-many-locals, too-many-arguments, undefined-variable, too-many-nested-blocks, too-many-branches, too-many-statements, too-many-function-args
"""Non-maximum suppression operator"""
import tvm
+from tvm import te
from tvm import hybrid
from ..sort import argsort
Parameters
----------
- data : tvm.Tensor or numpy NDArray
+ data : tvm.te.Tensor or numpy NDArray
NMS output. 3-D tensor with shape
[batch_size, num_anchors, 6].
- one: tvm.const
+ one: tvm.tir.const
Constant one with the same dtype as data.
Returns
-------
- output : tvm.Tensor or numpy NDArray
+ output : tvm.te.Tensor or numpy NDArray
Transformed NMS output. 3-D tensor with shape
[batch_size, num_anchors, 6].
"""
Parameters
----------
- data : tvm.Tensor or numpy NDArray
+ data : tvm.te.Tensor or numpy NDArray
Input data. 3-D tensor with shape [batch_size, num_anchors, 6]
or [batch_size, num_anchors, 5].
- score_threshold : tvm.const
+ score_threshold : tvm.tir.const
Lower limit of score for valid bounding boxes.
- id_index : tvm.const
+ id_index : tvm.tir.const
index of the class categories, -1 to disable.
- score_index: tvm.const
+ score_index: tvm.tir.const
Index of the scores/confidence of boxes.
- one: tvm.const
+ one: tvm.tir.const
Constant one with the same dtype as data.
Returns
-------
- out_tensor : tvm.Tensor or numpy NDArray
+ out_tensor : tvm.te.Tensor or numpy NDArray
Rearranged data tensor.
- valid_count : tvm.Tensor or numpy NDArray
+ valid_count : tvm.te.Tensor or numpy NDArray
1-D tensor for valid number of boxes.
"""
batch_size = data.shape[0]
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
Input data. 3-D tensor with shape [batch_size, num_anchors, 6]
or [batch_size, num_anchors, 5].
Returns
-------
- out_tensor : tvm.Tensor
+ out_tensor : tvm.te.Tensor
Rearranged data tensor.
- valid_count : tvm.Tensor
+ valid_count : tvm.te.Tensor
1-D tensor for valid number of boxes.
"""
- score_threshold_const = tvm.const(score_threshold, data.dtype)
- id_index_const = tvm.const(id_index, "int32")
- score_index_const = tvm.const(score_index, "int32")
+ score_threshold_const = tvm.tir.const(score_threshold, data.dtype)
+ id_index_const = tvm.tir.const(id_index, "int32")
+ score_index_const = tvm.tir.const(score_index, "int32")
return hybrid_get_valid_counts(data, score_threshold_const,
id_index_const, score_index_const,
- tvm.const(1, data.dtype))
+ tvm.tir.const(1, data.dtype))
@hybrid.script
Parameters
----------
- data: tvm.Tensor or numpy NDArray
+ data: tvm.te.Tensor or numpy NDArray
Bounding boxes with class and score. 3-D tensor with shape
[batch_size, num_anchors, 6].
- sorted_index : tvm.Tensor or numpy NDArray
+ sorted_index : tvm.te.Tensor or numpy NDArray
Bounding box indexes sorted by score, with shape
[batch_size, num_anchors].
- valid_count : tvm.Tensor or numpy NDArray
+ valid_count : tvm.te.Tensor or numpy NDArray
1-D tensor for valid number of boxes.
- max_output_size : tvm.const
+ max_output_size : tvm.tir.const
Max number of output valid boxes for each instance.
By default all valid boxes are returned.
- iou_threshold : tvm.const
+ iou_threshold : tvm.tir.const
Overlapping(IoU) threshold to suppress object with smaller score.
- force_suppress : tvm.const
+ force_suppress : tvm.tir.const
Whether to suppress all detections regardless of class_id.
- top_k : tvm.const
+ top_k : tvm.tir.const
Keep maximum top k detections before nms, -1 for no limit.
- coord_start : tvm.const
+ coord_start : tvm.tir.const
Start index of the consecutive 4 coordinates.
- id_index : tvm.const
+ id_index : tvm.tir.const
index of the class categories, -1 to disable.
- score_index: tvm.const
+ score_index: tvm.tir.const
Index of the scores/confidence of boxes.
- zero: tvm.const
+ zero: tvm.tir.const
Constant zero with the same dtype as data.
- one: tvm.const
+ one: tvm.tir.const
Constant one with the same dtype as data.
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
3-D tensor with shape [batch_size, num_anchors, 6].
- box_indices: tvm.Tensor
+ box_indices: tvm.te.Tensor
2-D tensor with shape [batch_size, num_anchors].
"""
batch_size = data.shape[0]
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
3-D tensor with shape [batch_size, num_anchors, 6] or [batch_size, num_anchors, 5].
- valid_count : tvm.Tensor
+ valid_count : tvm.te.Tensor
1-D tensor for valid number of boxes.
max_output_size : optional, int
Returns
-------
- out : tvm.Tensor
+ out : tvm.te.Tensor
3-D tensor with shape [batch_size, num_anchors, 6].
Example
# An example to use non_max_suppression
dshape = (1, 5, 6)
- data = tvm.placeholder(dshape, name="data")
- valid_count = tvm.placeholder((dshape[0],), dtype="int32", name="valid_count")
+ data = te.placeholder(dshape, name="data")
+ valid_count = te.placeholder((dshape[0],), dtype="int32", name="valid_count")
iou_threshold = 0.7
force_suppress = True
top_k = -1
num_anchors = data.shape[1]
score_axis = score_index
score_shape = (batch_size, num_anchors)
- score_tensor = tvm.compute(score_shape, lambda i, j: data[i, j, score_axis])
+ score_tensor = te.compute(score_shape, lambda i, j: data[i, j, score_axis])
sort_tensor = argsort(score_tensor, valid_count=valid_count, axis=1, is_ascend=False)
out, box_indices = hybrid_nms(data, sort_tensor, valid_count,
- tvm.const(max_output_size, dtype="int32"),
- tvm.const(iou_threshold, dtype=data.dtype),
- tvm.const(force_suppress, dtype="bool"),
- tvm.const(top_k, dtype="int32"),
- tvm.const(coord_start, dtype="int32"),
- tvm.const(id_index, dtype="int32"),
- tvm.const(score_index, dtype="int32"),
- zero=tvm.const(0, dtype=data.dtype),
- one=tvm.const(1, dtype=data.dtype))
+ tvm.tir.const(max_output_size, dtype="int32"),
+ tvm.tir.const(iou_threshold, dtype=data.dtype),
+ tvm.tir.const(force_suppress, dtype="bool"),
+ tvm.tir.const(top_k, dtype="int32"),
+ tvm.tir.const(coord_start, dtype="int32"),
+ tvm.tir.const(id_index, dtype="int32"),
+ tvm.tir.const(score_index, dtype="int32"),
+ zero=tvm.tir.const(0, dtype=data.dtype),
+ one=tvm.tir.const(1, dtype=data.dtype))
if not return_indices and invalid_to_bottom:
- out = hybrid_rearrange_out(out, one=tvm.const(1, dtype=data.dtype))
+ out = hybrid_rearrange_out(out, one=tvm.tir.const(1, dtype=data.dtype))
return box_indices if return_indices else out
"""Proposal operator"""
import math
import tvm
+from tvm import te
from ...util import get_const_tuple, get_const_int
from ...sort import argsort
pred_ctr_x = dx * bbox_w + ctr_x
pred_ctr_y = dy * bbox_h + ctr_y
- pred_w = tvm.exp(dw) * bbox_w
- pred_h = tvm.exp(dh) * bbox_h
+ pred_w = te.exp(dw) * bbox_w
+ pred_h = te.exp(dh) * bbox_h
pred_x1 = pred_ctr_x - 0.5 * (pred_w - 1.0)
pred_y1 = pred_ctr_y - 0.5 * (pred_h - 1.0)
Parameters
----------
- cls_prob_buf : tvm.schedule.Buffer
+ cls_prob_buf : tvm.te.schedule.Buffer
4-D with shape [batch, 2 * num_anchors, height, width]
- bbox_pred_buf : tvm.schedule.Buffer
+ bbox_pred_buf : tvm.te.schedule.Buffer
4-D with shape [batch, 4 * num_anchors, height, width]
- im_info_buf : tvm.schedule.Buffer
+ im_info_buf : tvm.te.schedule.Buffer
2-D with shape [batch, 3]
- out_buf : tvm.schedule.Buffer
+ out_buf : tvm.te.schedule.Buffer
3-D with shape [batch, num_bbox, 5]
The last dimension is in format of [w_start, h_start, w_end, h_end, score]
"""
batch, num_anchors, height, width = get_const_tuple(cls_prob_buf.shape)
num_anchors //= 2
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
p_score = ib.buffer_ptr(cls_prob_buf)
p_delta = ib.buffer_ptr(bbox_pred_buf)
p_im_info = ib.buffer_ptr(im_info_buf)
p_out = ib.buffer_ptr(out_buf)
- idxm = tvm.indexmod
- idxd = tvm.indexdiv
+ idxm = tvm.tir.indexmod
+ idxd = tvm.tir.indexdiv
with ib.for_range(0, batch * height * width) as tid:
w = idxm(tid, width)
regression_func = reg_iou if iou_loss else reg_bbox
pred_x1, pred_y1, pred_x2, pred_y2 = regression_func(x1, y1, x2, y2, *delta)
- pred_x1 = tvm.max(tvm.min(pred_x1, im_width - 1.0), 0.0)
- pred_y1 = tvm.max(tvm.min(pred_y1, im_height - 1.0), 0.0)
- pred_x2 = tvm.max(tvm.min(pred_x2, im_width - 1.0), 0.0)
- pred_y2 = tvm.max(tvm.min(pred_y2, im_height - 1.0), 0.0)
+ pred_x1 = tvm.te.max(tvm.te.min(pred_x1, im_width - 1.0), 0.0)
+ pred_y1 = tvm.te.max(tvm.te.min(pred_y1, im_height - 1.0), 0.0)
+ pred_x2 = tvm.te.max(tvm.te.min(pred_x2, im_width - 1.0), 0.0)
+ pred_y2 = tvm.te.max(tvm.te.min(pred_y2, im_height - 1.0), 0.0)
real_height = (im_height / feature_stride).astype('int32')
real_width = (im_width / feature_stride).astype('int32')
min_size = p_im_info[b * 3 + 2] * rpn_min_size
pred_score = p_score[((b * num_anchors * 2 + num_anchors + k) * height + h) * width + w]
- pred_score = tvm.expr.Select(tvm.any(h >= real_height, w >= real_width),
- -1.0, pred_score)
+ pred_score = tvm.tir.Select(tvm.tir.any(h >= real_height, w >= real_width),
+ -1.0, pred_score)
p_out[out_index * 5 + 0] = pred_x1
p_out[out_index * 5 + 1] = pred_y1
p_out[out_index * 5 + 2] = pred_x2
p_out[out_index * 5 + 3] = pred_y2
p_out[out_index * 5 + 4] = pred_score
- with ib.if_scope(tvm.any(bbox_w < min_size, bbox_h < min_size)):
+ with ib.if_scope(tvm.tir.any(bbox_w < min_size, bbox_h < min_size)):
p_out[out_index * 5 + 0] -= min_size / 2.0
p_out[out_index * 5 + 1] -= min_size / 2.0
p_out[out_index * 5 + 2] += min_size / 2.0
Parameters
----------
- data_buf : tvm.schedule.Buffer
+ data_buf : tvm.te.schedule.Buffer
2-D with shape [batch, num_bbox]
- out_index_buf : tvm.schedule.Buffer
+ out_index_buf : tvm.te.schedule.Buffer
2-D with shape [batch, num_bbox]. Indices of data in sorted order.
Returns
The result IR statement.
"""
batch, num_bbox = get_const_tuple(data_buf.shape)
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
p_data = ib.buffer_ptr(data_buf)
index_out = ib.buffer_ptr(out_index_buf)
temp_data = ib.allocate("float32", (1,), name="temp_data", scope="local")
temp_index = ib.allocate("int32", (1,), name="temp_index", scope="local")
- idxm = tvm.indexmod
+ idxm = tvm.tir.indexmod
with ib.for_range(0, batch, for_type="unroll") as b:
start = b * num_bbox
for i in range(2):
with ib.for_range(0, num_bbox) as k:
with ib.for_range(0, (num_bbox + 1) // 2) as tid:
offset = start + 2 * tid + idxm(k, 2)
- with ib.if_scope(tvm.all(offset + 1 < num_bbox,
- p_data[offset] < p_data[offset + 1])):
+ with ib.if_scope(tvm.tir.all(offset + 1 < num_bbox,
+ p_data[offset] < p_data[offset + 1])):
temp_data[0] = p_data[offset]
p_data[offset] = p_data[offset + 1]
p_data[offset + 1] = temp_data[0]
Parameters
----------
- sorted_bbox_buf : tvm.schedule.Buffer
+ sorted_bbox_buf : tvm.te.schedule.Buffer
3-D with shape [batch, num_bbox, 5]. The last dimension is in format of
[w_start, h_start, w_end, h_end, score].
- out_buf : tvm.schedule.Buffer
+ out_buf : tvm.te.schedule.Buffer
2-D with shape [batch, num_bbox]. Boolean mask of whether a bounding box should be removed.
nms_threshold : float
def calculate_overlap(out_tensor, box_a_idx, box_b_idx):
"""Calculate overlap of two boxes.
"""
- w = tvm.max(0.0, tvm.min(out_tensor[box_a_idx + 2], out_tensor[box_b_idx + 2])
- - tvm.max(out_tensor[box_a_idx], out_tensor[box_b_idx]) + 1.0)
- h = tvm.max(0.0, tvm.min(out_tensor[box_a_idx + 3], out_tensor[box_b_idx + 3])
- - tvm.max(out_tensor[box_a_idx + 1], out_tensor[box_b_idx + 1]) + 1.0)
+ w = tvm.te.max(0.0, tvm.te.min(out_tensor[box_a_idx + 2], out_tensor[box_b_idx + 2])
+ - tvm.te.max(out_tensor[box_a_idx], out_tensor[box_b_idx]) + 1.0)
+ h = tvm.te.max(0.0, tvm.te.min(out_tensor[box_a_idx + 3], out_tensor[box_b_idx + 3])
+ - tvm.te.max(out_tensor[box_a_idx + 1], out_tensor[box_b_idx + 1]) + 1.0)
i = w * h
u = (out_tensor[box_a_idx + 2] - out_tensor[box_a_idx] + 1.0) * \
(out_tensor[box_a_idx + 3] - out_tensor[box_a_idx + 1] + 1.0) + \
return i / u
batch, num_bbox = get_const_tuple(out_buf.shape)
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
p_data = ib.buffer_ptr(sorted_bbox_buf)
p_out = ib.buffer_ptr(out_buf)
with ib.for_range(0, batch, for_type="unroll", name="n") as b:
p_out[base_idx + i] = False
with ib.for_range(0, num_bbox - 1) as l:
with ib.for_range(0, num_bbox) as i:
- with ib.if_scope(tvm.all(i < num_bbox, i > l, p_out[base_idx + l] == False)):
+ with ib.if_scope(tvm.tir.all(i < num_bbox, i > l, p_out[base_idx + l] == False)):
iou = calculate_overlap(p_data, (base_idx + l) * 5, (base_idx + i) * 5)
with ib.if_scope(iou > nms_threshold):
p_out[base_idx + i] = True
Parameters
----------
- sorted_bbox_buf : tvm.schedule.Buffer
+ sorted_bbox_buf : tvm.te.schedule.Buffer
3-D with shape [batch, num_bbox, 5]. The last dimension is in format of
[w_start, h_start, w_end, h_end, score].
- remove_mask_buf : tvm.schedule.Buffer
+ remove_mask_buf : tvm.te.schedule.Buffer
2-D with shape [batch, num_bbox]. Boolean mask of whether a bounding box should be removed.
- out_buf : tvm.schedule.Buffer
+ out_buf : tvm.te.schedule.Buffer
2-D with shape [batch * rpn_post_nms_top_n, 5]. The last dimension is in format of
[batch_index, w_start, h_start, w_end, h_end].
"""
batch, num_bbox, _ = get_const_tuple(sorted_bbox_buf.shape)
rpn_post_nms_top_n = get_const_int(out_buf.shape[0]) // batch
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
i = ib.allocate('int32', (batch,), 'i', scope='local')
p_sorted_bbox = ib.buffer_ptr(sorted_bbox_buf)
p_remove = ib.buffer_ptr(remove_mask_buf)
nkeep[b] += 1
with ib.for_range(0, batch) as b:
with ib.if_scope(nkeep[b] > 0):
- with ib.for_range(0, tvm.ceil(
- tvm.const(rpn_post_nms_top_n, 'float32') / nkeep[b]).astype('int32')):
+ with ib.for_range(0, te.ceil(
+ tvm.tir.const(rpn_post_nms_top_n, 'float32') / nkeep[b]).astype('int32')):
with ib.for_range(0, num_bbox) as j:
offset_j = (b * num_bbox + j) * 5
offset_i = (b * rpn_post_nms_top_n + i[b]) * 5
- with ib.if_scope(tvm.all(i[b] < rpn_post_nms_top_n,
- p_remove[(b*num_bbox+j)] == False)):
- p_out[offset_i] = tvm.expr.Cast('float32', b)
+ with ib.if_scope(tvm.tir.all(i[b] < rpn_post_nms_top_n,
+ p_remove[(b*num_bbox+j)] == False)):
+ p_out[offset_i] = tvm.tir.Cast('float32', b)
with ib.for_range(0, 4, for_type='unroll') as k:
p_out[offset_i + k + 1] = p_sorted_bbox[offset_j + k]
i[b] = i[b] + 1
Parameters
----------
- cls_prob : tvm.Tensor
+ cls_prob : tvm.te.Tensor
4-D with shape [batch, 2 * num_anchors, height, width]
- bbox_pred : tvm.Tensor
+ bbox_pred : tvm.te.Tensor
4-D with shape [batch, 4 * num_anchors, height, width]
- im_info : tvm.Tensor
+ im_info : tvm.te.Tensor
2-D with shape [batch, 3]
scales : list/tuple of float
Returns
-------
- out : tvm.Tensor
+ out : tvm.te.Tensor
2-D tensor with shape [batch * rpn_post_nms_top_n, 5]. The last dimension is in format of
[batch_index, w_start, h_start, w_end, h_end].
"""
num_bbox = height * width * num_anchors
rpn_pre_nms_top_n = min(rpn_pre_nms_top_n, num_bbox) if rpn_pre_nms_top_n > 0 else num_bbox
- bbox = tvm.extern((batch, num_bbox, 5), [cls_prob, bbox_pred, im_info], lambda ins, outs:
- predict_bbox_ir(ins[0], ins[1], ins[2], outs[0], scales, ratios,
- feature_stride, rpn_min_size, iou_loss),
- dtype=bbox_pred.dtype)
- score = tvm.compute((batch, num_bbox), lambda b, i: bbox[b, i, 4], tag='bbox_score')
+ bbox = te.extern((batch, num_bbox, 5), [cls_prob, bbox_pred, im_info], lambda ins, outs:
+ predict_bbox_ir(ins[0], ins[1], ins[2], outs[0], scales, ratios,
+ feature_stride, rpn_min_size, iou_loss),
+ dtype=bbox_pred.dtype)
+ score = te.compute((batch, num_bbox), lambda b, i: bbox[b, i, 4], tag='bbox_score')
valid_count_shape = (1,)
- valid_count = tvm.compute(valid_count_shape, lambda i: num_bbox)
+ valid_count = te.compute(valid_count_shape, lambda i: num_bbox)
sorted_index = argsort(score, valid_count=valid_count, axis=1, is_ascend=False)
- sorted_bbox = tvm.compute((batch, rpn_pre_nms_top_n, 5),
- lambda b, i, j: bbox[b, sorted_index[b, i], j], tag='sorted_bbox')
- nms_remove_mask = tvm.extern((batch, rpn_pre_nms_top_n), [sorted_bbox],
- lambda ins, outs: nms_ir(ins[0], outs[0], threshold),
- dtype='bool')
- nms_out = tvm.extern((batch * rpn_post_nms_top_n, 5), [sorted_bbox, nms_remove_mask],
- lambda ins, outs: prepare_output_ir(ins[0], ins[1], outs[0]),
- dtype=sorted_bbox.dtype)
+ sorted_bbox = te.compute((batch, rpn_pre_nms_top_n, 5),
+ lambda b, i, j: bbox[b, sorted_index[b, i], j], tag='sorted_bbox')
+ nms_remove_mask = te.extern((batch, rpn_pre_nms_top_n), [sorted_bbox],
+ lambda ins, outs: nms_ir(ins[0], outs[0], threshold),
+ dtype='bool')
+ nms_out = te.extern((batch * rpn_post_nms_top_n, 5), [sorted_bbox, nms_remove_mask],
+ lambda ins, outs: prepare_output_ir(ins[0], ins[1], outs[0]),
+ dtype=sorted_bbox.dtype)
return nms_out
# pylint: disable=invalid-name
"""Roi align operator"""
import tvm
+from tvm import te
from ...util import get_const_tuple
from ...cpp.util import bilinear_sample_nchw
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
4-D with shape [batch, channel, height, width]
- rois : tvm.Tensor
+ rois : tvm.te.Tensor
2-D with shape [num_roi, 5]. The last dimension should be in format of
[batch_index, w_start, h_start, w_end, h_end]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
4-D with shape [num_roi, channel, pooled_size, pooled_size]
"""
dtype = rois.dtype
pooled_size_h, pooled_size_w = pooled_size
def _bilinear(i, c, y, x):
- outside = tvm.any(y < -1.0, x < -1.0, y > height, x > width)
- y = tvm.max(y, 0.0)
- x = tvm.max(x, 0.0)
+ outside = tvm.tir.any(y < -1.0, x < -1.0, y > height, x > width)
+ y = tvm.te.max(y, 0.0)
+ x = tvm.te.max(x, 0.0)
val = bilinear_sample_nchw(data, (i, c, y, x), height - 1, width - 1)
- return tvm.if_then_else(outside, 0.0, val)
+ return tvm.tir.if_then_else(outside, 0.0, val)
def _sample(i, c, ph, pw):
roi = rois[i]
roi_end_w *= spatial_scale
# force malformed ROIs to be 1x1
- roi_h = tvm.max(roi_end_h - roi_start_h, tvm.const(1.0, dtype))
- roi_w = tvm.max(roi_end_w - roi_start_w, tvm.const(1.0, dtype))
+ roi_h = tvm.te.max(roi_end_h - roi_start_h, tvm.tir.const(1.0, dtype))
+ roi_w = tvm.te.max(roi_end_w - roi_start_w, tvm.tir.const(1.0, dtype))
bin_h = roi_h / pooled_size_h
bin_w = roi_w / pooled_size_w
if sample_ratio > 0:
- roi_bin_grid_h = roi_bin_grid_w = tvm.const(sample_ratio, 'int32')
+ roi_bin_grid_h = roi_bin_grid_w = tvm.tir.const(sample_ratio, 'int32')
else:
- roi_bin_grid_h = tvm.ceil(roi_h / pooled_size_h).astype('int32')
- roi_bin_grid_w = tvm.ceil(roi_w / pooled_size_w).astype('int32')
+ roi_bin_grid_h = te.ceil(roi_h / pooled_size_h).astype('int32')
+ roi_bin_grid_w = te.ceil(roi_w / pooled_size_w).astype('int32')
count = roi_bin_grid_h * roi_bin_grid_w
- rh = tvm.reduce_axis((0, roi_bin_grid_h))
- rw = tvm.reduce_axis((0, roi_bin_grid_w))
+ rh = te.reduce_axis((0, roi_bin_grid_h))
+ rw = te.reduce_axis((0, roi_bin_grid_w))
roi_start_h += ph * bin_h
roi_start_w += pw * bin_w
- return tvm.sum(_bilinear(batch_index, c,
- roi_start_h + (rh + 0.5) * bin_h / roi_bin_grid_h,
- roi_start_w + (rw + 0.5) * bin_w / roi_bin_grid_w) / count,
- axis=[rh, rw])
+ return te.sum(_bilinear(batch_index, c,
+ roi_start_h + (rh + 0.5) * bin_h / roi_bin_grid_h,
+ roi_start_w + (rw + 0.5) * bin_w / roi_bin_grid_w) / count,
+ axis=[rh, rw])
- return tvm.compute((num_roi, channel, pooled_size_h, pooled_size_w), _sample,
- tag='pool,roi_align_nchw')
+ return te.compute((num_roi, channel, pooled_size_h, pooled_size_w), _sample,
+ tag='pool,roi_align_nchw')
# pylint: disable=invalid-name
"""ROI pool operator"""
import tvm
+from tvm import te
from ...util import get_const_tuple
def roi_pool_nchw(data, rois, pooled_size, spatial_scale):
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
4-D with shape [batch, channel, height, width]
- rois : tvm.Tensor
+ rois : tvm.te.Tensor
2-D with shape [num_roi, 5]. The last dimension should be in format of
[batch_index, w_start, h_start, w_end, h_end]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
4-D with shape [num_roi, channel, pooled_size, pooled_size]
"""
dtype = rois.dtype
batch_index = roi[0].astype('int32')
roi_start_w, roi_start_h, roi_end_w, roi_end_h = roi[1], roi[2], roi[3], roi[4]
- roi_start_h = tvm.round(roi_start_h * spatial_scale).astype('int32')
- roi_start_w = tvm.round(roi_start_w * spatial_scale).astype('int32')
- roi_end_h = tvm.round(roi_end_h * spatial_scale).astype('int32')
- roi_end_w = tvm.round(roi_end_w * spatial_scale).astype('int32')
+ roi_start_h = te.round(roi_start_h * spatial_scale).astype('int32')
+ roi_start_w = te.round(roi_start_w * spatial_scale).astype('int32')
+ roi_end_h = te.round(roi_end_h * spatial_scale).astype('int32')
+ roi_end_w = te.round(roi_end_w * spatial_scale).astype('int32')
# force malformed ROIs to be 1x1
- roi_h = tvm.max(roi_end_h - roi_start_h + 1, tvm.const(1, 'int32'))
- roi_w = tvm.max(roi_end_w - roi_start_w + 1, tvm.const(1, 'int32'))
+ roi_h = tvm.te.max(roi_end_h - roi_start_h + 1, tvm.tir.const(1, 'int32'))
+ roi_w = tvm.te.max(roi_end_w - roi_start_w + 1, tvm.tir.const(1, 'int32'))
bin_h = roi_h.astype(dtype) / pooled_size_h
bin_w = roi_w.astype(dtype) / pooled_size_w
# use epsilon to prevent floating point precision loss in floor/ceil
- epsilon = tvm.const(0.00001, dtype)
- hstart = tvm.floor(ph * bin_h + epsilon).astype('int32')
- wstart = tvm.floor(pw * bin_w + epsilon).astype('int32')
- hend = tvm.ceil((ph + 1) * bin_h - epsilon).astype('int32')
- wend = tvm.ceil((pw + 1) * bin_w - epsilon).astype('int32')
- hstart = tvm.min(tvm.max(hstart + roi_start_h, 0), height)
- wstart = tvm.min(tvm.max(wstart + roi_start_w, 0), width)
- hend = tvm.min(tvm.max(hend + roi_start_h, 0), height)
- wend = tvm.min(tvm.max(wend + roi_start_w, 0), width)
+ epsilon = tvm.tir.const(0.00001, dtype)
+ hstart = te.floor(ph * bin_h + epsilon).astype('int32')
+ wstart = te.floor(pw * bin_w + epsilon).astype('int32')
+ hend = te.ceil((ph + 1) * bin_h - epsilon).astype('int32')
+ wend = te.ceil((pw + 1) * bin_w - epsilon).astype('int32')
+ hstart = tvm.te.min(tvm.te.max(hstart + roi_start_h, 0), height)
+ wstart = tvm.te.min(tvm.te.max(wstart + roi_start_w, 0), width)
+ hend = tvm.te.min(tvm.te.max(hend + roi_start_h, 0), height)
+ wend = tvm.te.min(tvm.te.max(wend + roi_start_w, 0), width)
- non_empty = tvm.all(hstart < hend, wstart < wend)
- min_value = lambda dtype: tvm.if_then_else(non_empty, tvm.min_value(dtype),
- tvm.const(0.0, dtype))
+ non_empty = tvm.tir.all(hstart < hend, wstart < wend)
+ min_value = lambda dtype: tvm.tir.if_then_else(
+ non_empty, tvm.te.min_value(dtype), tvm.tir.const(0.0, dtype))
# pylint: disable=unnecessary-lambda
- _max = tvm.comm_reducer(lambda x, y: tvm.max(x, y), min_value, name='max')
- rh = tvm.reduce_axis((0, hend - hstart), 'rh')
- rw = tvm.reduce_axis((0, wend - wstart), 'rw')
+ _max = te.comm_reducer(lambda x, y: tvm.te.max(x, y), min_value, name='max')
+ rh = te.reduce_axis((0, hend - hstart), 'rh')
+ rw = te.reduce_axis((0, wend - wstart), 'rw')
return _max(data[batch_index, c, hstart+rh, wstart+rw], axis=[rh, rw])
- return tvm.compute((num_roi, channel, pooled_size_h, pooled_size_w), _pool, tag="pool,roi_pool")
+ return te.compute((num_roi, channel, pooled_size_h, pooled_size_w), _pool, tag="pool,roi_pool")
Parameters
----------
- Input : tvm.Tensor
+ Input : tvm.te.Tensor
4-D with shape [batch, in_channel, in_height, in_width]
stride : int
Returns
-------
- Output : tvm.Tensor
+ Output : tvm.te.Tensor
4-D with shape [batch, out_channel, out_height, out_width]
"""
return cpp.vision.reorg(data, stride)
# under the License.
# pylint: disable=invalid-name, no-member, too-many-locals, too-many-arguments, undefined-variable
"""SSD multibox operators"""
-from __future__ import absolute_import as _abs
import tvm
from tvm import hybrid
-from tvm.intrin import exp, sqrt
+from tvm.tir import exp, sqrt
import topi
Parameters
----------
- data : tvm.Tensor or numpy NDArray
+ data : tvm.te.Tensor or numpy NDArray
- 4-D tensor with shape [batch, channel, height, width]]
+ 4-D tensor with shape [batch, channel, height, width]
sizes : tvm ConsExpr
Returns
-------
- output : tvm.Tensor or numpy NDArray
+ output : tvm.te.Tensor or numpy NDArray
3-D tensor with shape [1, h_in * w_in * (num_sizes + num_ratios - 1), 4]
"""
in_height = data.shape[2]
* sqrt(ratios[k - num_sizes + 1] * 1.0) / 2.0
h = sizes[0] / sqrt(ratios[k - num_sizes + 1] * 1.0) / 2.0
count = i * in_width * (num_sizes + num_ratios - 1) \
- + j * (num_sizes + num_ratios - 1) + k
+ + j * (num_sizes + num_ratios - 1) + k
output[0, count, 0] = center_w - w
output[0, count, 1] = center_h - h
output[0, count, 2] = center_w + w
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
- 4-D with shape [batch, c_in, h_in, w_in]]
+ 4-D with shape [batch, c_in, h_in, w_in]
sizes : tuple of float
Returns
-------
- out : tvm.Tensor
+ out : tvm.te.Tensor
3-D tensor with shape [1, h_in * w_in * (num_sizes + num_ratios - 1), 4]
"""
- out = hybrid_multibox_prior(data, tvm.convert(sizes), tvm.convert(ratios),
- tvm.convert(steps), tvm.convert(offsets))
+ out = hybrid_multibox_prior(data, tvm.runtime.convert(sizes), tvm.runtime.convert(ratios),
+ tvm.runtime.convert(steps), tvm.runtime.convert(offsets))
if clip:
out = topi.clip(out, 0, 1)
return out
Parameters
----------
- cls_prob : tvm.Tensor or numpy NDArray
+ cls_prob : tvm.te.Tensor or numpy NDArray
3-D tensor of class probabilities.
- loc_pred : tvm.Tensor or numpy NDArray
+ loc_pred : tvm.te.Tensor or numpy NDArray
2-D tensor of location regression predictions.
- anchor : tvm.Tensor or numpy NDArray
+ anchor : tvm.te.Tensor or numpy NDArray
3-D tensor of prior anchor boxes.
- clip : tvm.const
+ clip : tvm.tir.const
Whether to clip out-of-boundary boxes.
- threshold : tvm.const
+ threshold : tvm.tir.const
Threshold to be a positive prediction.
variances : tvm.nd.NDArray
Returns
-------
- out_loc : tvm.Tensor or numpy NDArray
+ out_loc : tvm.te.Tensor or numpy NDArray
3-D tensor of transformed location.
- valid_count : tvm.Tensor or numpy NDArray
+ valid_count : tvm.te.Tensor or numpy NDArray
- 1_d tensor of valid counts for boxes.
+ 1-D tensor of valid counts for boxes.
"""
batch_size = cls_prob.shape[0]
Parameters
----------
- cls_prob : tvm.Tensor
+ cls_prob : tvm.te.Tensor
Class probabilities.
- loc_pred : tvm.Tensor
+ loc_pred : tvm.te.Tensor
Location regression predictions.
- anchor : tvm.Tensor
+ anchor : tvm.te.Tensor
Prior anchor boxes.
clip : boolean
Returns
-------
- ret : tuple of tvm.Tensor
+ ret : tuple of tvm.te.Tensor
"""
return hybrid_multibox_transform_loc(cls_prob, loc_pred, anchor,
- tvm.const(clip, "bool"),
- tvm.const(threshold, "float32"),
- tvm.convert(variances))
+ tvm.tir.const(clip, "bool"),
+ tvm.tir.const(threshold, "float32"),
+ tvm.runtime.convert(variances))
def multibox_detection(cls_prob, loc_pred, anchor, clip=True, threshold=0.01, nms_threshold=0.5,
force_suppress=False, variances=(0.1, 0.1, 0.2, 0.2), nms_topk=-1):
Parameters
----------
- cls_prob : tvm.Tensor
+ cls_prob : tvm.te.Tensor
Class probabilities.
- loc_pred : tvm.Tensor
+ loc_pred : tvm.te.Tensor
Location regression predictions.
- anchor : tvm.Tensor
+ anchor : tvm.te.Tensor
Prior anchor boxes.
clip : boolean
Returns
-------
- out : tvm.Tensor
+ out : tvm.te.Tensor
3-D tensor with shape (batch_size, num_anchors, 6)
"""
inter_out = multibox_transform_loc(cls_prob, loc_pred, anchor,
# under the License.
# pylint: disable=invalid-name,too-many-locals,unused-variable
"""x86 batch_matmul operators"""
-from __future__ import absolute_import as _abs
-import tvm
+from tvm import te
from tvm import autotvm
from tvm.autotvm.task.space import SplitEntity
from tvm.contrib import cblas
----------
cfg : ConfigSpace
Autotvm tuning space config file
- x : tvm.Tensor
+ x : tvm.te.Tensor
3-D with shape [batch, M, K]
- y : tvm.Tensor
+ y : tvm.te.Tensor
3-D with shape [batch, N, K]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
3-D with shape [batch, M, N]
"""
assert len(x.shape) == 3 and len(
if cfg.is_fallback:
_default_batch_matmul_config(cfg, M, N, K)
- k = tvm.reduce_axis((0, K), name='k')
- C = tvm.compute(
+ k = te.reduce_axis((0, K), name='k')
+ C = te.compute(
(B, M, N),
- lambda b, i, j: tvm.sum(x[b, i, k] * y[b, j, k], axis=k),
+ lambda b, i, j: te.sum(x[b, i, k] * y[b, j, k], axis=k),
tag='batch_matmul')
return C
sch: Schedule
The computation schedule for the op.
"""
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if "batch_matmul" in op.tag:
----------
cfg : ConfigSpace
Autotvm tuning space config file
- x : tvm.Tensor
+ x : tvm.te.Tensor
3-D with shape [batch, M, K]
- y : tvm.Tensor
+ y : tvm.te.Tensor
3-D with shape [batch, N, K]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
3-D with shape [batch, M, N]
"""
assert len(x.shape) == 3 and len(
# under the License.
# pylint: disable=invalid-name
"""Schedule for binarization and bit-packing."""
-from __future__ import absolute_import as _abs
-import tvm
+from tvm import te
def schedule_binarize_pack(outs):
s: Schedule
The computation schedule for binarize_pack.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _schedule(Out):
s[Out].parallel(Out.op.axis[0])
# under the License.
# pylint: disable=invalid-name, unused-variable, unused-argument
"""Schedule for binary dense operator."""
-from __future__ import absolute_import as _abs
-import tvm
+from tvm import te
from .. import tag
s: Schedule
The computation schedule for binary_dense.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
scheduled_ops = []
def _schedule(A, B, C):
if OP not in s.outputs:
s[OP].compute_inline()
for tensor in OP.input_tensors:
- if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops:
+ if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops:
traverse(tensor.op)
# schedule binary_dense
elif OP.tag == 'binary_dense':
# pylint: disable=invalid-name,unused-variable,invalid-name
"""Bitserial conv2d schedule on x86"""
import tvm
+from tvm import te
from tvm import autotvm
from .. import tag
from ..util import get_const_int, get_const_tuple
else:
data_pad = data_q
- data_vec = tvm.compute(dvshape, lambda n, h, w, ci, vh, vw, b: \
- data_pad[b][n][ci][h*VH*HSTR+vh][w*VW*WSTR+vw], name='data_vec')
+ data_vec = te.compute(dvshape, lambda n, h, w, ci, vh, vw, b: \
+ data_pad[b][n][ci][h*VH*HSTR+vh][w*VW*WSTR+vw], name='data_vec')
if len(kernel.shape) == 4:
- kernel_vec = tvm.compute(kvshape, lambda co, ci, dh, dw, b, vc: \
- kernel_q[b][co*VC+vc][ci][dh][dw], name='kernel_vec')
+ kernel_vec = te.compute(kvshape, lambda co, ci, dh, dw, b, vc: \
+ kernel_q[b][co*VC+vc][ci][dh][dw], name='kernel_vec')
- ci = tvm.reduce_axis((0, CI), name='ci')
- dh = tvm.reduce_axis((0, KH), name='dh')
- dw = tvm.reduce_axis((0, KW), name='dw')
- b1 = tvm.reduce_axis((0, IB), name='ib')
- b2 = tvm.reduce_axis((0, KB), name='kb')
+ ci = te.reduce_axis((0, CI), name='ci')
+ dh = te.reduce_axis((0, KH), name='dh')
+ dw = te.reduce_axis((0, KW), name='dw')
+ b1 = te.reduce_axis((0, IB), name='ib')
+ b2 = te.reduce_axis((0, KB), name='kb')
def _conv(n, co, h, w, vh, vw, vc):
b1b2 = (b1+b2).astype(out_dtype)
if unipolar:
- return tvm.sum((tvm.popcount(
+ return te.sum((tvm.tir.popcount(
data_vec[n, h, w, ci, vh*HSTR+dh, vw*WSTR+dw, b1].astype(out_dtype) &
kernel_vec[co, ci, dh, dw, b2, vc].astype(out_dtype)) -
- tvm.popcount(
- data_vec[n, h, w, ci, vh*HSTR+dh, vw*WSTR+dw, b1].astype(out_dtype)
- & ~kernel_vec[co, ci, dh, dw, b2, vc]).astype(out_dtype)) << b1b2,
- axis=[ci, dh, dw, b1, b2])
+ tvm.tir.popcount(
+ data_vec[n, h, w, ci, vh*HSTR+dh, vw*WSTR+dw, b1].astype(out_dtype)
+ & ~kernel_vec[co, ci, dh, dw, b2, vc]).astype(out_dtype)) << b1b2,
+ axis=[ci, dh, dw, b1, b2])
- return tvm.sum((tvm.popcount(
+ return te.sum((tvm.tir.popcount(
data_vec[n, h, w, ci, vh*HSTR+dh, vw*WSTR+dw, b1] &
kernel_vec[co, ci, dh, dw, b2, vc])).astype(out_dtype) << b1b2,
- axis=[ci, dh, dw, b1, b2])
+ axis=[ci, dh, dw, b1, b2])
- conv = tvm.compute(ovshape, _conv, name='conv_out')
- idxd = tvm.indexdiv
- idxm = tvm.indexmod
+ conv = te.compute(ovshape, _conv, name='conv_out')
+ idxd = tvm.tir.indexdiv
+ idxm = tvm.tir.indexmod
- return tvm.compute(
+ return te.compute(
oshape, lambda n, co, h, w:
conv[n,
idxd(co, VC), idxd(h, VH), idxd(w, VW),
else:
data_pad = data_q
- data_vec = tvm.compute(dvshape, lambda n, h, w, vh, vw, ci, b: \
- data_pad[n][h*VH*HSTR+vh][w*VW*WSTR+vw][ci][b], name='data_vec')
+ data_vec = te.compute(dvshape, lambda n, h, w, vh, vw, ci, b: \
+ data_pad[n][h*VH*HSTR+vh][w*VW*WSTR+vw][ci][b], name='data_vec')
- kernel_vec = tvm.compute(kvshape, lambda co, dh, dw, ci, vc, b: \
- kernel_q[dh][dw][ci][co*VC+vc][b], name='kernel_vec')
+ kernel_vec = te.compute(kvshape, lambda co, dh, dw, ci, vc, b: \
+ kernel_q[dh][dw][ci][co*VC+vc][b], name='kernel_vec')
- ci = tvm.reduce_axis((0, CI), name='ci')
- dh = tvm.reduce_axis((0, KH), name='dh')
- dw = tvm.reduce_axis((0, KW), name='dw')
- b1 = tvm.reduce_axis((0, IB), name='ib')
- b2 = tvm.reduce_axis((0, KB), name='kb')
+ ci = te.reduce_axis((0, CI), name='ci')
+ dh = te.reduce_axis((0, KH), name='dh')
+ dw = te.reduce_axis((0, KW), name='dw')
+ b1 = te.reduce_axis((0, IB), name='ib')
+ b2 = te.reduce_axis((0, KB), name='kb')
def _conv(n, h, w, co, vh, vw, vc):
b1b2 = (b1+b2).astype(out_dtype)
if unipolar:
- return tvm.sum(
- ((tvm.popcount(data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ci, b1] &
- kernel_vec[co, dh, dw, ci, vc, b2]).astype(out_dtype) -
- tvm.popcount(data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ci, b1]&
- ~kernel_vec[co, dh, dw, ci, vc, b2]).astype(out_dtype)) << b1b2),
+ return te.sum(
+ ((tvm.tir.popcount(data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ci, b1] &
+ kernel_vec[co, dh, dw, ci, vc, b2]).astype(out_dtype) -
+ tvm.tir.popcount(data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ci, b1]&
+ ~kernel_vec[co, dh, dw, ci, vc, b2]).astype(out_dtype)) << b1b2),
axis=[dh, dw, ci, b1, b2])
- return tvm.sum(tvm.popcount(
+ return te.sum(tvm.tir.popcount(
data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ci, b1] &
kernel_vec[co, dh, dw, ci, vc, b2]).astype(out_dtype) << b1b2,
- axis=[dh, dw, ci, b1, b2])
+ axis=[dh, dw, ci, b1, b2])
- conv = tvm.compute(ovshape, _conv, name='conv')
+ conv = te.compute(ovshape, _conv, name='conv')
- idxd = tvm.indexdiv
- idxm = tvm.indexmod
- return tvm.compute(
+ idxd = tvm.tir.indexdiv
+ idxm = tvm.tir.indexmod
+ return te.compute(
oshape, lambda n, h, w, co:
conv[n,
idxd(h, VH), idxd(w, VW), idxd(co, VC),
def _schedule_bitserial_conv2d(cfg, outs):
"""CPU schedule for bitserial convolutions NCHW and NHWC"""
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
scheduled_ops = []
def traverse(op):
s[op].compute_inline()
for tensor in op.input_tensors:
if tensor.op.input_tensors and (tensor.op not in scheduled_ops):
- if isinstance(tensor.op, tvm.tensor.ComputeOp):
+ if isinstance(tensor.op, tvm.te.ComputeOp):
traverse(tensor.op)
elif 'spatial_bitserial_conv_nchw' in op.tag or 'spatial_bitserial_conv_nhwc' in op.tag:
data_q = data_vec.op.input_tensors[0]
data = data_q.op.input_tensors[0]
data_pad = None
- if isinstance(data_q.op, tvm.tensor.ComputeOp) and "pad" in data_q.op.tag:
+ if isinstance(data_q.op, tvm.te.ComputeOp) and "pad" in data_q.op.tag:
data_pad = data_q
data_q = data
data = data_q.op.input_tensors[0]
VH = cfg["tile_oh"].size[-1]
VW = cfg["tile_ow"].size[-1]
- ##### Schedule Data padding, and bitpacking
+ ##### Schedule Data padding, and bitpacking
if data_pad is not None:
s[data_pad].compute_inline()
"""Schedule for bitserial dense operator."""
from __future__ import absolute_import as _abs
import tvm
+from tvm import te
from tvm import autotvm
from topi.util import get_const_int, get_const_tuple
from .. import tag
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
2-D with shape [batch, in_dim]
- weight : tvm.Tensor
+ weight : tvm.te.Tensor
2-D with shape [out_dim, in_dim] or
3-D with shape [out_dim, weight_bits, in_dim]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
2-D with shape [batch, out_dim]
"""
data_packed = bitpack(data, data_bits, pack_axis=1, bit_axis=1, pack_type=pack_dtype)
wvshape = (X//VX, WB, VX, K)
oshape = (Y, X)
- k = tvm.reduce_axis((0, K), name='k')
- db = tvm.reduce_axis((0, DB), name='db')
- wb = tvm.reduce_axis((0, WB), name='wb')
+ k = te.reduce_axis((0, K), name='k')
+ db = te.reduce_axis((0, DB), name='db')
+ wb = te.reduce_axis((0, WB), name='wb')
# Tile data and weights
- weight_vec = tvm.compute(wvshape, lambda xo, wb, vx, k:
- weight_packed[xo*VX+vx][wb][k], name='weight_vec')
+ weight_vec = te.compute(wvshape, lambda xo, wb, vx, k:
+ weight_packed[xo*VX+vx][wb][k], name='weight_vec')
- idxdiv = tvm.indexdiv
- idxmod = tvm.indexmod
+ idxdiv = tvm.tir.indexdiv
+ idxmod = tvm.tir.indexmod
- matmul_unipolar = tvm.compute(oshape, lambda i, j: tvm.sum(
- (tvm.popcount(weight_vec[idxdiv(j, VX), wb, idxmod(j, VX), k] & data_packed[i, db, k]) -
- tvm.popcount(~weight_vec[idxdiv(j, VX), wb, idxmod(j, VX), k] & data_packed[i, db, k])
- ).astype(out_dtype)
+ matmul_unipolar = te.compute(oshape, lambda i, j: te.sum(
+ (tvm.tir.popcount(weight_vec[idxdiv(j, VX), wb, idxmod(j, VX), k] & data_packed[i, db, k]) -
+ tvm.tir.popcount(~weight_vec[idxdiv(j, VX), wb, idxmod(j, VX), k] & data_packed[i, db, k])
+ ).astype(out_dtype)
<< (db+wb).astype(out_dtype), axis=[wb, db, k]), tag='bitserial_dense_unipolar')
- matmul = tvm.compute(oshape, lambda i, j: tvm.sum(
- tvm.popcount(weight_vec[idxdiv(j, VX), wb, idxmod(j, VX), k] & data_packed[i, db, k]
- ).astype(out_dtype)
+ matmul = te.compute(oshape, lambda i, j: te.sum(
+ tvm.tir.popcount(weight_vec[idxdiv(j, VX), wb, idxmod(j, VX), k] & data_packed[i, db, k]
+ ).astype(out_dtype)
<< (db+wb).astype(out_dtype), axis=[wb, db, k]), tag='bitserial_dense')
# binary ops
s: Schedule
The computation schedule for bitserial_dense.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _schedule(cfg, s, data_vec, weight_vec, output):
s[data_vec].parallel(s[data_vec].op.axis[0])
if op not in s.outputs:
s[op].compute_inline()
for tensor in op.input_tensors:
- if isinstance(tensor.op, tvm.tensor.ComputeOp):
+ if isinstance(tensor.op, tvm.te.ComputeOp):
traverse(tensor.op)
- elif op.tag == 'bitserial_dense' or 'bitserial_dense_unipolar':
+ # Accept either bitserial dense tag; a bare string operand to "or" is always truthy.
+ elif op.tag in ('bitserial_dense', 'bitserial_dense_unipolar'):
# under the License.
# pylint: disable=invalid-name,unused-variable,unused-argument,invalid-name
"""Conv1D schedule on for Intel CPU"""
-from __future__ import absolute_import as _abs
-import tvm
+from tvm import te
from .. import tag
def schedule_conv1d_ncw(outs):
"""Create schedule for tensors"""
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
output_op = outs[0].op
scheduled_ops = []
s[op].parallel(fused)
s[op].vectorize(w)
for tensor in op.input_tensors:
- if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops:
+ if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops:
traverse(tensor.op)
if 'conv1d_ncw' in op.tag:
conv = op.output(0)
kernel = op.input_tensors[1]
- if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag:
+ if isinstance(kernel.op, te.tensor.ComputeOp) and "dilate" in kernel.op.tag:
s[kernel].compute_inline()
data = op.input_tensors[0]
data_pad = None
- if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag:
+ if isinstance(data.op, te.tensor.ComputeOp) and "pad" in data.op.tag:
data_pad = data
data = data_pad.op.input_tensors[0]
def schedule_conv1d_nwc(outs):
"""Create schedule for tensors"""
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
output_op = outs[0].op
scheduled_ops = []
s[op].parallel(fused)
s[op].vectorize(c)
for tensor in op.input_tensors:
- if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops:
+ if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops:
traverse(tensor.op)
if 'conv1d_nwc' in op.tag:
conv = op.output(0)
kernel = op.input_tensors[1]
- if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag:
+ if isinstance(kernel.op, te.tensor.ComputeOp) and "dilate" in kernel.op.tag:
s[kernel].compute_inline()
data = op.input_tensors[0]
data_pad = None
- if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag:
+ if isinstance(data.op, te.tensor.ComputeOp) and "pad" in data.op.tag:
data_pad = data
data = data_pad.op.input_tensors[0]
import logging
import tvm
+from tvm import te
from tvm import autotvm
from .. import nn
from ..nn.conv2d import conv2d_infer_layout, _get_workload as _get_conv2d_workload
"""
static_data_shape = []
for dim in get_const_tuple(data.shape):
- if isinstance(dim, tvm.expr.Var):
+ if isinstance(dim, tvm.tir.Var):
static_data_shape.append(1)
else:
static_data_shape.append(dim)
- data = tvm.placeholder(static_data_shape, dtype=data.dtype)
+ data = te.placeholder(static_data_shape, dtype=data.dtype)
if is_depthwise:
wkl = _get_depthwise_conv2d_workload(data, kernel, strides, padding, out_dtype)
from .depthwise_conv2d import _fallback_schedule
_, data, kernel, strides, padding, dilation, layout, _, dtype = workload
batch_size, in_channel, in_height, in_width = data[1]
out_channel, _, k_height, k_width = kernel[1]
- idxdiv = tvm.indexdiv
+ idxdiv = tvm.tir.indexdiv
pt, pl, pb, pr = get_pad_tuple(padding, (k_height, k_width))
out_height = idxdiv(in_height + pt + pb - k_height, strides[0]) + 1
def schedule_conv2d_nhwc(outs):
"""Create schedule for conv2d_nhwc"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
output_op = outs[0].op
def _callback(op):
if 'conv2d_nhwc' in op.tag:
conv = op.output(0)
kernel = op.input_tensors[1]
- if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag:
+ if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag:
s[kernel].compute_inline()
data = op.input_tensors[0]
data_pad = None
- if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag:
+ if isinstance(data.op, tvm.te.ComputeOp) and "pad" in data.op.tag:
data_pad = data
data = data_pad.op.input_tensors[0]
ic_chunk = ic // ic_bn
oc_chunk = oc // oc_bn
- data = tvm.compute((n, ic_chunk, ih, iw, ic_bn),
- lambda bs, c, h, w, vc: data[bs, c*ic_bn + vc, h, w],
- name="data_vec")
+ data = te.compute((n, ic_chunk, ih, iw, ic_bn),
+ lambda bs, c, h, w, vc: data[bs, c*ic_bn + vc, h, w],
+ name="data_vec")
- kernel = tvm.compute(
+ kernel = te.compute(
(oc_chunk, ic_chunk, kh, kw, ic_bn, oc_bn),
lambda occ, icc, k_h, k_w, icb, ocb:
kernel[occ * oc_bn + ocb, icc * ic_bn + icb, k_h, k_w],
# If no config was set, we can fallback to default config.
if cfg.is_fallback:
- _get_default_config(cfg, tvm.placeholder((n, in_channel, ih, iw), dtype=data.dtype),
- tvm.placeholder((num_filter, in_channel, kernel_height, kernel_width),
- dtype=kernel.dtype),
+ _get_default_config(cfg, te.placeholder((n, in_channel, ih, iw), dtype=data.dtype),
+ te.placeholder((num_filter, in_channel, kernel_height, kernel_width),
+ dtype=kernel.dtype),
strides, padding, out_dtype)
# Pack data if raw 4-D data is provided.
@autotvm.register_topi_schedule("conv2d_NCHWc.x86")
def schedule_conv2d_NCHWc(cfg, outs):
"""Create schedule for tensors"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if 'conv2d_NCHWc' in op.tag:
import logging
import tvm
+from tvm import te
from tvm import relay
from tvm import autotvm
from .conv2d import _get_default_config
new_attrs['out_layout'] = 'NCHW%dc' % oc_bn
# Store altered operator's config
- new_data = tvm.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn),
- dtype=data_dtype)
- new_kernel = tvm.placeholder((out_channel//oc_bn, in_channel//ic_bn,
- kh, kw, ic_bn, oc_bn), dtype=kernel_tensor.dtype)
+ new_data = te.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn),
+ dtype=data_dtype)
+ new_kernel = te.placeholder((out_channel//oc_bn, in_channel//ic_bn,
+ kh, kw, ic_bn, oc_bn), dtype=kernel_tensor.dtype)
new_workload = autotvm.task.args_to_workload(
[new_data, new_kernel, strides, padding, dilation, new_attrs["data_layout"],
new_attrs["out_layout"], out_dtype], topi_tmpl)
new_attrs['out_layout'] = 'NCHW%dc' % oc_bn
# Store altered operator's config.
- new_data = tvm.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn),
- dtype=data_dtype)
- new_kernel = tvm.placeholder((out_channel // oc_bn,
- in_channel // ic_bn,
- kh,
- kw,
- ic_bn // n_elems,
- oc_bn,
- n_elems), dtype=kernel_dtype)
+ new_data = te.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn),
+ dtype=data_dtype)
+ new_kernel = te.placeholder((out_channel // oc_bn,
+ in_channel // ic_bn,
+ kh,
+ kw,
+ ic_bn // n_elems,
+ oc_bn,
+ n_elems), dtype=kernel_dtype)
new_workload = autotvm.task.args_to_workload(
[new_data, new_kernel, strides, padding, dilation, new_attrs['data_layout'],
new_attrs['out_layout'], out_dtype], topi_tmpl)
new_attrs['out_layout'] = 'NCHW%dc' % oc_bn
# Store altered operator's config.
- new_data = tvm.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn),
- dtype=data_dtype)
- new_kernel = tvm.placeholder((out_channel//oc_bn, 1, kh, kw, 1, oc_bn), dtype=kernel_dtype)
+ new_data = te.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn),
+ dtype=data_dtype)
+ new_kernel = te.placeholder((out_channel//oc_bn, 1, kh, kw, 1, oc_bn), dtype=kernel_dtype)
new_workload = autotvm.task.args_to_workload(
[new_data, new_kernel, strides, padding, dilation, new_attrs['data_layout'],
new_attrs['out_layout'], out_dtype], topi_tmpl)
"""1x1 Conv2D schedule on for Intel CPU"""
from __future__ import absolute_import as _abs
import tvm
+from tvm import te
from tvm import autotvm
from tvm.autotvm.task.space import SplitEntity, OtherOptionEntity
_, _, _, _, ic_bn = get_const_tuple(data_vec.shape)
# schedule pad
- if isinstance(s[data_vec].op, tvm.tensor.ComputeOp) \
+ if isinstance(s[data_vec].op, tvm.te.ComputeOp) \
and "pad" in data_vec.op.tag:
batch, ic_chunk, ih, iw, ic_block = s[data_vec].op.axis
parallel_axis = s[data_vec].fuse(batch, ic_chunk, ih)
# this part will be folded during Relay fold_constant pass.
s[data_vec].pragma(s[data_vec].op.axis[0], "debug_skip_region")
s[kernel_vec].pragma(s[kernel_vec].op.axis[0], "debug_skip_region")
- elif isinstance(kernel_vec.op, tvm.tensor.ComputeOp) and \
+ elif isinstance(kernel_vec.op, tvm.te.ComputeOp) and \
kernel_vec.name == 'kernel_vec':
# data and kernel are not pre-computed, schedule layout transform here.
# this should only be used by x86 conv2d_nchw, which is for
# packing the Filter to let memory access be consecutive for AVX512 intrinsic
# Done in pre-compute stage
- idxd = tvm.indexdiv
- idxm = tvm.indexmod
+ idxd = tvm.tir.indexdiv
+ idxm = tvm.tir.indexmod
packw_shape = (kernel_h, kernel_w, idxd(num_filter, 16), 16 * idxd(channel, 4), 4)
- PackW = tvm.compute(packw_shape,
- lambda a, b, c, d, e:
- Filter[a, b,
- c*16 + idxm(d, 16),
- idxd(d, 16) * 4 + e],
- name="packed_filter")
-
- rc = tvm.reduce_axis((0, in_channel), name='rc')
- ry = tvm.reduce_axis((0, kernel_h), name='ry')
- rx = tvm.reduce_axis((0, kernel_w), name='rx')
- Output = tvm.compute(
+ PackW = te.compute(packw_shape,
+ lambda a, b, c, d, e:
+ Filter[a, b,
+ c*16 + idxm(d, 16),
+ idxd(d, 16) * 4 + e],
+ name="packed_filter")
+
+ rc = te.reduce_axis((0, in_channel), name='rc')
+ ry = te.reduce_axis((0, kernel_h), name='ry')
+ rx = te.reduce_axis((0, kernel_w), name='rx')
+ Output = te.compute(
(batch, out_height, out_width, out_channel),
- lambda nn, yy, xx, ff: tvm.sum(
+ lambda nn, yy, xx, ff: te.sum(
PaddedInput[nn, yy * stride_h + ry * dilation_h,
xx * stride_w + rx * dilation_w, rc].astype(out_dtype) *
PackW[ry, rx, idxd(ff, 16),
ic_factor, oc_factor = cfg["tile_ic"].size[-1], cfg["tile_oc"].size[-1]
# schedule data
A = data
- if isinstance(s[A].op, tvm.tensor.ComputeOp):
+ if isinstance(s[A].op, tvm.te.ComputeOp):
batch, ih, iw, ic = s[A].op.axis
d_ic_chunk, d_ic_block = s[A].split(ic, factor=4)
s[A].vectorize(d_ic_block)
# under the License.
# pylint: disable=invalid-name,unused-variable,unused-argument,invalid-name
"""Conv2D schedule on for Intel CPU"""
-from __future__ import absolute_import as _abs
import tvm
from tvm import autotvm
from tvm.autotvm.task.space import SplitEntity, OtherOptionEntity
_, _, _, _, ic_bn = get_const_tuple(data_vec.shape)
# schedule pad
- if isinstance(s[data_vec].op, tvm.tensor.ComputeOp) \
+ if isinstance(s[data_vec].op, tvm.te.ComputeOp) \
and "pad" in data_vec.op.tag:
batch, ic_chunk, ih, iw, ic_block = s[data_vec].op.axis
parallel_axis = s[data_vec].fuse(batch, ic_chunk, ih)
# this part will be folded during Relay fold_constant pass.
s[data_vec].pragma(s[data_vec].op.axis[0], "debug_skip_region")
s[kernel_vec].pragma(s[kernel_vec].op.axis[0], "debug_skip_region")
- elif isinstance(kernel_vec.op, tvm.tensor.ComputeOp) and \
+ elif isinstance(kernel_vec.op, tvm.te.ComputeOp) and \
kernel_vec.name == 'kernel_vec':
# data and kernel are not pre-computed, schedule layout transform here.
# this should only be used by x86 conv2d_nchw, which is for
"""Conv2D int8 schedule on x86"""
import tvm
+from tvm import te
from tvm import autotvm
from ..nn.conv2d import _get_workload as _get_conv2d_workload
from .. import tag
ic_chunk = ic // ic_bn
oc_chunk = oc // oc_bn
- data = tvm.compute((n, ic_chunk, ih, iw, ic_bn),
- lambda bs, c, h, w, vc: data[bs, c*ic_bn + vc, h, w],
- name="data_vec")
+ data = te.compute((n, ic_chunk, ih, iw, ic_bn),
+ lambda bs, c, h, w, vc: data[bs, c*ic_bn + vc, h, w],
+ name="data_vec")
- kernel = tvm.compute(
+ kernel = te.compute(
(oc_chunk, ic_chunk, kh, kw, ic_bn//n_elems, oc_bn, n_elems),
lambda occ, icc, k_h, k_w, icbc, ocb, icbb:
kernel[occ * oc_bn + ocb,
# If no config was set, we can fallback to default config.
if cfg.is_fallback:
_get_default_config_int8(
- cfg, tvm.placeholder((n, in_channel, ih, iw), dtype=data.dtype),
- tvm.placeholder((num_filter, in_channel, kernel_height, kernel_width),
- dtype=kernel.dtype),
+ cfg, te.placeholder((n, in_channel, ih, iw), dtype=data.dtype),
+ te.placeholder((num_filter, in_channel, kernel_height, kernel_width),
+ dtype=kernel.dtype),
strides, padding, out_dtype)
# Pack data if raw 4-D data is provided.
@autotvm.register_topi_schedule("conv2d_NCHWc_int8.x86")
def schedule_conv2d_NCHWc_int8(cfg, outs):
"""Create schedule for tensors"""
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
"""Traverse operators from computation graph"""
@autotvm.register_topi_schedule("conv2d_nhwc_pack_int8.x86")
def schedule_conv2d_nhwc_pack_int8(cfg, outs):
"""Create schedule for tensors"""
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
output_op = outs[0].op
scheduled_ops = []
s[op].parallel(fused)
s[op].vectorize(c)
for tensor in op.input_tensors:
- if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops:
+ if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops:
traverse(tensor.op)
if 'conv2d_nhwc_pack_int8' in op.tag:
kernel = conv_out.op.input_tensors[1]
data_vec = conv_out.op.input_tensors[0]
data = data_vec.op.input_tensors[0] \
- if isinstance(data_vec.op, tvm.tensor.ComputeOp) and "pad" not in data_vec.op.tag \
+ if isinstance(data_vec.op, te.tensor.ComputeOp) and "pad" not in data_vec.op.tag \
else data_vec
- if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag:
+ if isinstance(data.op, te.tensor.ComputeOp) and "pad" in data.op.tag:
data_pad = data
data = data_pad.op.input_tensors[0]
# under the License.
# pylint: disable=invalid-name,unused-variable,unused-argument,no-member
"""Conv2D Transpose schedule on x86"""
-import tvm
+from tvm import te
from ..util import traverse_inline
from .. import nn
from .conv2d import conv2d_nchw, schedule_conv2d_nchw
def schedule_conv2d_transpose_nchw(outs):
"""Create schedule for tensors"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
s = schedule_conv2d_nchw(outs)
def _callback(op):
if 'unpack_nchwc' in op.tag:
"""Conv3D operators"""
from collections import namedtuple
import tvm
+from tvm import te
from tvm import autotvm
from tvm.autotvm.task.space import SplitEntity, OtherOptionEntity
from ..util import traverse_inline
Parameters
----------
- input : tvm.Tensor
+ input : tvm.te.Tensor
5-D input data with shapes:
[batch, in_channel, in_depth, in_height, in_width] for NCDHW layout
[batch, in_depth, in_height, in_width, in_channel] for NDHWC layout
- filter : tvm.Tensor
+ filter : tvm.te.Tensor
5-D filter with shape [kernel_depth, kernel_height, kernel_width, in_channels, out_channels]
strides : int or a list/tuple of three ints
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
5-D with shape [batch, out_depth, out_height, out_width, out_channel] for NDHWC layout
5-D with shape [batch, out_channel, out_depth, out_height, out_width] for NCDHW layout
"""
s: Schedule
The computation schedule for conv3d.
"""
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
def _traverse(op):
if 'conv3d_ndhwc' in op.tag:
conv_out = op.input_tensors[0]
kernel_vec = conv_out.op.input_tensors[1]
kernel = kernel_vec.op.input_tensors[0]
- if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag:
+ if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag:
s[kernel].compute_inline()
data_vec = conv_out.op.input_tensors[0]
data = data_vec.op.input_tensors[0]
data_pad = None
- if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag:
+ if isinstance(data.op, tvm.te.ComputeOp) and "pad" in data.op.tag:
data_pad = data
data = data_pad.op.input_tensors[0]
# fetch schedule
ic_bn, oc_bn = cfg["tile_ic"].size[-1], cfg["tile_oc"].size[-1]
shape = (batch_size, in_channel // ic_bn, pad_depth, pad_height, ic_bn, pad_width)
- data_vec = tvm.compute(shape,
- lambda n, C, d, h, c, w: data_pad[n, d, h, w, C * ic_bn + c],
- name='data_vec')
+ data_vec = te.compute(shape,
+ lambda n, C, d, h, c, w: data_pad[n, d, h, w, C * ic_bn + c],
+ name='data_vec')
# pack kernel
shape = (num_filter//oc_bn, in_channel//ic_bn,
kernel_depth, kernel_height, kernel_width, ic_bn, oc_bn)
- kernel_vec = tvm.compute(shape,
- lambda CO, CI, d, h, w, ci, co:
- kernel[d, h, w, CI * ic_bn + ci, CO * oc_bn + co],
- name='kernel_vec')
+ kernel_vec = te.compute(shape,
+ lambda CO, CI, d, h, w, ci, co:
+ kernel[d, h, w, CI * ic_bn + ci, CO * oc_bn + co],
+ name='kernel_vec')
# convolution
oshape = (batch_size, num_filter//oc_bn, out_depth, out_height, out_width, oc_bn)
unpack_shape = (batch_size, out_depth, out_height, out_width, num_filter)
- ic = tvm.reduce_axis((0, in_channel), name='ic')
- kh = tvm.reduce_axis((0, kernel_height), name='kh')
- kw = tvm.reduce_axis((0, kernel_width), name='kw')
- kd = tvm.reduce_axis((0, kernel_depth), name='kd')
- idxmod = tvm.indexmod
- idxdiv = tvm.indexdiv
-
- conv = tvm.compute(oshape, lambda n, oc_chunk, od, oh, ow, oc_block:
- tvm.sum(data_vec[n,
- idxdiv(ic, ic_bn),
- od*DSTR+kd*dilation_d,
- oh*HSTR+kh*dilation_h,
+ ic = te.reduce_axis((0, in_channel), name='ic')
+ kh = te.reduce_axis((0, kernel_height), name='kh')
+ kw = te.reduce_axis((0, kernel_width), name='kw')
+ kd = te.reduce_axis((0, kernel_depth), name='kd')
+ idxmod = tvm.tir.indexmod
+ idxdiv = tvm.tir.indexdiv
+
+ conv = te.compute(oshape, lambda n, oc_chunk, od, oh, ow, oc_block:
+ te.sum(data_vec[n,
+ idxdiv(ic, ic_bn),
+ od*DSTR+kd*dilation_d,
+ oh*HSTR+kh*dilation_h,
+ idxmod(ic, ic_bn),
+ ow*WSTR+kw*dilation_w].astype(out_dtype) *
+ kernel_vec[oc_chunk, idxdiv(ic, ic_bn), kd, kh, kw,
idxmod(ic, ic_bn),
- ow*WSTR+kw*dilation_w].astype(out_dtype) *
- kernel_vec[oc_chunk, idxdiv(ic, ic_bn), kd, kh, kw,
- idxmod(ic, ic_bn),
- oc_block].astype(out_dtype),
- axis=[kd, kh, kw, ic]), name='conv')
- conv_unpacked = tvm.compute(unpack_shape,
- lambda n, d, h, w, c: conv[n, idxdiv(c, oc_bn),
- d, h, w,
- idxmod(c, oc_bn)]
- .astype(out_dtype),
- name='output_unpack',
- tag='conv3d_ndhwc')
+ oc_block].astype(out_dtype),
+ axis=[kd, kh, kw, ic]), name='conv')
+ conv_unpacked = te.compute(unpack_shape,
+ lambda n, d, h, w, c: conv[n, idxdiv(c, oc_bn),
+ d, h, w,
+ idxmod(c, oc_bn)]
+ .astype(out_dtype),
+ name='output_unpack',
+ tag='conv3d_ndhwc')
return conv_unpacked
static_data_shape = []
for dim in get_const_tuple(data.shape):
- if isinstance(dim, tvm.expr.Var):
+ if isinstance(dim, tvm.tir.Var):
static_data_shape.append(1)
else:
static_data_shape.append(dim)
- data = tvm.placeholder(static_data_shape, dtype=data.dtype)
+ data = te.placeholder(static_data_shape, dtype=data.dtype)
wkl = _get_conv3d_workload(data, kernel, strides, padding, out_dtype, layout)
_fallback_schedule(cfg, wkl)
"""x86 dense operators"""
from __future__ import absolute_import as _abs
import tvm
+from tvm import te
from tvm import autotvm
from tvm.autotvm.task.space import SplitEntity
from tvm.contrib import cblas
def _default_dense_pack_config(cfg, M, N, K):
# Generate default schedule for dynamic shape.
- if isinstance(M, tvm.expr.Var):
+ if isinstance(M, tvm.tir.Var):
M = 16
- if isinstance(N, tvm.expr.Var):
+ if isinstance(N, tvm.tir.Var):
N = 16
- if isinstance(K, tvm.expr.Var):
+ if isinstance(K, tvm.tir.Var):
K = 16
vec_width = get_fp32_len()
def _default_dense_nopack_config(cfg, M, N, K):
# Generate default schedule for dynamic shape.
- if isinstance(M, tvm.expr.Var):
+ if isinstance(M, tvm.tir.Var):
M = 16
- if isinstance(N, tvm.expr.Var):
+ if isinstance(N, tvm.tir.Var):
N = 16
- if isinstance(K, tvm.expr.Var):
+ if isinstance(K, tvm.tir.Var):
K = 16
vec_width = get_fp32_len()
M, K = get_const_tuple(data.shape)
N, _ = get_const_tuple(weight.shape)
# create tuning space
- cfg.define_split("tile_y", 32 if isinstance(M, tvm.expr.Var) else M, num_outputs=2)
- cfg.define_split("tile_x", 32 if isinstance(N, tvm.expr.Var) else N, num_outputs=2)
- cfg.define_split("tile_k", 32 if isinstance(K, tvm.expr.Var) else K, num_outputs=2)
+ cfg.define_split("tile_y", 32 if isinstance(M, tvm.tir.Var) else M, num_outputs=2)
+ cfg.define_split("tile_x", 32 if isinstance(N, tvm.tir.Var) else N, num_outputs=2)
+ cfg.define_split("tile_k", 32 if isinstance(K, tvm.tir.Var) else K, num_outputs=2)
if cfg.is_fallback:
_default_dense_nopack_config(cfg, M, N, K)
vec = cfg["tile_k"].size[-1]
- k = tvm.reduce_axis((0, K // vec), "k")
- CC = tvm.compute((M, N, vec),
- lambda z, y, x: tvm.sum(
- data[z, k * vec + x].astype(out_dtype) *
- weight[y, k * vec + x].astype(out_dtype), axis=k))
-
- kk = tvm.reduce_axis((0, vec), "kk")
- C = tvm.compute((M, N),
- lambda y, x: tvm.sum(CC[y, x, kk], axis=kk),
- tag="dense_nopack")
+ k = te.reduce_axis((0, K // vec), "k")
+ CC = te.compute((M, N, vec),
+ lambda z, y, x: te.sum(
+ data[z, k * vec + x].astype(out_dtype) *
+ weight[y, k * vec + x].astype(out_dtype), axis=k))
+
+ kk = te.reduce_axis((0, vec), "kk")
+ C = te.compute((M, N),
+ lambda y, x: te.sum(CC[y, x, kk], axis=kk),
+ tag="dense_nopack")
if bias is not None:
- C = tvm.compute((M, N), lambda i, j: C[i, j] + bias[j].astype(out_dtype),
- tag=tag.BROADCAST)
+ C = te.compute((M, N), lambda i, j: C[i, j] + bias[j].astype(out_dtype),
+ tag=tag.BROADCAST)
return C
@autotvm.register_topi_schedule("dense_nopack.x86")
def schedule_dense_nopack(cfg, outs):
"""Create the schedule for dense_nopack"""
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if 'dense_nopack' in op.tag:
packw_bn = cfg["tile_x"].size[-1]
packw_shape = (N // packw_bn, K, packw_bn)
- packw = tvm.compute(packw_shape,
- lambda z, y, x: weight[z * packw_bn + x, y], name="packed_weight")
-
- idxdiv = tvm.indexdiv
- idxmod = tvm.indexmod
- k = tvm.reduce_axis((0, K), name="k")
- C = tvm.compute((M, N),
- lambda y, x: tvm.sum(
- data[y, k].astype(out_dtype) *
- packw[idxdiv(x, packw_bn), k, idxmod(x, packw_bn)].astype(out_dtype),
- axis=k),
- tag="dense_pack")
+ packw = te.compute(packw_shape,
+ lambda z, y, x: weight[z * packw_bn + x, y], name="packed_weight")
+
+ idxdiv = tvm.tir.indexdiv
+ idxmod = tvm.tir.indexmod
+ k = te.reduce_axis((0, K), name="k")
+ C = te.compute((M, N),
+ lambda y, x: te.sum(
+ data[y, k].astype(out_dtype) *
+ packw[idxdiv(x, packw_bn), k, idxmod(x, packw_bn)].astype(out_dtype),
+ axis=k),
+ tag="dense_pack")
if bias is not None:
- C = tvm.compute((M, N), lambda i, j: C[i, j] + bias[j].astype(out_dtype),
- tag=tag.BROADCAST)
+ C = te.compute((M, N), lambda i, j: C[i, j] + bias[j].astype(out_dtype),
+ tag=tag.BROADCAST)
return C
@autotvm.register_topi_schedule("dense_pack.x86")
def schedule_dense_pack(cfg, outs):
"""Create the schedule for dense_pack"""
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
if "dense_pack" in op.tag:
cfg.add_flop(M * K * N * 2)
C = cblas.matmul(data, weight, False, True)
if bias is not None:
- C = tvm.compute(C.shape, lambda i, j: C[i, j] + bias[j].astype(out_dtype),
- tag=tag.BROADCAST)
+ C = te.compute(C.shape, lambda i, j: C[i, j] + bias[j].astype(out_dtype),
+ tag=tag.BROADCAST)
return C
@autotvm.register_topi_schedule("dense_cblas.x86")
# pylint: disable=no-value-for-parameter
"""Depthwise Conv2D schedule on x86"""
import tvm
+from tvm import te
from tvm import autotvm
from tvm.autotvm.task.space import SplitEntity
from ..nn.pad import pad
ic_chunk = ic // ic_bn
oc_chunk = oc // oc_bn
- data = tvm.compute((n, ic_chunk, ih, iw, ic_bn),
- lambda bs, c, h, w, vc: data[bs, c*ic_bn + vc, h, w],
- name="data_vec")
+ data = te.compute((n, ic_chunk, ih, iw, ic_bn),
+ lambda bs, c, h, w, vc: data[bs, c*ic_bn + vc, h, w],
+ name="data_vec")
- kernel = tvm.compute(
+ kernel = te.compute(
(oc_chunk, 1, kh, kw, 1, oc_bn),
lambda occ, icc, k_h, k_w, icb, ocb:
kernel[(occ * oc_bn + ocb) // cm,
# get workload and related schedule config
wkl = _get_workload(
- tvm.placeholder((batch, in_channel, in_height, in_width), dtype=data.dtype),
- tvm.placeholder((out_channel, channel_multiplier, filter_height, filter_width),
- dtype=kernel.dtype),
+ te.placeholder((batch, in_channel, in_height, in_width), dtype=data.dtype),
+ te.placeholder((out_channel, channel_multiplier, filter_height, filter_width),
+ dtype=kernel.dtype),
strides, padding, out_dtype)
if cfg.is_fallback:
_fallback_schedule(cfg, wkl)
data_pad = data
# depthconv stage
- idxdiv = tvm.indexdiv
- idxmod = tvm.indexmod
+ idxdiv = tvm.tir.indexdiv
+ idxmod = tvm.tir.indexmod
- kh = tvm.reduce_axis((0, filter_height), name='kh')
- kw = tvm.reduce_axis((0, filter_width), name='kw')
- Output = tvm.compute(
+ kh = te.reduce_axis((0, filter_height), name='kh')
+ kw = te.reduce_axis((0, filter_width), name='kw')
+ Output = te.compute(
(batch, out_channel_chunk, out_height, out_width, out_channel_block),
- lambda b, oco, oh, ow, oci: tvm.sum(
+ lambda b, oco, oh, ow, oci: te.sum(
(data_pad[
b,
idxdiv(idxdiv(oco * out_channel_block + oci, channel_multiplier), in_channel_block),
@autotvm.register_topi_schedule("depthwise_conv2d_NCHWc.x86")
def schedule_depthwise_conv2d_NCHWc(cfg, outs):
"""CPU schedule for depthwise conv2d in NCHW[x]c layout"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
"""Traverse operators from computation graph"""
def _schedule_depthwise_conv2d_NCHWc_impl(s, cfg, data_vec, kernel_vec, conv_out, output):
tile_ow, oc_bn = cfg["tile_ow"].size[-1], cfg["tile_oc"].size[-1]
# schedule pad
- if isinstance(s[data_vec].op, tvm.tensor.ComputeOp) \
+ if isinstance(s[data_vec].op, tvm.te.ComputeOp) \
and "pad" in data_vec.op.tag:
batch, ic_chunk, ih, iw, ic_block = s[data_vec].op.axis
parallel_axis = s[data_vec].fuse(batch, ic_chunk, ih)
# under the License.
# pylint: disable=invalid-name
"""x86 declaration and schedules."""
-from __future__ import absolute_import as _abs
-import tvm
+from tvm import te
from ..util import is_empty_shape
def schedule_injective_from_existing(sch, out):
sch: Schedule
The computation schedule for the op.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
x = outs[0]
- s = tvm.create_schedule([x.op for x in outs])
- tvm.schedule.AutoInlineInjective(s)
+ s = te.create_schedule([x.op for x in outs])
+ te.schedule.AutoInlineInjective(s)
if not is_empty_shape(x.shape):
schedule_injective_from_existing(s, x)
_, inner_i = sch[tensor].split(inner_axis, split_factor)
sch[tensor].vectorize(inner_i)
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
x = outs[0]
- s = tvm.create_schedule([x.op for x in outs])
- tvm.schedule.AutoInlineInjective(s)
+ s = te.create_schedule([x.op for x in outs])
+ te.schedule.AutoInlineInjective(s)
if len(s[x].op.axis) >= 5:
fused = s[x].fuse(s[x].op.axis[0], s[x].op.axis[1], s[x].op.axis[2])
vectorize(s, x, 64)
# under the License.
# pylint: disable=invalid-name,too-many-locals,unused-variable
"""x86 nn operators"""
-from __future__ import absolute_import as _abs
-import tvm
+from tvm import te
def schedule_softmax(outs):
"""Schedule for softmax
sch: Schedule
The computation schedule for the op.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
softmax = outs[0]
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
op_tag = softmax.op.tag
if op_tag == 'softmax_output':
# under the License.
# pylint: disable=invalid-name, unused-variable
"""Schedule for pooling operators"""
-import tvm
+from tvm import te
from .. import tag
def _parallel_sch(sch, oshape, do_vectorize=False):
sch: Schedule
The computation schedule for the op.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
scheduled_ops = []
def _schedule(PaddedInput, Pool):
- if isinstance(PaddedInput.op, tvm.tensor.ComputeOp):
+ if isinstance(PaddedInput.op, te.tensor.ComputeOp):
s[PaddedInput].compute_inline()
do_vectorize = layout[-1] not in "HWhw"
_parallel_sch(s[Pool], outs[0].shape, do_vectorize)
if OP not in s.outputs:
s[OP].compute_inline()
for tensor in OP.input_tensors:
- if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops:
+ if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops:
traverse(tensor.op)
# schedule pool
elif OP.tag.startswith('pool'):
sch: Schedule
The computation schedule for the op.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- s = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
scheduled_ops = []
def traverse(OP):
if OP not in s.outputs:
s[OP].compute_inline()
for tensor in OP.input_tensors:
- if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops:
+ if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops:
traverse(tensor.op)
# schedule pool
elif OP.tag.startswith('adaptive_pool'):
# under the License.
# pylint: disable=invalid-name
"""x86 declaration and schedules."""
-from __future__ import absolute_import as _abs
import tvm
+from tvm import te
from .injective import schedule_injective_from_existing
from .. import tag
from ..util import get_const_tuple
sch: Schedule
The computation schedule for the op.
"""
- outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
- sch = tvm.create_schedule([x.op for x in outs])
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ sch = te.create_schedule([x.op for x in outs])
scheduled_ops = []
def traverse_before_reduce(operator):
"""Internal traverse function"""
- if isinstance(operator, tvm.tensor.PlaceholderOp):
+ if isinstance(operator, tvm.te.PlaceholderOp):
return
if tag.is_injective(operator.tag):
sch[operator].compute_inline()
for tensor in input_tensors:
if tensor.op not in scheduled_ops:
traverse_before_reduce(tensor.op)
- elif isinstance(operator, tvm.tensor.PlaceholderOp):
+ elif isinstance(operator, tvm.te.PlaceholderOp):
pass
else:
raise RuntimeError("Unsupported operator: %s (tag: %s)" % (operator, operator.tag))
Parameters
----------
- data : tvm.Tensor or numpy NDArray
+ data : tvm.te.Tensor or numpy NDArray
4-D with shape [batch, channel, height, width]
- rois : tvm.Tensor or numpy NDArray
+ rois : tvm.te.Tensor or numpy NDArray
2-D with shape [num_roi, 5]. The last dimension should be in format of
[batch_index, w_start, h_start, w_end, h_end]
- w_pc : tvm.Tensor or numpy NDArray
+ w_pc : tvm.te.Tensor or numpy NDArray
3-D weight pre-calculation buffer
- pos_pc : tvm.Tensor or numpy NDArray
+ pos_pc : tvm.te.Tensor or numpy NDArray
3-D position pre-calculation buffer
pooled_size : tvm ConstExpr
[out_height, out_width]
- spatial_scale : tvm.const
+ spatial_scale : tvm.tir.const
Ratio of input feature map height (or w) to raw image height (or w). Equals the reciprocal
of total stride in convolutional layers, which should be in range (0.0, 1.0]
- sample_ratio : tvm.const
+ sample_ratio : tvm.tir.const
Sampling ratio of ROI align, using adaptive size by default.
Returns
-------
- output : tvm.Tensor or numpy NDArray
+ output : tvm.te.Tensor or numpy NDArray
4-D with shape [num_roi, channel, pooled_size, pooled_size]
"""
channels = data.shape[1]
for iy in range(roi_bin_grid_h):
for ix in range(roi_bin_grid_w):
output_val += w_pc[n, pre_calc_index, 0] \
- * data[roi_batch_index, c,
- pos_pc[n, pre_calc_index, 2],
- pos_pc[n, pre_calc_index, 0]] \
- + w_pc[n, pre_calc_index, 1] \
- * data[roi_batch_index, c,
- pos_pc[n, pre_calc_index, 2],
- pos_pc[n, pre_calc_index, 1]] \
- + w_pc[n, pre_calc_index, 2] \
- * data[roi_batch_index, c,
- pos_pc[n, pre_calc_index, 3],
- pos_pc[n, pre_calc_index, 0]] \
- + w_pc[n, pre_calc_index, 3] \
- * data[roi_batch_index, c,
- pos_pc[n, pre_calc_index, 3],
- pos_pc[n, pre_calc_index, 1]]
+ * data[roi_batch_index, c,
+ pos_pc[n, pre_calc_index, 2],
+ pos_pc[n, pre_calc_index, 0]] \
+ + w_pc[n, pre_calc_index, 1] \
+ * data[roi_batch_index, c,
+ pos_pc[n, pre_calc_index, 2],
+ pos_pc[n, pre_calc_index, 1]] \
+ + w_pc[n, pre_calc_index, 2] \
+ * data[roi_batch_index, c,
+ pos_pc[n, pre_calc_index, 3],
+ pos_pc[n, pre_calc_index, 0]] \
+ + w_pc[n, pre_calc_index, 3] \
+ * data[roi_batch_index, c,
+ pos_pc[n, pre_calc_index, 3],
+ pos_pc[n, pre_calc_index, 1]]
pre_calc_index += 1
output_val /= count
Parameters
----------
- data : tvm.Tensor
+ data : tvm.te.Tensor
4-D with shape [batch, channel, height, width]
- rois : tvm.Tensor
+ rois : tvm.te.Tensor
2-D with shape [num_roi, 5]. The last dimension should be in format of
[batch_index, w_start, h_start, w_end, h_end]
Returns
-------
- output : tvm.Tensor
+ output : tvm.te.Tensor
4-D with shape [num_roi, channel, pooled_size, pooled_size]
"""
if not isinstance(pooled_size, (tuple, list)):
w_pc_buffer = full(max_pc_shape, data.dtype, 0)
pos_pc_buffer = full(max_pc_shape, "int32", 0)
- pooled_size = tvm.convert(pooled_size)
- spatial_scale = tvm.const(spatial_scale, "float32")
- sample_ratio = tvm.const(sample_ratio, "int32")
+ pooled_size = tvm.runtime.convert(pooled_size)
+ spatial_scale = tvm.tir.const(spatial_scale, "float32")
+ sample_ratio = tvm.tir.const(sample_ratio, "int32")
return roi_align_nchw_ir(data, rois, w_pc_buffer, pos_pc_buffer,
pooled_size, spatial_scale, sample_ratio)
# under the License.
"""sparse_dense schedule on x86"""
-import tvm
+from tvm import te
from ..util import traverse_inline, get_const_int
from .util import get_fp32_len
def schedule_sparse_dense(outs):
"""Create schedule for sparse dense"""
- s = tvm.create_schedule([x.op for x in outs])
+ s = te.create_schedule([x.op for x in outs])
def _callback(op):
simd_width = get_fp32_len()
"""Core kernel of dot product of 4 Int8 operations"""
#pylint: disable=invalid-name
import tvm
+from tvm import te
import tvm.target.codegen
mcpu = tvm.target.Target.current().mcpu
assert mcpu in ("skylake-avx512", "cascadelake"), \
- "An old Intel machine that does not have fast Int8 support."
+ "An old Intel machine that does not have fast Int8 support."
if mcpu == "skylake-avx512":
return dot_16x1x16_uint8_int8_int32_skylake()
# cascadelake
int32_lanes = 16 # 16 int32 lanes in AVX512
num_int8_elements = 4 # 4 int8 elements in int32
- data = tvm.placeholder((num_int8_elements,), dtype='uint8', name='data')
- kernel = tvm.placeholder((int32_lanes, num_int8_elements), dtype='int8', name='kernel')
- k = tvm.reduce_axis((0, num_int8_elements), name='k')
- C = tvm.compute((int32_lanes,),
- lambda i: tvm.sum(data[k].astype('int32') *
- kernel[i, k].astype('int32'),
- axis=k),
- name="C")
-
- a_buffer = tvm.decl_buffer(data.shape, dtype='uint8', name="a_buffer",
- offset_factor=1,
- strides=[1])
- b_buffer = tvm.decl_buffer(kernel.shape, dtype='int8', name="b_buffer",
- offset_factor=1,
- strides=[tvm.var('ldw'), 1])
+ data = te.placeholder((num_int8_elements,), dtype='uint8', name='data')
+ kernel = te.placeholder((int32_lanes, num_int8_elements), dtype='int8', name='kernel')
+ k = te.reduce_axis((0, num_int8_elements), name='k')
+ C = te.compute((int32_lanes,),
+ lambda i: te.sum(data[k].astype('int32') *
+ kernel[i, k].astype('int32'),
+ axis=k),
+ name="C")
+
+ a_buffer = tvm.tir.decl_buffer(data.shape, dtype='uint8', name="a_buffer",
+ offset_factor=1,
+ strides=[1])
+ b_buffer = tvm.tir.decl_buffer(kernel.shape, dtype='int8', name="b_buffer",
+ offset_factor=1,
+ strides=[te.var('ldw'), 1])
def _intrin_func(ins, outs):
def _instr(index):
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
if index == 1:
- ib.emit(outs[0].vstore(0, tvm.const(0, 'int32x16')))
+ ib.emit(outs[0].vstore(0, tvm.tir.const(0, 'int32x16')))
return ib.get()
a_int8 = ins[0].vload([0], "uint8x4")
- re_int32 = tvm.call_pure_intrin('int32', 'reinterpret', a_int8)
+ re_int32 = tvm.tir.call_pure_intrin('int32', 'reinterpret', a_int8)
vec_ai32 = re_int32.astype('int32x16')
- vec_a = tvm.call_pure_intrin('int8x64', 'reinterpret', vec_ai32)
+ vec_a = tvm.tir.call_pure_intrin('int8x64', 'reinterpret', vec_ai32)
vec_b = ins[1].vload([0, 0], "int8x64")
- vec_one = tvm.const(1, "int16x32")
- pair_reduction = tvm.call_llvm_intrin('int16x32',
- 'llvm.x86.avx512.pmaddubs.w.512',
- tvm.const(0, 'uint32'),
- vec_a, vec_b)
- quad_reduction = tvm.call_llvm_intrin('int32x16',
- 'llvm.x86.avx512.pmaddw.d.512',
- tvm.const(0, 'uint32'),
- pair_reduction, vec_one)
+ vec_one = tvm.tir.const(1, "int16x32")
+ pair_reduction = tvm.tir.call_llvm_intrin('int16x32',
+ 'llvm.x86.avx512.pmaddubs.w.512',
+ tvm.tir.const(0, 'uint32'),
+ vec_a, vec_b)
+ quad_reduction = tvm.tir.call_llvm_intrin('int32x16',
+ 'llvm.x86.avx512.pmaddw.d.512',
+ tvm.tir.const(0, 'uint32'),
+ pair_reduction, vec_one)
if index == 0:
ib.emit(outs[0].vstore(0, quad_reduction))
else:
# body, reset, update
return _instr(0), _instr(1), _instr(2)
- with tvm.build_config(offset_factor=1, partition_const_loop=True):
- return tvm.decl_tensor_intrin(C.op, _intrin_func, binds={data:a_buffer, kernel:b_buffer})
+ with tvm.target.build_config(offset_factor=1, partition_const_loop=True):
+ return te.decl_tensor_intrin(C.op, _intrin_func, binds={data:a_buffer, kernel:b_buffer})
def dot_16x1x16_uint8_int8_int16():
int16_lanes = 4*32 # 4*32 int16 lanes in 4 AVX512 vector registers
num_int8_elements = 2 # 2 int8 elements in int16
- data = tvm.placeholder((num_int8_elements,), dtype='uint8', name='data')
- kernel = tvm.placeholder((int16_lanes, num_int8_elements), dtype='int8', name='kernel')
- k = tvm.reduce_axis((0, num_int8_elements), name='k')
- C = tvm.compute((int16_lanes, ),
- lambda i: tvm.sum(data[k].astype('int16') *
- kernel[i, k].astype('int16'),
- axis=k),
- name="C")
-
- a_buffer = tvm.decl_buffer(data.shape, dtype='uint8', name="a_buffer",
- offset_factor=1,
- strides=[1])
- b_buffer = tvm.decl_buffer(kernel.shape, dtype='int8', name="b_buffer",
- offset_factor=1)
- # strides=[tvm.var('ldw'), 1, 1])
+ data = te.placeholder((num_int8_elements,), dtype='uint8', name='data')
+ kernel = te.placeholder((int16_lanes, num_int8_elements), dtype='int8', name='kernel')
+ k = te.reduce_axis((0, num_int8_elements), name='k')
+ C = te.compute((int16_lanes, ),
+ lambda i: te.sum(data[k].astype('int16') *
+ kernel[i, k].astype('int16'),
+ axis=k),
+ name="C")
+
+ a_buffer = tvm.tir.decl_buffer(data.shape, dtype='uint8', name="a_buffer",
+ offset_factor=1,
+ strides=[1])
+ b_buffer = tvm.tir.decl_buffer(kernel.shape, dtype='int8', name="b_buffer",
+ offset_factor=1)
+ # strides=[te.var('ldw'), 1, 1])
def _intrin_func(ins, outs):
def _instr(index):
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
if index == 1:
for i in range(4):
- ib.emit(outs[0].vstore([i*32], tvm.const(0, 'int16x32')))
+ ib.emit(outs[0].vstore([i*32], tvm.tir.const(0, 'int16x32')))
return ib.get()
a_int8 = ins[0].vload([0], "uint8x2")
- re_int16 = tvm.call_pure_intrin('int16', 'reinterpret', a_int8)
+ re_int16 = tvm.tir.call_pure_intrin('int16', 'reinterpret', a_int8)
vec_ai16 = re_int16.astype('int16x32')
- vec_a = tvm.call_pure_intrin('int8x64', 'reinterpret', vec_ai16)
+ vec_a = tvm.tir.call_pure_intrin('int8x64', 'reinterpret', vec_ai16)
for i in range(4):
vec_b = ins[1].vload([i*32, 0], "int8x64")
- pair_reduction = tvm.call_llvm_intrin('int16x32',
- 'llvm.x86.avx512.pmaddubs.w.512',
- tvm.const(0, 'uint32'),
- vec_a, vec_b)
+ pair_reduction = tvm.tir.call_llvm_intrin('int16x32',
+ 'llvm.x86.avx512.pmaddubs.w.512',
+ tvm.tir.const(0, 'uint32'),
+ vec_a, vec_b)
if index == 0:
ib.emit(outs[0].vstore([i*32], pair_reduction))
else:
# body, reset, update
return _instr(0), _instr(1), _instr(2)
- with tvm.build_config(offset_factor=1, partition_const_loop=True):
- return tvm.decl_tensor_intrin(C.op, _intrin_func, binds={data:a_buffer, kernel:b_buffer})
+ with tvm.target.build_config(offset_factor=1, partition_const_loop=True):
+ return te.decl_tensor_intrin(C.op, _intrin_func, binds={data:a_buffer, kernel:b_buffer})
def dot_16x1x16_uint8_int8_int32_cascadelake():
int32_lanes = 16 # 16 int32 lanes in AVX512
num_int8_elements = 4 # 4 int8 elements in int32
- data = tvm.placeholder((num_int8_elements,), dtype='uint8', name='data')
- kernel = tvm.placeholder((int32_lanes, num_int8_elements), dtype='int8', name='kernel')
- k = tvm.reduce_axis((0, num_int8_elements), name='k')
- C = tvm.compute((int32_lanes,),
- lambda i: tvm.sum(data[k].astype('int32') *
- kernel[i, k].astype('int32'),
- axis=k),
- name="C")
-
- a_buffer = tvm.decl_buffer(data.shape, dtype='uint8', name="a_buffer",
- offset_factor=1,
- strides=[1])
- b_buffer = tvm.decl_buffer(kernel.shape, dtype='int8', name="b_buffer",
- offset_factor=1,
- strides=[tvm.var('ldw'), 1])
+ data = te.placeholder((num_int8_elements,), dtype='uint8', name='data')
+ kernel = te.placeholder((int32_lanes, num_int8_elements), dtype='int8', name='kernel')
+ k = te.reduce_axis((0, num_int8_elements), name='k')
+ C = te.compute((int32_lanes,),
+ lambda i: te.sum(data[k].astype('int32') *
+ kernel[i, k].astype('int32'),
+ axis=k),
+ name="C")
+
+ a_buffer = tvm.tir.decl_buffer(data.shape, dtype='uint8', name="a_buffer",
+ offset_factor=1,
+ strides=[1])
+ b_buffer = tvm.tir.decl_buffer(kernel.shape, dtype='int8', name="b_buffer",
+ offset_factor=1,
+ strides=[te.var('ldw'), 1])
def _intrin_func(ins, outs):
def _instr(index):
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
if index == 1:
- ib.emit(outs[0].vstore(0, tvm.const(0, 'int32x16')))
+ ib.emit(outs[0].vstore(0, tvm.tir.const(0, 'int32x16')))
return ib.get()
a_int8 = ins[0].vload([0], "uint8x4")
- re_int32 = tvm.call_pure_intrin('int32', 'reinterpret', a_int8)
+ re_int32 = tvm.tir.call_pure_intrin('int32', 'reinterpret', a_int8)
vec_ai32 = re_int32.astype('int32x16')
vec_b = ins[1].vload([0, 0], "int8x64")
llvm_id = tvm.target.codegen.llvm_lookup_intrinsic_id(vnni_inst_name)
if llvm_id != 0: # VNNI is available for current LLVM version
- vec_bi32 = tvm.call_pure_intrin('int32x16', 'reinterpret', vec_b)
- vec_zero = tvm.const(0, "int32x16")
- quad_reduction = tvm.call_llvm_intrin('int32x16',
- 'llvm.x86.avx512.vpdpbusd.512',
- tvm.const(0, 'uint32'),
- vec_zero,
- vec_ai32, vec_bi32)
+ vec_bi32 = tvm.tir.call_pure_intrin('int32x16', 'reinterpret', vec_b)
+ vec_zero = tvm.tir.const(0, "int32x16")
+ quad_reduction = tvm.tir.call_llvm_intrin('int32x16',
+ 'llvm.x86.avx512.vpdpbusd.512',
+ tvm.tir.const(0, 'uint32'),
+ vec_zero,
+ vec_ai32, vec_bi32)
else: # Fall back to the normal AVX512
- vec_a = tvm.call_pure_intrin('int8x64', 'reinterpret', vec_ai32)
- vec_one = tvm.const(1, "int16x32")
- pair_reduction = tvm.call_llvm_intrin('int16x32',
- 'llvm.x86.avx512.pmaddubs.w.512',
- tvm.const(0, 'uint32'),
- vec_a, vec_b)
- quad_reduction = tvm.call_llvm_intrin('int32x16',
- 'llvm.x86.avx512.pmaddw.d.512',
- tvm.const(0, 'uint32'),
- pair_reduction, vec_one)
+ vec_a = tvm.tir.call_pure_intrin('int8x64', 'reinterpret', vec_ai32)
+ vec_one = tvm.tir.const(1, "int16x32")
+ pair_reduction = tvm.tir.call_llvm_intrin('int16x32',
+ 'llvm.x86.avx512.pmaddubs.w.512',
+ tvm.tir.const(0, 'uint32'),
+ vec_a, vec_b)
+ quad_reduction = tvm.tir.call_llvm_intrin('int32x16',
+ 'llvm.x86.avx512.pmaddw.d.512',
+ tvm.tir.const(0, 'uint32'),
+ pair_reduction, vec_one)
if index == 0:
ib.emit(outs[0].vstore(0, quad_reduction))
# body, reset, update
return _instr(0), _instr(1), _instr(2)
- with tvm.build_config(offset_factor=1, partition_const_loop=True):
- return tvm.decl_tensor_intrin(C.op, _intrin_func, binds={data:a_buffer, kernel:b_buffer})
+ with tvm.target.build_config(offset_factor=1, partition_const_loop=True):
+ return te.decl_tensor_intrin(C.op, _intrin_func, binds={data:a_buffer, kernel:b_buffer})
# specific language governing permissions and limitations
# under the License.
"""Common x86 related utilities"""
-from __future__ import absolute_import as _abs
import tvm
+
def get_fp32_len():
mcpu = tvm.target.Target.current().mcpu
fp32_vec_len = 8
# under the License.
import os
import tvm
+from tvm import te
from tvm.contrib import nvcc
import numpy as np
TASK = "bcast_to_i" + "_".join([str(ele) for ele in in_shape])\
+ "o" + "_".join([str(ele) for ele in out_shape])
# Build the logic and compile the function
- A = tvm.placeholder(shape=in_shape, name="A")
+ A = te.placeholder(shape=in_shape, name="A")
B = topi.broadcast_to(A, out_shape)
s = topi.cuda.schedule_broadcast(B)
fcuda = tvm.build(s, [A, B], "cuda", name="broadcast_to")
TASK = "bcast_binary_" + typ + "_lhs" +\
"_".join([str(ele) for ele in lhs_shape]) +\
"rhs" + "_".join([str(ele) for ele in rhs_shape])
- A = tvm.placeholder(shape=lhs_shape, name="A")
- B = tvm.placeholder(shape=rhs_shape, name="B")
+ A = te.placeholder(shape=lhs_shape, name="A")
+ B = te.placeholder(shape=rhs_shape, name="B")
if typ == "add":
C = topi.broadcast_add(A, B)
elif typ == "sub":
# under the License.
import os
import tvm
+from tvm import te
import numpy as np
from scipy import signal
from tvm.contrib import nvcc
padding = 'SAME' # or 'VALID'
# Placeholder
- Input = tvm.placeholder((batch, in_channel, in_height, in_width), name='Input')
- Filter = tvm.placeholder((filter_channel, channel_multiplier, filter_height, filter_width), name='Filter')
+ Input = te.placeholder((batch, in_channel, in_height, in_width), name='Input')
+ Filter = te.placeholder((filter_channel, channel_multiplier, filter_height, filter_width), name='Filter')
Stride = [stride_h, stride_w]
- Scale = tvm.placeholder((in_channel * channel_multiplier,), name='Scale')
- Shift = tvm.placeholder((in_channel * channel_multiplier,), name='Shift')
+ Scale = te.placeholder((in_channel * channel_multiplier,), name='Scale')
+ Shift = te.placeholder((in_channel * channel_multiplier,), name='Shift')
# Declare
DepthwiseConv2d = topi.nn.depthwise_conv2d_nchw(Input, Filter, Stride, padding)
ScaleShift = topi.nn.scale_shift_nchw(DepthwiseConv2d, Scale, Shift)
print("success")
for device in ['cuda', 'opencl', 'rocm']:
- with tvm.build_config(auto_unroll_max_step=128,
+ with tvm.target.build_config(auto_unroll_max_step=128,
unroll_explicit=device == 'rocm',
detect_global_barrier=False,
restricted_func=True):
padding = 'SAME' # or 'VALID'
# Placeholder
- Input = tvm.placeholder((batch, in_height, in_width, in_channel), name='Input')
- Filter = tvm.placeholder((filter_height, filter_width,filter_channel, channel_multiplier), name='Filter')
+ Input = te.placeholder((batch, in_height, in_width, in_channel), name='Input')
+ Filter = te.placeholder((filter_height, filter_width,filter_channel, channel_multiplier), name='Filter')
Stride = [stride_h, stride_w]
- Scale = tvm.placeholder((in_channel * channel_multiplier,), name='Scale')
- Shift = tvm.placeholder((in_channel * channel_multiplier,), name='Shift')
+ Scale = te.placeholder((in_channel * channel_multiplier,), name='Scale')
+ Shift = te.placeholder((in_channel * channel_multiplier,), name='Shift')
# Declare
DepthwiseConv2d = topi.nn.depthwise_conv2d_nhwc(Input, Filter, Stride, padding)
ScaleShift = topi.nn.scale_shift_nhwc(DepthwiseConv2d, Scale, Shift)
print("success")
for device in ['cuda', 'opencl', 'rocm']:
- with tvm.build_config(auto_unroll_max_step=128,
+ with tvm.target.build_config(auto_unroll_max_step=128,
detect_global_barrier=False,
restricted_func=True):
check_device(device)
import numpy as np
import scipy.signal
import tvm
+from tvm import te
from tvm.contrib import nvcc
import topi
from topi.util import get_const_tuple
stride = 2
padding = 'SAME'
- A = tvm.placeholder((in_height, in_width, in_channel, batch), name='A')
- W = tvm.placeholder((kernel, kernel, in_channel, num_filter), name='W')
+ A = te.placeholder((in_height, in_width, in_channel, batch), name='A')
+ W = te.placeholder((kernel, kernel, in_channel, num_filter), name='W')
B = topi.nn.conv2d_hwcn(A, W, stride, padding)
C = topi.nn.relu(B)
s1 = topi.cuda.schedule_conv2d_hwcn([B])
w = tvm.nd.array(w_np, ctx)
b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
- with tvm.build_config(auto_unroll_max_step=128,
+ with tvm.target.build_config(auto_unroll_max_step=128,
unroll_explicit=device == 'rocm'):
func1 = tvm.build(s1, [A, W, B], device)
func1(a, w, b)
import logging
import numpy as np
import tvm
+from tvm import te
import topi
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
hstride, wstride, out_dtype)
# Create TVM placeholders
- data = tvm.placeholder(data_shape, name='data', dtype=data_dtype)
- kernel = tvm.placeholder(kernel_shape, name='kernel', dtype=kernel_dtype)
+ data = te.placeholder(data_shape, name='data', dtype=data_dtype)
+ kernel = te.placeholder(kernel_shape, name='kernel', dtype=kernel_dtype)
# Create the numpy arrays to be used for executing conv models
if data_dtype == 'float32':
padding=hpad, dilation=(1, 1),
layout='NCHWc', out_layout='NCHWc', out_dtype=out_dtype)
out = topi.nn.relu(conv)
- sch = tvm.create_schedule(out.op)
+ sch = te.create_schedule(out.op)
func = tvm.build(sch, [data, kernel, out], target=TARGET_NAME, name='out')
func(data_array, kernel_array, c_orig)
LOGGER.debug(tvm.lower(sch, [data, kernel], simple_mode=True))
import logging
import numpy as np
import tvm
+from tvm import te
import topi
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
hstride, wstride, out_dtype)
# Create TVM placeholders
- data = tvm.placeholder(data_shape, name='data', dtype=data_dtype)
- kernel = tvm.placeholder(kernel_shape, name='kernel', dtype=kernel_dtype)
+ data = te.placeholder(data_shape, name='data', dtype=data_dtype)
+ kernel = te.placeholder(kernel_shape, name='kernel', dtype=kernel_dtype)
# Create the numpy arrays to be used for executing conv models
if data_dtype == 'float32':
padding=hpad, dilation=(1, 1),
layout='NCHWc', out_layout='NCHWc', out_dtype=out_dtype)
out = topi.nn.relu(conv)
- sch = tvm.create_schedule(out.op)
+ sch = te.create_schedule(out.op)
func = tvm.build(sch, [data, kernel, out], target=TARGET_NAME, name='out')
func(data_array, kernel_array, c_orig)
LOGGER.debug(tvm.lower(sch, [data, kernel], simple_mode=True))
# under the License.
"""Example code to do square matrix multiplication on Android Phone."""
import tvm
+from tvm import te
import os
from tvm import rpc
from tvm.contrib import util, ndk
assert(bn <= N)
assert(num_thread * num_thread * 16 <= N)
assert(num_block * num_block * 2 <= N)
- A = tvm.placeholder((N, N), name='A')
- B = tvm.placeholder((N, N), name='Btmp')
- k = tvm.reduce_axis((0, N), name='k')
+ A = te.placeholder((N, N), name='A')
+ B = te.placeholder((N, N), name='Btmp')
+ k = te.reduce_axis((0, N), name='k')
- packedB = tvm.compute((N, N / bn, bn),
+ packedB = te.compute((N, N / bn, bn),
lambda x, y, z: B[x, y * bn + z], name = 'B')
- C = tvm.compute(
+ C = te.compute(
(N, N),
- lambda ii, jj: tvm.sum(A[ii, k] * packedB[k, jj / bn, jj % bn], axis=k),
+ lambda ii, jj: te.sum(A[ii, k] * packedB[k, jj / bn, jj % bn], axis=k),
name='C')
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
CC = s.cache_write(C, "local")
- block_x = tvm.thread_axis("blockIdx.x")
- block_y = tvm.thread_axis("blockIdx.y")
- thread_x = tvm.thread_axis("threadIdx.x")
- thread_y = tvm.thread_axis("threadIdx.y")
+ block_x = te.thread_axis("blockIdx.x")
+ block_y = te.thread_axis("blockIdx.y")
+ thread_x = te.thread_axis("threadIdx.x")
+ thread_y = te.thread_axis("threadIdx.y")
- thread_xz = tvm.thread_axis((0, 2), "vthread", name="vx")
- thread_yz = tvm.thread_axis((0, 2), "vthread", name="vy")
+ thread_xz = te.thread_axis((0, 2), "vthread", name="vx")
+ thread_yz = te.thread_axis((0, 2), "vthread", name="vy")
pby, pbi = s[packedB].split(packedB.op.axis[0], nparts=num_thread)
pbx, pbj = s[packedB].split(packedB.op.axis[1], nparts=num_thread)
# under the License.
"""Example code to do square matrix multiplication."""
import tvm
+from tvm import te
import os
from tvm.contrib import nvcc
from tvm.contrib import spirv
def test_gemm():
# graph
nn = 2048
- n = tvm.var('n')
- n = tvm.convert(nn)
+ n = te.var('n')
+ n = tvm.runtime.convert(nn)
m, l = n, n
- A = tvm.placeholder((l, n), name='A')
- B = tvm.placeholder((l, m), name='B')
- k = tvm.reduce_axis((0, l), name='k')
- C = tvm.compute(
+ A = te.placeholder((l, n), name='A')
+ B = te.placeholder((l, m), name='B')
+ k = te.reduce_axis((0, l), name='k')
+ C = te.compute(
(m, n),
- lambda ii, jj: tvm.sum(A[k, jj] * B[k, ii], axis=k),
+ lambda ii, jj: te.sum(A[k, jj] * B[k, ii], axis=k),
name='C')
# schedule
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
AA = s.cache_read(A, "shared", [C])
BB = s.cache_read(B, "shared", [C])
AL = s.cache_read(AA, "local", [C])
scale = 8
num_thread = 8
block_factor = scale * num_thread
- block_x = tvm.thread_axis("blockIdx.x")
- thread_x = tvm.thread_axis((0, num_thread), "threadIdx.x")
- block_y = tvm.thread_axis("blockIdx.y")
- thread_y = tvm.thread_axis((0, num_thread), "threadIdx.y")
- thread_xz = tvm.thread_axis((0, 2), "vthread", name="vx")
- thread_yz = tvm.thread_axis((0, 2), "vthread", name="vy")
+ block_x = te.thread_axis("blockIdx.x")
+ thread_x = te.thread_axis((0, num_thread), "threadIdx.x")
+ block_y = te.thread_axis("blockIdx.y")
+ thread_y = te.thread_axis((0, num_thread), "threadIdx.y")
+ thread_xz = te.thread_axis((0, 2), "vthread", name="vx")
+ thread_yz = te.thread_axis((0, 2), "vthread", name="vy")
by, yi = s[C].split(C.op.axis[0], factor=block_factor)
bx, xi = s[C].split(C.op.axis[1], factor=block_factor)
print("average time cost of %d runs = %g ms, %g GFLOPS." % (num_runs, t * 1e3, GFLOPS))
for device in ["cuda", "opencl", "rocm", "nvptx", "vulkan"]:
- with tvm.build_config(auto_unroll_max_step=128,
+ with tvm.target.build_config(auto_unroll_max_step=128,
unroll_explicit=(device != "cuda")):
check_device(device)
import sys
import numpy as np
import tvm
+from tvm import te
from tvm import autotvm
from topi.cuda.tensor_intrin import dp4a
@autotvm.template
def gemm_int8(n, m, l):
- A = tvm.placeholder((n, l), name='A', dtype='int8')
- B = tvm.placeholder((m, l), name='B', dtype='int8')
+ A = te.placeholder((n, l), name='A', dtype='int8')
+ B = te.placeholder((m, l), name='B', dtype='int8')
- k = tvm.reduce_axis((0, l), name='k')
- C = tvm.compute((n, m), lambda i, j: tvm.sum(A[i, k].astype('int32') * B[j, k].astype(
+ k = te.reduce_axis((0, l), name='k')
+ C = te.compute((n, m), lambda i, j: te.sum(A[i, k].astype('int32') * B[j, k].astype(
'int32'), axis=k), name='C')
cfg = autotvm.get_config()
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
y, x = C.op.axis
AA = s.cache_read(A, 'shared', [C])
s[CC].tensorize(ki, intrin_dp4a)
- block_x = tvm.thread_axis('blockIdx.x')
- block_y = tvm.thread_axis('blockIdx.y')
- thread_x = tvm.thread_axis('threadIdx.x')
- thread_y = tvm.thread_axis('threadIdx.y')
+ block_x = te.thread_axis('blockIdx.x')
+ block_y = te.thread_axis('blockIdx.y')
+ thread_x = te.thread_axis('threadIdx.x')
+ thread_y = te.thread_axis('threadIdx.y')
def block_size_filter(entity):
return entity.size[0] * 2 >= entity.size[1] * 2 and \
s[C].bind(by, block_y)
s[C].bind(bx, block_x)
- s[C].bind(tyz, tvm.thread_axis('vthread'))
- s[C].bind(txz, tvm.thread_axis('vthread'))
+ s[C].bind(tyz, te.thread_axis('vthread'))
+ s[C].bind(txz, te.thread_axis('vthread'))
s[C].bind(ty, thread_y)
s[C].bind(tx, thread_x)
s[C].reorder(by, bx, tyz, txz, ty, tx, yi, xi)
# under the License.
import os
import tvm
+from tvm import te
from tvm.contrib import nvcc
import numpy as np
def test_reduce_map(in_shape, axis, keepdims, type="sum", test_id=0):
global TASK
# Build the logic and compile the function
- A = tvm.placeholder(shape=in_shape, name="A")
+ A = te.placeholder(shape=in_shape, name="A")
if type == "sum":
TASK = "sum_map_id%d" %test_id
B = topi.sum(A, axis=axis, keepdims=keepdims)
else:
raise NotImplementedError
s = topi.cuda.schedule_reduce(B)
- with tvm.build_config(auto_unroll_max_step=16,
+ with tvm.target.build_config(auto_unroll_max_step=16,
auto_unroll_min_depth=0):
fcuda = tvm.build(s, [A, B], "cuda", name="sum")
# under the License.
"""LSTM Example, still work in progress.."""
import tvm
+from tvm import te
import os
from tvm.contrib import nvcc
import numpy as np
num_thread_x = 16 * 3 // 2
num_sm = 24
n_num_step = 128
- num_step = tvm.var('num_step')
+ num_step = te.var('num_step')
num_hidden = 1152 // 2
batch_size = 1
# Global transition matrix
# Input hidden channel can be pre-caculated by a gemm
- Xi2h = tvm.placeholder((num_step, batch_size, 4, num_hidden), name="Xi2h")
+ Xi2h = te.placeholder((num_step, batch_size, 4, num_hidden), name="Xi2h")
# Only handle hidden transition, saves space.
- Wh2h = tvm.placeholder((4, num_hidden, num_hidden), name="Wh2h")
+ Wh2h = te.placeholder((4, num_hidden, num_hidden), name="Wh2h")
# h: output hidden state, c: cell state.
- s_state_h = tvm.placeholder((num_step, batch_size, num_hidden))
- s_state_c = tvm.placeholder((num_step, batch_size, num_hidden))
- s_init_c = tvm.compute((1, batch_size, num_hidden),
+ s_state_h = te.placeholder((num_step, batch_size, num_hidden))
+ s_state_c = te.placeholder((num_step, batch_size, num_hidden))
+ s_init_c = te.compute((1, batch_size, num_hidden),
lambda *i: 0.0, name="init_c")
- s_init_h = tvm.compute((1, batch_size, num_hidden),
+ s_init_h = te.compute((1, batch_size, num_hidden),
lambda *i: 0.0, name="init_h")
# LSTM transition
- k = tvm.reduce_axis((0, num_hidden), name="ki2h")
- s_h2h = tvm.compute(
+ k = te.reduce_axis((0, num_hidden), name="ki2h")
+ s_h2h = te.compute(
(num_step, batch_size, 4, num_hidden),
- lambda t, i, x, j: tvm.sum(s_state_h[t - 1, i, k] * Wh2h[x, j, k], axis=k),
+ lambda t, i, x, j: te.sum(s_state_h[t - 1, i, k] * Wh2h[x, j, k], axis=k),
name="s_h2h")
# Gate rules
- gates = tvm.compute(Xi2h.shape, lambda *i:
+ gates = te.compute(Xi2h.shape, lambda *i:
Xi2h(*i) + s_h2h(*i), name="gates")
gshape = (num_step, batch_size, num_hidden)
- in_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, i, 0, j]), name="in_gate")
- in_transform = tvm.compute(gshape, lambda t, i, j: tvm.tanh(gates[t, i, 1, j]), name="in_transform")
- forget_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, i, 2, j]), name="forget_gate")
- out_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, i, 3, j]), name="out_gate")
- next_c = tvm.compute(gshape,
+ in_gate = te.compute(gshape, lambda t, i, j: te.sigmoid(gates[t, i, 0, j]), name="in_gate")
+ in_transform = te.compute(gshape, lambda t, i, j: te.tanh(gates[t, i, 1, j]), name="in_transform")
+ forget_gate = te.compute(gshape, lambda t, i, j: te.sigmoid(gates[t, i, 2, j]), name="forget_gate")
+ out_gate = te.compute(gshape, lambda t, i, j: te.sigmoid(gates[t, i, 3, j]), name="out_gate")
+ next_c = te.compute(gshape,
lambda t, i, j:
forget_gate[t, i, j] * s_state_c[t - 1, i, j] +
in_gate[t, i, j] * in_transform[t, i, j], name="next_c")
- next_h = tvm.compute(gshape,
- lambda t, i, j: out_gate[t, i, j] * tvm.tanh(next_c[t, i, j]), name="next_h")
- update_c = tvm.compute(gshape, lambda *i: next_c(*i), name="update_c")
- update_h = tvm.compute(gshape, lambda *i: next_h(*i), name="update_h")
+ next_h = te.compute(gshape,
+ lambda t, i, j: out_gate[t, i, j] * te.tanh(next_c[t, i, j]), name="next_h")
+ update_c = te.compute(gshape, lambda *i: next_c(*i), name="update_c")
+ update_h = te.compute(gshape, lambda *i: next_h(*i), name="update_h")
# schedule
- scan_h, scan_c = tvm.scan(
+ scan_h, scan_c = tvm.te.scan(
[s_init_h, s_init_c],
[update_h, update_c],
[s_state_h, s_state_c],
inputs=[Xi2h],
name="lstm_scan")
# schedule
- s = tvm.create_schedule(scan_h.op)
+ s = te.create_schedule(scan_h.op)
# Inline gate computations
s[gates].compute_inline()
s[in_gate].compute_inline()
s[forget_gate].compute_inline()
s[out_gate].compute_inline()
- block_x = tvm.thread_axis((0, num_sm), "blockIdx.x")
- thread_x = tvm.thread_axis((0, num_thread_x), "threadIdx.x")
- thread_y = tvm.thread_axis((0, num_thread_y), "threadIdx.y")
+ block_x = te.thread_axis((0, num_sm), "blockIdx.x")
+ thread_x = te.thread_axis((0, num_thread_x), "threadIdx.x")
+ thread_y = te.thread_axis((0, num_thread_y), "threadIdx.y")
s_state_h_S = s.cache_read(s_state_h, "shared", [s_h2h])
s_state_c_S = s.cache_read(s_state_c, "shared", [next_c])
print("Time cost=%g" % eval_result.mean)
# set unroll_explicit for more readable code.
- with tvm.build_config(
+ with tvm.target.build_config(
detect_global_barrier=DETECT_GLOBAL_BARRIER,
auto_unroll_max_step=128,
unroll_explicit=False):
```
"""
import tvm
+from tvm import te
import time
import os
import argparse
n_batch_size = 4
detect_global_barrier = DETECT_GLOBAL_BARRIER
- num_step = tvm.var("num_step")
- num_hidden = tvm.convert(n_num_hidden)
- batch_size = tvm.convert(n_batch_size)
+ num_step = te.var("num_step")
+ num_hidden = tvm.runtime.convert(n_num_hidden)
+ batch_size = tvm.runtime.convert(n_batch_size)
num_thread_y = 8
num_thread_x = 16 * 3
num_sm = 24
- Whh = tvm.placeholder((num_hidden, num_hidden), name="Whh")
- s_init = tvm.compute((1, batch_size, num_hidden),
+ Whh = te.placeholder((num_hidden, num_hidden), name="Whh")
+ s_init = te.compute((1, batch_size, num_hidden),
lambda _, i, j: 1.0, name="init")
- s_state = tvm.placeholder((num_step, batch_size, num_hidden))
- kh = tvm.reduce_axis((0, num_hidden), name="kh")
- s_update = tvm.compute(
+ s_state = te.placeholder((num_step, batch_size, num_hidden))
+ kh = te.reduce_axis((0, num_hidden), name="kh")
+ s_update = te.compute(
(num_step, batch_size, num_hidden),
- lambda t, i, j: tvm.sum(s_state[t-1, i, kh] * Whh[kh, j], axis=kh),
+ lambda t, i, j: te.sum(s_state[t-1, i, kh] * Whh[kh, j], axis=kh),
name="update")
- s_scan = tvm.scan(s_init, s_update, s_state)
+ s_scan = tvm.te.scan(s_init, s_update, s_state)
# schedule
- s = tvm.create_schedule(s_scan.op)
+ s = te.create_schedule(s_scan.op)
CL = s_update
SS = s.cache_read(s_state, "shared", [CL])
SL = s.cache_read(SS, "local", [CL])
ko, ki = s[CL].split(s[CL].op.reduce_axis[0], nparts=num_thread_y)
CLF = s.rfactor(CL, ko)
- block_x = tvm.thread_axis((0, num_sm), "blockIdx.x")
- thread_x = tvm.thread_axis((0, num_thread_x), "threadIdx.x")
- thread_y = tvm.thread_axis((0, num_thread_y), "threadIdx.y")
+ block_x = te.thread_axis((0, num_sm), "blockIdx.x")
+ thread_x = te.thread_axis((0, num_thread_x), "threadIdx.x")
+ thread_y = te.thread_axis((0, num_thread_y), "threadIdx.y")
if PERSIST_KERNEL:
s[s_scan.op].env_threads([block_x, thread_y, thread_x])
s[SS].bind(tx, thread_x)
def check_device(target):
- with tvm.build_config(
+ with tvm.target.build_config(
detect_global_barrier=detect_global_barrier,
auto_unroll_max_step=128,
unroll_explicit=False):
"""Common utility for topi test"""
import tvm
+from tvm import te
from tvm import autotvm
from tvm.autotvm.task.space import FallbackConfigEntity
import topi
"""Test code for FIFO buffer"""
import tvm
+from tvm import te
import topi
import topi.testing
import numpy as np
from common import get_all_backend
def verify_fifo_buffer(buffer_shape, data_shape, axis, dtype='float32'):
- buffer = tvm.placeholder(buffer_shape, name='buffer', dtype=dtype)
- data = tvm.placeholder(data_shape, name='data', dtype=dtype)
+ buffer = te.placeholder(buffer_shape, name='buffer', dtype=dtype)
+ data = te.placeholder(data_shape, name='data', dtype=dtype)
# Use memoize, pickle the test data for next time use
@memoize('topi.tests.test_fifo_buffer')
dtype = 'float32'
- inc_input = tvm.placeholder(inc_input_shape, name='inc_input', dtype=dtype)
- input_window = tvm.placeholder(input_window_shape, name='input_window', dtype=dtype)
- context = tvm.placeholder(context_shape, name='context', dtype=dtype)
- kernel = tvm.placeholder(kernel_shape, name='kernel', dtype=dtype)
- inc_output = tvm.placeholder(inc_input_shape, name='inc_output', dtype=dtype)
- output_window = tvm.placeholder(output_window_shape, name='output_window', dtype=dtype)
+ inc_input = te.placeholder(inc_input_shape, name='inc_input', dtype=dtype)
+ input_window = te.placeholder(input_window_shape, name='input_window', dtype=dtype)
+ context = te.placeholder(context_shape, name='context', dtype=dtype)
+ kernel = te.placeholder(kernel_shape, name='kernel', dtype=dtype)
+ inc_output = te.placeholder(inc_input_shape, name='inc_output', dtype=dtype)
+ output_window = te.placeholder(output_window_shape, name='output_window', dtype=dtype)
# Use memoize, pickle the test data for next time use
@memoize('topi.tests.test_fifo_buffer_conv1d_integration')
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import topi
from topi import util
def test_util():
- x = tvm.const(100, "int32")
+ x = tvm.tir.const(100, "int32")
assert util.get_const_int(x) == 100
assert util.get_const_tuple((x, x)) == (100, 100)
def test_ewise():
- m = tvm.var('m')
- l = tvm.var('l')
- A = tvm.placeholder((m, l), name='A')
+ m = te.var('m')
+ l = te.var('l')
+ A = te.placeholder((m, l), name='A')
def test_apply(func, name):
B = func(A)
"""Test code for batch_matmul operator"""
import numpy as np
import tvm
+from tvm import te
import topi
import topi.testing
from topi.util import get_const_tuple
}
def verify_batch_matmul(batch, M, N, K):
- x = tvm.placeholder((batch, M, K), name='x')
- y = tvm.placeholder((batch, N, K), name='y')
+ x = te.placeholder((batch, M, K), name='x')
+ y = te.placeholder((batch, N, K), name='y')
dtype = x.dtype
# use memoize to pickle the test data for next time use
# under the License.
import numpy as np
import tvm
+from tvm import te
import topi
import topi.testing
from topi.util import get_const_tuple
out_dtype = 'int32'
with tvm.target.create('llvm'):
- A = tvm.placeholder((batch, in_channel, in_height, in_width), dtype=input_dtype, name='A')
- W = tvm.placeholder((num_filter, in_channel, kernel, kernel), dtype=input_dtype, name='W')
+ A = te.placeholder((batch, in_channel, in_height, in_width), dtype=input_dtype, name='A')
+ W = te.placeholder((num_filter, in_channel, kernel, kernel), dtype=input_dtype, name='W')
B = topi.x86.bitserial_conv2d_nchw(A, W, stride, padding, activation_bits, weight_bits,
input_dtype, out_dtype, unipolar)
s = topi.x86.schedule_bitserial_conv2d_nchw([B])
out_dtype='int32'
with tvm.target.create('llvm'):
- A = tvm.placeholder((batch, in_height, in_width, in_channel), dtype=input_dtype, name='A')
- W = tvm.placeholder((kernel, kernel, in_channel, num_filter), dtype=input_dtype, name='W')
+ A = te.placeholder((batch, in_height, in_width, in_channel), dtype=input_dtype, name='A')
+ W = te.placeholder((kernel, kernel, in_channel, num_filter), dtype=input_dtype, name='W')
B = topi.x86.bitserial_conv2d_nhwc(A, W, stride, padding, activation_bits, weight_bits,
input_dtype, out_dtype, unipolar)
s = topi.x86.schedule_bitserial_conv2d_nhwc([B])
import re
import numpy as np
import tvm
+from tvm import te
import topi
import topi.testing
from topi.util import get_const_tuple
device = 'llvm -device=arm_cpu -model=bcm2837 -target=armv7l-linux-gnueabihf -mattr=+neon'
with tvm.target.create(device):
- A = tvm.placeholder((batch, in_height, in_width, in_channel), dtype=input_type, name='A')
- W = tvm.placeholder((kernel, kernel, in_channel, num_filter), dtype=input_type, name='W')
+ A = te.placeholder((batch, in_height, in_width, in_channel), dtype=input_type, name='A')
+ W = te.placeholder((kernel, kernel, in_channel, num_filter), dtype=input_type, name='W')
B = topi.arm_cpu.bitserial_conv2d_nhwc(A, W, stride, padding, activation_bits, weight_bits,
'uint8', out_dtype, unipolar)
s = topi.arm_cpu.schedule_bitserial_conv2d_nhwc([B])
import os
import numpy as np
import tvm
+from tvm import te
import topi
import topi.testing
from topi.util import get_const_tuple
print ("Skipped running code, not an arm device")
continue
input_dtype = 'uint8' if "arm_cpu" in target else "uint32"
- A = tvm.placeholder((batch, in_dim), dtype=input_dtype, name='A')
- B = tvm.placeholder((out_dim, in_dim), dtype=input_dtype, name='B')
+ A = te.placeholder((batch, in_dim), dtype=input_dtype, name='A')
+ B = te.placeholder((out_dim, in_dim), dtype=input_dtype, name='B')
fcompute, fschedule = topi.testing.dispatch(target, _bitserial_dense_implement)
C = fcompute(A, B, activation_bits, weight_bits,
input_dtype, out_dtype, unipolar)
"""Test code for binary neural network operators."""
import numpy as np
import tvm
+from tvm import te
import topi
from topi.util import get_const_tuple
from tvm.contrib.pickle_memoize import memoize
def verify_binary_dense(batch, in_dim, out_dim):
- A = tvm.placeholder((batch, in_dim), name='A')
- B = tvm.placeholder((out_dim, in_dim), name='B')
+ A = te.placeholder((batch, in_dim), name='A')
+ B = te.placeholder((out_dim, in_dim), name='B')
bnn_A = topi.nn.binarize_pack(A)
bnn_B = topi.nn.binarize_pack(B)
# binary dense
- bnn_A1 = tvm.placeholder(bnn_A.shape, dtype=bnn_A.dtype)
- bnn_B1 = tvm.placeholder(bnn_B.shape, dtype=bnn_B.dtype)
+ bnn_A1 = te.placeholder(bnn_A.shape, dtype=bnn_A.dtype)
+ bnn_B1 = te.placeholder(bnn_B.shape, dtype=bnn_B.dtype)
bnn_C = topi.nn.binary_dense(bnn_A1, bnn_B1)
# schedule
with tvm.target.create('llvm'):
"""Test code for broadcasting operators."""
import numpy as np
import tvm
+from tvm import te
import topi
import topi.testing
from common import get_all_backend
def verify_broadcast_to_ele(in_shape, out_shape, fbcast):
# Build the logic and compile the function
- A = tvm.placeholder(shape=in_shape, name="A")
+ A = te.placeholder(shape=in_shape, name="A")
B = fbcast(A, out_shape)
def check_device(device):
rhs_min=-100, rhs_max=100,
dtype="float32"):
# Build the logic and compile the function
- A = (tvm.var("A", dtype=dtype) if lhs_shape is None
- else tvm.placeholder(shape=lhs_shape, name="A", dtype=dtype))
- B = (tvm.var("B", dtype=dtype) if rhs_shape is None
- else tvm.placeholder(shape=rhs_shape, name="B", dtype=dtype))
+ A = (te.var("A", dtype=dtype) if lhs_shape is None
+ else te.placeholder(shape=lhs_shape, name="A", dtype=dtype))
+ B = (te.var("B", dtype=dtype) if rhs_shape is None
+ else te.placeholder(shape=rhs_shape, name="B", dtype=dtype))
C = ftopi(A, B)
- if isinstance(A, tvm.expr.PrimExpr) and isinstance(B, tvm.expr.PrimExpr):
- assert(isinstance(C, tvm.expr.PrimExpr))
+ if isinstance(A, tvm.tir.PrimExpr) and isinstance(B, tvm.tir.PrimExpr):
+ assert(isinstance(C, tvm.tir.PrimExpr))
return
def gen_operand(shape, low, high, ctx):
dtype="bool",
):
# Build the logic and compile the function
- A = tvm.placeholder(shape=indata.shape, name="A", dtype=dtype)
+ A = te.placeholder(shape=indata.shape, name="A", dtype=dtype)
B = func(A)
- if isinstance(A, tvm.expr.PrimExpr):
- assert (isinstance(B, tvm.expr.PrimExpr))
+ if isinstance(A, tvm.tir.PrimExpr):
+ assert (isinstance(B, tvm.tir.PrimExpr))
return
def check_device(device):
dtype="int32",
):
# Build the logic and compile the function
- A = tvm.placeholder(shape=shape, name="A", dtype=dtype)
+ A = te.placeholder(shape=shape, name="A", dtype=dtype)
B = func(A)
- if isinstance(A, tvm.expr.PrimExpr):
- assert (isinstance(B, tvm.expr.PrimExpr))
+ if isinstance(A, tvm.tir.PrimExpr):
+ assert (isinstance(B, tvm.tir.PrimExpr))
return
def check_device(device):
dtype="bool",
):
# Build the logic and compile the function
- A = (tvm.var("A", dtype=dtype))
- B = (tvm.var("B", dtype=dtype))
+ A = (te.var("A", dtype=dtype))
+ B = (te.var("B", dtype=dtype))
C = func(A, B)
- if isinstance(A, tvm.expr.PrimExpr) and isinstance(B, tvm.expr.PrimExpr):
- assert (isinstance(C, tvm.expr.PrimExpr))
+ if isinstance(A, tvm.tir.PrimExpr) and isinstance(B, tvm.tir.PrimExpr):
+ assert (isinstance(C, tvm.tir.PrimExpr))
return
def check_device(device):
"""Test code for clip operator"""
import numpy as np
import tvm
+from tvm import te
import topi
import topi.testing
from topi.util import get_const_tuple
from common import get_all_backend
def verify_clip(N, a_min, a_max, dtype):
- A = tvm.placeholder((N, N), dtype=dtype, name='A')
+ A = te.placeholder((N, N), dtype=dtype, name='A')
B = topi.clip(A, a_min, a_max)
- s = tvm.create_schedule([B.op])
+ s = te.create_schedule([B.op])
# use memoize to pickle the test data for next time use
@memoize("topi.tests.test_topi_clip")
import numpy as np
import itertools
import tvm
+from tvm import te
import topi
import topi.testing
from tvm.contrib.pickle_memoize import memoize
kernel_shape = [kernel_size, in_channels, filters]
dtype = 'float32'
- A = tvm.placeholder(in_shape, name='A', dtype=dtype)
- W = tvm.placeholder(kernel_shape, name='W', dtype=dtype)
+ A = te.placeholder(in_shape, name='A', dtype=dtype)
+ W = te.placeholder(kernel_shape, name='W', dtype=dtype)
def get_ref_data(layout):
a_np = np.random.uniform(size=in_shape).astype(dtype)
import numpy as np
import itertools
import tvm
+from tvm import te
import topi
import topi.testing
from tvm.contrib.pickle_memoize import memoize
def verify_conv1d_transpose_ncw(batch, in_channel, in_size, num_filter, kernel, stride, padding):
in_width = in_size
- A = tvm.placeholder((batch, in_channel, in_width), name='A')
- W = tvm.placeholder((in_channel, num_filter, kernel), name='W')
+ A = te.placeholder((batch, in_channel, in_width), name='A')
+ W = te.placeholder((in_channel, num_filter, kernel), name='W')
a_shape = get_const_tuple(A.shape)
w_shape = get_const_tuple(W.shape)
import numpy as np
import tvm
+from tvm import te
from tvm import autotvm
import topi
import topi.testing
ic_block = bn
break
- A = tvm.placeholder((batch, in_channel//ic_block, in_height, in_width, ic_block), name='A')
- W = tvm.placeholder((num_filter//oc_block, in_channel//ic_block, kernel, kernel, ic_block, oc_block), name='W')
- bias = tvm.placeholder((num_filter//oc_block, 1, 1, oc_block), name='bias')
+ A = te.placeholder((batch, in_channel//ic_block, in_height, in_width, ic_block), name='A')
+ W = te.placeholder((num_filter//oc_block, in_channel//ic_block, kernel, kernel, ic_block, oc_block), name='W')
+ bias = te.placeholder((num_filter//oc_block, 1, 1, oc_block), name='bias')
@memoize("topi.tests.test_topi_conv2d_NCHWc.verify_conv2d_NCHWc")
def get_ref_data():
import os
import numpy as np
import tvm
+from tvm import te
import topi
import topi.testing
from tvm.contrib.pickle_memoize import memoize
def verify_conv2d_hwcn(batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation=1):
in_height = in_width = in_size
- A = tvm.placeholder((in_height, in_width, in_channel, batch), name='A')
- W = tvm.placeholder((kernel, kernel, in_channel, num_filter), name='W')
- B = tvm.placeholder((1, num_filter, 1), name='bias')
+ A = te.placeholder((in_height, in_width, in_channel, batch), name='A')
+ W = te.placeholder((kernel, kernel, in_channel, num_filter), name='W')
+ B = te.placeholder((1, num_filter, 1), name='bias')
a_shape = get_const_tuple(A.shape)
w_shape = get_const_tuple(W.shape)
import numpy as np
import tvm
+from tvm import te
from tvm import autotvm
from tvm.autotvm.task.space import FallbackConfigEntity
import topi
in_height = in_width = in_size
- A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A', dtype='int8')
- W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W', dtype='int8')
- bias = tvm.placeholder((num_filter // oc_block_factor, 1, 1, oc_block_factor), name='bias',
+ A = te.placeholder((batch, in_channel, in_height, in_width), name='A', dtype='int8')
+ W = te.placeholder((num_filter, in_channel, kernel, kernel), name='W', dtype='int8')
+ bias = te.placeholder((num_filter // oc_block_factor, 1, 1, oc_block_factor), name='bias',
dtype='int8')
a_shape = get_const_tuple(A.shape)
import numpy as np
import tvm
+from tvm import te
from tvm import autotvm
import topi
import topi.testing
in_height = in_width = in_size
- A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A')
- W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W')
- bias = tvm.placeholder((num_filter, 1, 1), name='bias')
+ A = te.placeholder((batch, in_channel, in_height, in_width), name='A')
+ W = te.placeholder((num_filter, in_channel, kernel, kernel), name='W')
+ bias = te.placeholder((num_filter, 1, 1), name='bias')
a_shape = get_const_tuple(A.shape)
w_shape = get_const_tuple(W.shape)
import os
import numpy as np
import tvm
+from tvm import te
import topi
import topi.testing
from tvm.contrib.pickle_memoize import memoize
def verify_conv2d_nhwc(batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation=1):
in_height = in_width = in_size
- A = tvm.placeholder((batch, in_height, in_width, in_channel), name='A')
- W = tvm.placeholder((kernel, kernel, in_channel, num_filter), name='W')
+ A = te.placeholder((batch, in_height, in_width, in_channel), name='A')
+ W = te.placeholder((kernel, kernel, in_channel, num_filter), name='W')
a_shape = get_const_tuple(A.shape)
w_shape = get_const_tuple(W.shape)
import numpy as np
import tvm
+from tvm import te
from tvm import autotvm
from tvm.autotvm.task.space import FallbackConfigEntity
import topi
def verify_conv2d_1x1_nhwc_pack_int8(batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation=1):
in_height = in_width = in_size
- A = tvm.placeholder((batch, in_height, in_width, in_channel), name='A', dtype='uint8')
- W = tvm.placeholder((kernel, kernel, in_channel, num_filter), name='W', dtype='int8')
+ A = te.placeholder((batch, in_height, in_width, in_channel), name='A', dtype='uint8')
+ W = te.placeholder((kernel, kernel, in_channel, num_filter), name='W', dtype='int8')
a_shape = get_const_tuple(A.shape)
w_shape = get_const_tuple(W.shape)
"""Test code for transposed convolution."""
import numpy as np
import tvm
+from tvm import te
import topi
import topi.testing
from tvm.contrib.pickle_memoize import memoize
stride_height, stride_width = stride
pad_top, pad_left, pad_bottom, pad_right = padding
- A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A')
- W = tvm.placeholder((in_channel, num_filter, kernel_height, kernel_width), name='W')
+ A = te.placeholder((batch, in_channel, in_height, in_width), name='A')
+ W = te.placeholder((in_channel, num_filter, kernel_height, kernel_width), name='W')
a_shape = get_const_tuple(A.shape)
w_shape = get_const_tuple(W.shape)
import numpy as np
import tvm
+from tvm import te
from tvm import autotvm
from tvm.autotvm.task.space import FallbackConfigEntity
import topi
in_height = in_width = in_size
- A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A')
- W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W')
- bias = tvm.placeholder((num_filter, 1, 1), name='bias')
+ A = te.placeholder((batch, in_channel, in_height, in_width), name='A')
+ W = te.placeholder((num_filter, in_channel, kernel, kernel), name='W')
+ bias = te.placeholder((num_filter, 1, 1), name='bias')
a_shape = get_const_tuple(A.shape)
w_shape = get_const_tuple(W.shape)
import numpy as np
import tvm
+from tvm import te
from tvm import autotvm
import topi
import topi.testing
in_depth = in_height = in_width = in_size
- A = tvm.placeholder((batch, in_channel, in_depth, in_height, in_width), name='A')
- W = tvm.placeholder((num_filter, in_channel, kernel, kernel, kernel), name='W')
- bias = tvm.placeholder((num_filter, 1, 1, 1), name='bias')
+ A = te.placeholder((batch, in_channel, in_depth, in_height, in_width), name='A')
+ W = te.placeholder((num_filter, in_channel, kernel, kernel, kernel), name='W')
+ bias = te.placeholder((num_filter, 1, 1, 1), name='bias')
a_shape = get_const_tuple(A.shape)
w_shape = get_const_tuple(W.shape)
import os
import numpy as np
import tvm
+from tvm import te
import topi
import topi.testing
from tvm.contrib.pickle_memoize import memoize
else:
kernel_depth = kernel_height = kernel_width = kernel
- A = tvm.placeholder((batch, in_depth, in_height, in_width, in_channel), name='A')
- W = tvm.placeholder((kernel_depth, kernel_height, kernel_width, in_channel, num_filter), name='W')
+ A = te.placeholder((batch, in_depth, in_height, in_width, in_channel), name='A')
+ W = te.placeholder((kernel_depth, kernel_height, kernel_width, in_channel, num_filter), name='W')
a_shape = get_const_tuple(A.shape)
w_shape = get_const_tuple(W.shape)
# under the License.
import numpy as np
import tvm
+from tvm import te
from tvm import autotvm
import topi
import topi.testing
print("Workload: (%d, %d, %d, %d, %d, %d, %d, %d, %d, %d)" % (batch, in_channel, in_size,
num_filter, kernel, stride, padding, dilation, deformable_groups, groups))
- A = tvm.placeholder((batch, in_channel, in_size, in_size), name='A')
+ A = te.placeholder((batch, in_channel, in_size, in_size), name='A')
out_size = (in_size - (kernel - 1) * dilation - 1 + 2 * padding) // stride + 1
- Offset = tvm.placeholder((batch, deformable_groups * kernel * kernel * 2, out_size, out_size), name='offset')
- W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W')
- bias = tvm.placeholder((num_filter, 1, 1), name='bias')
+ Offset = te.placeholder((batch, deformable_groups * kernel * kernel * 2, out_size, out_size), name='offset')
+ W = te.placeholder((num_filter, in_channel, kernel, kernel), name='W')
+ bias = te.placeholder((num_filter, 1, 1), name='bias')
a_shape = get_const_tuple(A.shape)
offset_shape = get_const_tuple(Offset.shape)
"""Test code for dense operator"""
import numpy as np
import tvm
+from tvm import te
import topi
import topi.testing
from topi.util import get_const_tuple
}
def verify_dense(batch, in_dim, out_dim, use_bias=True):
- A = tvm.placeholder((batch, in_dim), name='A')
- B = tvm.placeholder((out_dim, in_dim), name='B')
- C = tvm.placeholder((out_dim,), name='C')
+ A = te.placeholder((batch, in_dim), name='A')
+ B = te.placeholder((out_dim, in_dim), name='B')
+ C = te.placeholder((out_dim,), name='C')
dtype = A.dtype
# use memoize to pickle the test data for next time use
def verify_dense_int8(batch, in_dim, out_dim, use_bias=True):
dtype = 'int8'
out_dtype = 'int32'
- A = tvm.placeholder((batch, in_dim), name='A', dtype=dtype)
- B = tvm.placeholder((out_dim, in_dim), name='B', dtype=dtype)
- C = tvm.placeholder((out_dim,), name='C', dtype=out_dtype)
+ A = te.placeholder((batch, in_dim), name='A', dtype=dtype)
+ B = te.placeholder((out_dim, in_dim), name='B', dtype=dtype)
+ C = te.placeholder((out_dim,), name='C', dtype=out_dtype)
# use memoize to pickle the test data for next time use
@memoize("topi.tests.test_topi_dense_int8")
"""Test code for depth to space"""
import numpy as np
import tvm
+from tvm import te
import topi
import topi.testing
else:
raise NotImplementedError('Layout not supported {}'.format(layout))
- A = tvm.placeholder(in_shape, name='A', dtype='float32')
+ A = te.placeholder(in_shape, name='A', dtype='float32')
dtype = A.dtype
a_np = np.random.uniform(size=in_shape).astype(dtype)
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
from tvm import autotvm
import topi
import topi.testing
padding_args = padding
# placeholder
- Input = tvm.placeholder((batch, in_channel, in_height, in_width), name='Input')
- Filter = tvm.placeholder((filter_channel, channel_multiplier, filter_height, filter_width), name='Filter')
- Scale = tvm.placeholder((in_channel * channel_multiplier,), name='Scale')
- Shift = tvm.placeholder((in_channel * channel_multiplier,), name='Shift')
+ Input = te.placeholder((batch, in_channel, in_height, in_width), name='Input')
+ Filter = te.placeholder((filter_channel, channel_multiplier, filter_height, filter_width), name='Filter')
+ Scale = te.placeholder((in_channel * channel_multiplier,), name='Scale')
+ Shift = te.placeholder((in_channel * channel_multiplier,), name='Shift')
dtype = 'float32'
padding_args = padding
# placeholder
- Input = tvm.placeholder((batch, in_height, in_width, in_channel), name='Input')
- Filter = tvm.placeholder((filter_height, filter_width,filter_channel, channel_multiplier), name='Filter')
- Scale = tvm.placeholder((in_channel * channel_multiplier,), name='Scale')
- Shift = tvm.placeholder((in_channel * channel_multiplier,), name='Shift')
+ Input = te.placeholder((batch, in_height, in_width, in_channel), name='Input')
+ Filter = te.placeholder((filter_height, filter_width,filter_channel, channel_multiplier), name='Filter')
+ Scale = te.placeholder((in_channel * channel_multiplier,), name='Scale')
+ Shift = te.placeholder((in_channel * channel_multiplier,), name='Shift')
dtype = 'float32'
break
# placeholder
- Input = tvm.placeholder((batch, in_channel//ic_block, in_height, in_width, ic_block), name='Input')
- Filter = tvm.placeholder((out_channel//oc_block, 1, filter_height, filter_width, 1, oc_block), name='Filter')
+ Input = te.placeholder((batch, in_channel//ic_block, in_height, in_width, ic_block), name='Input')
+ Filter = te.placeholder((out_channel//oc_block, 1, filter_height, filter_width, 1, oc_block), name='Filter')
in_layout = "NCHW%dc" % ic_block
out_layout = "NCHW%dc" % oc_block
dtype = 'float32'
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import topi
import numpy as np
from tvm.contrib.pickle_memoize import memoize
oshape = [batch, out_h, out_w, out_channel]
# placeholder
- Out_grad = tvm.placeholder(oshape, name='Out_grad')
- Filter = tvm.placeholder((filter_h, filter_w, filter_channel, channel_multiplier))
+ Out_grad = te.placeholder(oshape, name='Out_grad')
+ Filter = te.placeholder((filter_h, filter_w, filter_channel, channel_multiplier))
# declare
In_grad = topi.nn.depthwise_conv2d_backward_input_nhwc(Filter, Out_grad, oshape, ishape,
stride=[stride_h, stride_w], padding=[padding_h, padding_w])
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import topi
import topi.testing
import numpy as np
fshape = [filter_h, filter_w, in_channel, channel_multiplier]
# placeholder
- Out_grad = tvm.placeholder(oshape, name='Out_grad')
- Input = tvm.placeholder((batch, in_h, in_w, in_channel), name='In_grad')
+ Out_grad = te.placeholder(oshape, name='Out_grad')
+ Input = te.placeholder((batch, in_h, in_w, in_channel), name='In_grad')
# declare
Weight_grad = topi.nn.depthwise_conv2d_backward_weight_nhwc(Input, Out_grad, oshape, fshape,
stride=[stride_h, stride_w], padding=[padding_h, padding_w])
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import topi
import topi.testing
import numpy as np
ctx = tvm.cpu(0)
def _test_dilate(input_size, strides):
- Input = tvm.placeholder((input_size))
+ Input = te.placeholder((input_size))
Output = topi.nn.dilate(Input, strides)
- schedule = tvm.create_schedule(Output.op)
+ schedule = te.create_schedule(Output.op)
input_np = np.random.uniform(size=input_size).astype(Input.dtype)
output_np = topi.testing.dilate_python(input_np, strides)
input_tvm = tvm.nd.array(input_np, ctx=ctx)
import numpy as np
import tvm
+from tvm import te
from tvm import autotvm
from tvm.autotvm.task.space import FallbackConfigEntity
import topi
in_height = in_width = in_size
- A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A')
- W = tvm.placeholder((num_filter, in_channel // groups, kernel, kernel), name='W')
- bias = tvm.placeholder((num_filter, 1, 1), name='bias')
+ A = te.placeholder((batch, in_channel, in_height, in_width), name='A')
+ W = te.placeholder((num_filter, in_channel // groups, kernel, kernel), name='W')
+ bias = te.placeholder((num_filter, 1, 1), name='bias')
a_shape = get_const_tuple(A.shape)
w_shape = get_const_tuple(W.shape)
in_height = in_width = in_size
- A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A', dtype='int8')
- W = tvm.placeholder((num_filter, in_channel // groups, kernel, kernel), name='W', dtype='int8')
- bias = tvm.placeholder((num_filter // oc_block_factor, 1, 1, oc_block_factor), name='bias',
+ A = te.placeholder((batch, in_channel, in_height, in_width), name='A', dtype='int8')
+ W = te.placeholder((num_filter, in_channel // groups, kernel, kernel), name='W', dtype='int8')
+ bias = te.placeholder((num_filter // oc_block_factor, 1, 1, oc_block_factor), name='bias',
dtype='int8')
a_shape = get_const_tuple(A.shape)
import numpy as np
import tvm
+from tvm import te
from tvm import autotvm
import topi
import topi.testing
ic_block = 8
autotvm.DispatchContext.current.silent = True
- A = tvm.placeholder((batch, in_channel//ic_block, in_height, in_width, ic_block), name='A', dtype='uint8')
- W = tvm.placeholder((num_filter//oc_block, in_channel//ic_block//groups, kernel, kernel, ic_block//4, oc_block, 4), name='W', dtype='int8')
+ A = te.placeholder((batch, in_channel//ic_block, in_height, in_width, ic_block), name='A', dtype='uint8')
+ W = te.placeholder((num_filter//oc_block, in_channel//ic_block//groups, kernel, kernel, ic_block//4, oc_block, 4), name='W', dtype='int8')
@memoize("topi.tests.test_topi_conv2d_NCHWc_int8.verify_conv2d_NCHWc_int8")
def get_ref_data():
"""Test code for bilinear scale """
import numpy as np
import tvm
+from tvm import te
import topi
import topi.testing
def verify_resize(batch, in_channel, in_height, in_width, out_height, out_width,
layout='NCHW', coord_trans="align_corners", method="bilinear"):
if layout == 'NCHW':
- A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A', dtype='float32')
+ A = te.placeholder((batch, in_channel, in_height, in_width), name='A', dtype='float32')
dtype = A.dtype
out_shape = (batch, in_channel, out_height, out_width)
a_np = np.random.uniform(size=(batch, in_channel, in_height, in_width)).astype(dtype)
elif layout == 'NHWC':
- A = tvm.placeholder((batch, in_height, in_width, in_channel), name='A', dtype='float32')
+ A = te.placeholder((batch, in_height, in_width, in_channel), name='A', dtype='float32')
dtype = A.dtype
out_shape = (batch, out_height, out_width, in_channel)
a_np = np.random.uniform(size=(batch, in_height, in_width, in_channel)).astype(dtype)
def verify_resize3d(batch, in_channel, in_depth, in_height, in_width, out_depth, out_height, out_width,
layout='NCDHW', coordinate_transformation_mode="half_pixel", method="trilinear"):
if layout == 'NCDHW':
- A = tvm.placeholder((batch, in_channel, in_depth, in_height, in_width), name='A', dtype='float32')
+ A = te.placeholder((batch, in_channel, in_depth, in_height, in_width), name='A', dtype='float32')
dtype = A.dtype
out_shape = (batch, in_channel, out_depth, out_height, out_width)
a_np = np.random.uniform(size=(batch, in_channel, in_depth, in_height, in_width)).astype(dtype)
elif layout == 'NDHWC':
- A = tvm.placeholder((batch, in_depth, in_height, in_width, in_channel), name='A', dtype='float32')
+ A = te.placeholder((batch, in_depth, in_height, in_width, in_channel), name='A', dtype='float32')
dtype = A.dtype
out_shape = (batch, out_depth, out_height, out_width, in_channel)
a_np = np.random.uniform(size=(batch, in_depth, in_height, in_width, in_channel)).astype(dtype)
def verify_crop_and_resize(image_shape, np_boxes, np_box_indices, np_crop_size, layout='NHWC',
method="bilinear", extrapolation_value=0.0):
- images = tvm.placeholder(image_shape, name='images', dtype='float32')
+ images = te.placeholder(image_shape, name='images', dtype='float32')
np_images = np.random.uniform(size=image_shape).astype("float32")
- boxes = tvm.placeholder(np_boxes.shape, name="boxes", dtype="float32")
- box_ind = tvm.placeholder(np_box_indices.shape, name="box_ind", dtype="int32")
+ boxes = te.placeholder(np_boxes.shape, name="boxes", dtype="float32")
+ box_ind = te.placeholder(np_box_indices.shape, name="box_ind", dtype="int32")
batch = len(np_box_indices)
target_height, target_width = np_crop_size[0], np_crop_size[1]
"""Test code for local response normalization"""
import numpy as np
import tvm
+from tvm import te
import topi
from topi.util import get_const_tuple
import topi.testing
}
def verify_lrn(shape, size, axis, bias, alpha, beta):
- A = tvm.placeholder(shape, name='A')
+ A = te.placeholder(shape, name='A')
B = topi.nn.lrn(A, size, axis, alpha, beta, bias)
dtype = A.dtype
import numpy as np
import scipy
import tvm
+from tvm import te
import topi
import topi.testing
from topi import util
def test_util():
- x = tvm.const(100, "int32")
+ x = tvm.tir.const(100, "int32")
assert util.get_const_int(x) == 100
assert util.get_const_tuple((x, x)) == (100, 100)
low,
high,
shape=(20, 3),
- dtype=tvm.float32,
+ dtype="float32",
check_round=False,
skip_name_check=False,
):
- m = tvm.var("m")
- l = tvm.var("l")
- A = tvm.placeholder((m, l), dtype=dtype, name="A")
+ m = te.var("m")
+ l = te.var("l")
+ A = te.placeholder((m, l), dtype=dtype, name="A")
B = func(A)
assert tuple(B.shape) == tuple(A.shape)
low,
high,
shape=(20, 3),
- dtype=tvm.float32,
+ dtype="float32",
check_round=False,
skip_name_check=False,
):
- m = tvm.var("m")
- l = tvm.var("l")
- A = tvm.placeholder((m, l), dtype=dtype, name="A")
+ m = te.var("m")
+ l = te.var("l")
+ A = te.placeholder((m, l), dtype=dtype, name="A")
B = topi.isnan(A)
assert tuple(B.shape) == tuple(A.shape)
def test_cast():
def verify(from_dtype, to_dtype, low=-100, high=100):
shape = (5, 4)
- A = tvm.placeholder(shape, dtype=from_dtype, name="A")
+ A = te.placeholder(shape, dtype=from_dtype, name="A")
B = topi.cast(A, to_dtype)
if from_dtype == "bool":
low,
high,
step,
- dtype=tvm.float32
+ dtype="float32"
):
a_np = np.arange(low, high, step).astype(dtype)
b_np = f_numpy(a_np)
- A = tvm.placeholder(a_np.shape, dtype=dtype, name="A")
+ A = te.placeholder(a_np.shape, dtype=dtype, name="A")
B = func(A)
assert tuple(B.shape) == tuple(A.shape)
# under the License.
import numpy as np
import tvm
+from tvm import te
import topi
from topi.util import get_const_tuple
pls = [] # placeholders
vals_nd = [] # initial values
for i,arg in enumerate(args):
- pls.append(tvm.placeholder(arg.shape, name='pl'+str(i)))
+ pls.append(te.placeholder(arg.shape, name='pl'+str(i)))
vals_nd.append(tvm.nd.array(arg, ctx))
out = lam(*pls)
out_nd = tvm.nd.array(np.zeros(get_const_tuple(out.shape), dtype=out.dtype), ctx)
- s = tvm.create_schedule([out.op])
+ s = te.create_schedule([out.op])
m = tvm.build(s, pls + [out], "llvm")
m(*(vals_nd+[out_nd]))
return out_nd.asnumpy()
import math
import numpy as np
import tvm
+from tvm import te
import topi
import topi.testing
from topi.util import get_const_tuple
sw = sh
pt, pl, pb, pr = padding
layout = "NCHW"
- A = tvm.placeholder((n, ic, ih, iw), name='A')
+ A = te.placeholder((n, ic, ih, iw), name='A')
B = topi.nn.pool(A, kernel=[kh, kw], stride=[sh, sw], padding=padding,
pool_type=pool_type, ceil_mode=ceil_mode,
layout="NCHW", count_include_pad=count_include_pad)
sw = sh
pt, pl, pb, pr = padding
layout = "NCHW"
- A = tvm.placeholder((n, ic, ih, iw), name='A')
+ A = te.placeholder((n, ic, ih, iw), name='A')
B = topi.nn.pool(A, kernel=[kh, kw], stride=[sh, sw], padding=padding,
pool_type=pool_type, ceil_mode=ceil_mode,
layout="NCHW", count_include_pad=count_include_pad)
else:
assert bshape[2] == int(math.floor(float(ashape[2] - kh + pt + pb) / sh) + 1)
assert bshape[3] == int(math.floor(float(ashape[3] - kw + pl + pr) / sw) + 1)
- OutGrad = tvm.placeholder(bshape, name='OutGrad')
+ OutGrad = te.placeholder(bshape, name='OutGrad')
PoolGrad = topi.nn.pool_grad(OutGrad, A, kernel=[kh, kw], stride=[sh, sw], padding=padding,
pool_type=pool_type, ceil_mode=ceil_mode,
layout="NCHW", count_include_pad=count_include_pad)
def verify_global_pool(n, c, h, w, pool_type, layout='NCHW'):
assert layout in ["NCHW", "NHWC"]
- A = tvm.placeholder((n, c, h, w), name='A')
+ A = te.placeholder((n, c, h, w), name='A')
B = topi.nn.global_pool(A, pool_type=pool_type, layout=layout)
B = topi.nn.relu(B)
l_sl = slice(l_start, l_end)
np_out[i, j, k, l] = np_op(np_data[i, j, k_sl, l_sl])
- data = tvm.placeholder(dshape, name="data", dtype=dtype)
+ data = te.placeholder(dshape, name="data", dtype=dtype)
out = topi.nn.adaptive_pool(data, out_size, pool_type, layout)
def check_device(device):
ctx = tvm.context(device, 0)
input_shape = (n, ic, id, ih, iw)
kernel = [kd, kh, kw]
stride = [sd, sh, sw]
- A = tvm.placeholder(input_shape, name='A')
+ A = te.placeholder(input_shape, name='A')
B = topi.nn.pool3d(A, kernel=kernel, stride=stride, padding=padding,
pool_type=pool_type, ceil_mode=ceil_mode,
layout=layout, count_include_pad=count_include_pad)
input_shape = (n, ic, iw)
kernel = [kw]
stride = [sw]
- A = tvm.placeholder(input_shape, name='A')
+ A = te.placeholder(input_shape, name='A')
B = topi.nn.pool1d(A, kernel=kernel, stride=stride, padding=padding,
pool_type=pool_type, ceil_mode=ceil_mode,
layout=layout, count_include_pad=count_include_pad)
import os
import numpy as np
import tvm
+from tvm import te
import topi
import topi.testing
def verify_reduce_map_ele(in_shape, axis, keepdims, type="sum", dtype="float32"):
# Build the logic and compile the function
- A = tvm.placeholder(shape=in_shape, name="A", dtype=dtype)
+ A = te.placeholder(shape=in_shape, name="A", dtype=dtype)
A1 = topi.sqrt(topi.exp(A))
out_dtype = dtype
if type == "sum":
import os
import numpy as np
import tvm
+from tvm import te
import topi
import topi.testing
from topi.util import get_const_tuple
from common import get_all_backend
def verify_relu(m, n, dtype="float32"):
- A = tvm.placeholder((m, n), name='A', dtype=dtype)
+ A = te.placeholder((m, n), name='A', dtype=dtype)
B = topi.nn.relu(A)
a_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(A.shape)).astype(A.dtype)
def verify_leaky_relu(m, alpha):
- A = tvm.placeholder((m,), name='A')
+ A = te.placeholder((m,), name='A')
B = topi.nn.leaky_relu(A, alpha)
- s = tvm.create_schedule([B.op])
+ s = te.create_schedule([B.op])
a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
b_np = a_np * (a_np > 0) + a_np * (a_np < 0) * alpha
def verify_prelu(x, w, axis, weight_reshape):
- X = tvm.placeholder((x), name='X')
- W = tvm.placeholder((w), name='W')
+ X = te.placeholder((x), name='X')
+ W = te.placeholder((w), name='W')
x_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(X.shape)).astype(X.dtype)
w_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(W.shape)).astype(W.dtype)
return (x < 0) * (x *W.reshape(weight_reshape)) + (x>=0) * x
B = topi.nn.prelu(X, W, axis)
- s = tvm.create_schedule([B.op])
+ s = te.create_schedule([B.op])
ctx = tvm.cpu(0)
x_tvm = tvm.nd.array(x_np, ctx)
import topi
from topi.util import get_const_tuple
import tvm
+from tvm import te
import topi.testing
_reorg_schedule = {
'''Verify reorg operator by comparing outputs from tvm and numpy implementation'''
in_height = in_width = in_size
- A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A')
+ A = te.placeholder((batch, in_channel, in_height, in_width), name='A')
B = topi.vision.reorg(A, stride)
a_shape = get_const_tuple(A.shape)
import os
import numpy as np
import tvm
+from tvm import te
import topi
import topi.testing
import logging
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
def verify_softmax(m, n, dtype="float32"):
- A = tvm.placeholder((m, n), dtype=dtype, name='A')
+ A = te.placeholder((m, n), dtype=dtype, name='A')
B = topi.nn.softmax(A)
# confirm lower works
- s = tvm.create_schedule([B.op])
+ s = te.create_schedule([B.op])
tvm.lower(s, [A, B], simple_mode=True)
a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
check_device(A, B, a_np, b_np, device, "softmax")
def verify_softmax_4d(shape, dtype="float32"):
- A = tvm.placeholder(shape, dtype=dtype, name='A')
+ A = te.placeholder(shape, dtype=dtype, name='A')
B = topi.nn.softmax(A, axis=1)
_, c, h, w = shape
verify_softmax_4d((1, 16, 256, 256))
def verify_log_softmax(m, n, dtype="float32"):
- A = tvm.placeholder((m, n), dtype=dtype, name='A')
+ A = te.placeholder((m, n), dtype=dtype, name='A')
B = topi.nn.log_softmax(A)
# confirm lower works
- s = tvm.create_schedule([B.op])
+ s = te.create_schedule([B.op])
tvm.lower(s, [A, B], simple_mode=True)
a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
b_np = topi.testing.log_softmax_python(a_np)
from __future__ import print_function
import numpy as np
import tvm
+from tvm import te
import topi
import topi.testing
def verify_argsort(axis, is_ascend):
dshape = (20, 100)
data_dtype = "float32"
- data = tvm.placeholder(dshape, name="data", dtype=data_dtype)
+ data = te.placeholder(dshape, name="data", dtype=data_dtype)
perm = np.arange(dshape[0] * dshape[1], dtype=data_dtype)
np.random.shuffle(perm)
def verify_topk(k, axis, ret_type, is_ascend, dtype):
shape = (20, 100)
data_dtype = "float32"
- data = tvm.placeholder(shape, name="data", dtype=data_dtype)
+ data = te.placeholder(shape, name="data", dtype=data_dtype)
np_data = np.random.uniform(size=shape).astype(data_dtype)
if is_ascend:
"""Test code for space to depth"""
import numpy as np
import tvm
+from tvm import te
import topi
import topi.testing
else:
raise NotImplementedError('Layout not supported {}'.format(layout))
- A = tvm.placeholder(in_shape, name='A', dtype='float32')
+ A = te.placeholder(in_shape, name='A', dtype='float32')
dtype = A.dtype
a_np = np.random.uniform(size=in_shape).astype(dtype)
"""Test code for sparse operator"""
import numpy as np
import tvm
+from tvm import te
import topi
import topi.testing
from topi.util import get_const_tuple
import scipy.sparse as sp
def verify_dynamic_csrmv(batch, in_dim, out_dim, use_bias=True):
- nr, nc, n = tvm.var("nr"), tvm.var("nc"), tvm.var("n")
+ nr, nc, n = te.var("nr"), te.var("nc"), te.var("n")
dtype = 'float32'
A = tvmsp.placeholder(shape=(nr, nc), nonzeros=n, dtype=dtype, name='A')
- B = tvm.placeholder((in_dim, 1), name='B')
- C = tvm.placeholder((nr,), name='C')
+ B = te.placeholder((in_dim, 1), name='B')
+ C = te.placeholder((nr,), name='C')
D = topi.sparse.csrmv(A, B, C if use_bias else None)
- s = tvm.create_schedule(D.op)
+ s = te.create_schedule(D.op)
dtype = A.dtype
# get the test data
check_device(device)
def verify_dynamic_csrmm(batch, in_dim, out_dim, use_bias=True):
- nr, nc, n = tvm.var("nr"), tvm.var("nc"), tvm.var("n")
+ nr, nc, n = te.var("nr"), te.var("nc"), te.var("n")
dtype = 'float32'
A = tvmsp.placeholder(shape=(nr, nc), nonzeros=n, dtype=dtype, name='A')
- B = tvm.placeholder((in_dim, out_dim), name='B')
- C = tvm.placeholder((nr,), name='C')
+ B = te.placeholder((in_dim, out_dim), name='B')
+ C = te.placeholder((nr,), name='C')
D = topi.sparse.csrmm(A, B, C if use_bias else None)
- s = tvm.create_schedule(D.op)
+ s = te.create_schedule(D.op)
dtype = A.dtype
# get the test data
check_device(device)
def verify_dense_si(batch, in_dim, out_dim, use_bias=True, dtype='float32'):
- nonzeros = tvm.var('nonzeros')
+ nonzeros = te.var('nonzeros')
A = tvmsp.placeholder(shape=(batch, in_dim), nonzeros=nonzeros, dtype=dtype, name='A')
- B = tvm.placeholder((out_dim, in_dim), dtype=dtype, name='B')
- C = tvm.placeholder((out_dim,), dtype=dtype, name='C')
+ B = te.placeholder((out_dim, in_dim), dtype=dtype, name='B')
+ C = te.placeholder((out_dim,), dtype=dtype, name='C')
D = topi.sparse.dense(A, B, C if use_bias else None)
- s = tvm.create_schedule(D.op)
+ s = te.create_schedule(D.op)
# get the test data
def get_ref_data():
check_device('llvm')
def verify_dense_sw(batch, in_dim, out_dim, use_bias=True, dtype='float32'):
- nonzeros = tvm.var('nonzeros')
- A = tvm.placeholder((batch, in_dim), dtype=dtype, name='A')
+ nonzeros = te.var('nonzeros')
+ A = te.placeholder((batch, in_dim), dtype=dtype, name='A')
B = tvmsp.placeholder(shape=(out_dim, in_dim), nonzeros=nonzeros, dtype=dtype, name='B')
- C = tvm.placeholder((out_dim,), dtype=dtype, name='C')
+ C = te.placeholder((out_dim,), dtype=dtype, name='C')
D = topi.sparse.dense(A, B, C if use_bias else None)
- s = tvm.create_schedule(D.op)
+ s = te.create_schedule(D.op)
# get the test data
def get_ref_data():
W_np = W_sp_np.todense()
Y_np = X_np.dot(W_np.T)
- W_data = tvm.placeholder(shape=W_sp_np.data.shape, dtype=str(W_sp_np.data.dtype))
- W_indices = tvm.placeholder(shape=W_sp_np.indices.shape, dtype=str(W_sp_np.indices.dtype))
- W_indptr = tvm.placeholder(shape=W_sp_np.indptr.shape, dtype=str(W_sp_np.indptr.dtype))
- X = tvm.placeholder(shape=X_np.shape, dtype=str(X_np.dtype))
+ W_data = te.placeholder(shape=W_sp_np.data.shape, dtype=str(W_sp_np.data.dtype))
+ W_indices = te.placeholder(shape=W_sp_np.indices.shape, dtype=str(W_sp_np.indices.dtype))
+ W_indptr = te.placeholder(shape=W_sp_np.indptr.shape, dtype=str(W_sp_np.indptr.dtype))
+ X = te.placeholder(shape=X_np.shape, dtype=str(X_np.dtype))
Y = topi.nn.sparse_dense(X, W_data, W_indices, W_indptr)
- s = tvm.create_schedule(Y.op)
+ s = te.create_schedule(Y.op)
func = tvm.build(s, [X, W_data, W_indices, W_indptr, Y])
Y_tvm = tvm.nd.array(np.zeros(Y_np.shape, dtype=Y_np.dtype))
func(tvm.nd.array(X_np), tvm.nd.array(W_sp_np.data), tvm.nd.array(W_sp_np.indices), tvm.nd.array(W_sp_np.indptr), Y_tvm)
X_sp_T = X_sp.transpose()
X_np_T = X_sp_T.todense()
- X_data = tvm.placeholder(shape=X_sp.data.shape, dtype=str(X_sp.data.dtype))
- X_indices = tvm.placeholder(shape=X_sp.indices.shape, dtype=str(X_sp.indices.dtype))
- X_indptr = tvm.placeholder(shape=X_sp.indptr.shape, dtype=str(X_sp.indptr.dtype))
+ X_data = te.placeholder(shape=X_sp.data.shape, dtype=str(X_sp.data.dtype))
+ X_indices = te.placeholder(shape=X_sp.indices.shape, dtype=str(X_sp.indices.dtype))
+ X_indptr = te.placeholder(shape=X_sp.indptr.shape, dtype=str(X_sp.indptr.dtype))
X_T_data, X_T_indices, X_T_indptr = topi.nn.sparse_transpose(X_data, X_indices, X_indptr)
- s = tvm.create_schedule([X_T_data.op, X_T_indices.op, X_T_indptr.op])
+ s = te.create_schedule([X_T_data.op, X_T_indices.op, X_T_indptr.op])
func = tvm.build(s, [X_data, X_indices, X_indptr, X_T_data, X_T_indices, X_T_indptr])
W_np = W_sp_np.todense()
Y_np = X_np.dot(W_np.T)
- W_data = tvm.placeholder(shape=W_sp_np.data.shape, dtype=str(W_sp_np.data.dtype))
- W_indices = tvm.placeholder(shape=W_sp_np.indices.shape, dtype=str(W_sp_np.indices.dtype))
- W_indptr = tvm.placeholder(shape=W_sp_np.indptr.shape, dtype=str(W_sp_np.indptr.dtype))
- X = tvm.placeholder(shape=X_np.shape, dtype=str(X_np.dtype))
+ W_data = te.placeholder(shape=W_sp_np.data.shape, dtype=str(W_sp_np.data.dtype))
+ W_indices = te.placeholder(shape=W_sp_np.indices.shape, dtype=str(W_sp_np.indices.dtype))
+ W_indptr = te.placeholder(shape=W_sp_np.indptr.shape, dtype=str(W_sp_np.indptr.dtype))
+ X = te.placeholder(shape=X_np.shape, dtype=str(X_np.dtype))
Y = topi.nn.sparse_dense(X, W_data, W_indices, W_indptr)
- s = tvm.create_schedule(Y.op)
+ s = te.create_schedule(Y.op)
func = tvm.build(s, [X, W_data, W_indices, W_indptr, Y])
Y_tvm = tvm.nd.array(np.zeros(Y_np.shape, dtype=Y_np.dtype))
func(tvm.nd.array(X_np),
W_np = W_sp_np.todense()
Y_np = np.array(X_np.dot(W_np.T))
- W_data = tvm.placeholder(shape=W_sp_np.data.shape, dtype=str(W_sp_np.data.dtype))
- W_indices = tvm.placeholder(shape=W_sp_np.indices.shape, dtype=str(W_sp_np.indices.dtype))
- W_indptr = tvm.placeholder(shape=W_sp_np.indptr.shape, dtype=str(W_sp_np.indptr.dtype))
- X = tvm.placeholder(shape=X_np.shape, dtype=str(X_np.dtype))
+ W_data = te.placeholder(shape=W_sp_np.data.shape, dtype=str(W_sp_np.data.dtype))
+ W_indices = te.placeholder(shape=W_sp_np.indices.shape, dtype=str(W_sp_np.indices.dtype))
+ W_indptr = te.placeholder(shape=W_sp_np.indptr.shape, dtype=str(W_sp_np.indptr.dtype))
+ X = te.placeholder(shape=X_np.shape, dtype=str(X_np.dtype))
Y = topi.nn.sparse_dense(X, W_data, W_indices, W_indptr)
- s = tvm.create_schedule(Y.op)
+ s = te.create_schedule(Y.op)
func = tvm.build(s, [X, W_data, W_indices, W_indptr, Y])
Y_tvm = tvm.nd.array(np.zeros(Y_np.shape, dtype=Y_np.dtype))
func(tvm.nd.array(X_np),
"""Test code for tensor operator"""
import numpy as np
import tvm
+from tvm import te
import topi
import topi.testing
from tvm.contrib.pickle_memoize import memoize
tvm_placeholders = []
for i in range(num_args):
tvm_placeholders.append(
- tvm.placeholder(shape, name="data"+str(i), dtype=dtype))
+ te.placeholder(shape, name="data"+str(i), dtype=dtype))
esum = topi.elemwise_sum(tvm_placeholders)
- s = tvm.create_schedule([esum.op])
+ s = te.create_schedule([esum.op])
@memoize("topi.tests.test_topi_elemwise_sum")
def get_ref_data():
def verify_full(shape, dtype, fill_value):
- A = tvm.placeholder(shape, dtype=dtype, name="A")
+ A = te.placeholder(shape, dtype=dtype, name="A")
B = topi.full_like(A, fill_value=fill_value)
C = topi.full(shape=shape, dtype=dtype, fill_value=fill_value)
- s1 = tvm.create_schedule([B.op])
- s2 = tvm.create_schedule([C.op])
+ s1 = te.create_schedule([B.op])
+ s2 = te.create_schedule([C.op])
@memoize("topi.tests.test_topi_full")
def get_ref_data():
return
with tvm.target.create(device):
ctx = tvm.context(device, 0)
- A = tvm.placeholder((n, m), name='A', dtype=dtype)
- B = tvm.compute((n, m), lambda i, j:
- A[i, j] + tvm.const(1, A.dtype), name='B')
+ A = te.placeholder((n, m), name='A', dtype=dtype)
+ B = te.compute((n, m), lambda i, j:
+ A[i, j] + tvm.tir.const(1, A.dtype), name='B')
S = topi.testing.get_elemwise_schedule(device)(B)
fun = tvm.build(S, [A, B], device)
"""Test code for broadcasting operators."""
import numpy as np
import tvm
+from tvm import te
import topi
import topi.testing
from tvm.contrib.nvcc import have_fp16
from common import get_all_backend
def verify_expand_dims(in_shape, out_shape, axis, num_newaxis):
- A = tvm.placeholder(shape=in_shape, name="A")
+ A = te.placeholder(shape=in_shape, name="A")
B = topi.expand_dims(A, axis, num_newaxis)
def check_device(device):
ctx = tvm.context(device, 0)
def verify_reinterpret(in_shape, in_dtype, out_dtype, generator):
- A = tvm.placeholder(shape=in_shape, name="A", dtype=in_dtype)
+ A = te.placeholder(shape=in_shape, name="A", dtype=in_dtype)
B = topi.reinterpret(A, out_dtype)
def check_device(device):
ctx = tvm.context(device, 0)
def verify_transpose(in_shape, axes):
- A = tvm.placeholder(shape=in_shape, name="A")
+ A = te.placeholder(shape=in_shape, name="A")
B = topi.transpose(A, axes)
def check_device(device):
ctx = tvm.context(device, 0)
def verify_reshape(src_shape, dst_shape):
- A = tvm.placeholder(shape=src_shape, name="A")
+ A = te.placeholder(shape=src_shape, name="A")
B = topi.reshape(A, dst_shape)
def check_device(device):
ctx = tvm.context(device, 0)
def verify_squeeze(src_shape, axis):
- A = tvm.placeholder(shape=src_shape, name="A")
+ A = te.placeholder(shape=src_shape, name="A")
B = topi.squeeze(A, axis=axis)
def check_device(device):
ctx = tvm.context(device, 0)
tensor_l = []
for i, shape in enumerate(shapes):
- tensor_l.append(tvm.placeholder(shape, name="A" + str(i)))
+ tensor_l.append(te.placeholder(shape, name="A" + str(i)))
out_tensor = topi.concatenate(a_tuple=tensor_l, axis=axis)
def check_device(device):
ctx = tvm.context(device, 0)
def verify_stack(shapes, axis):
tensor_l = []
for i, shape in enumerate(shapes):
- tensor_l.append(tvm.placeholder(shape, name="A" + str(i)))
+ tensor_l.append(te.placeholder(shape, name="A" + str(i)))
out_tensor = topi.stack(tensor_l, axis)
def check_device(device):
ctx = tvm.context(device, 0)
def verify_split(src_shape, indices_or_sections, axis):
- A = tvm.placeholder(shape=src_shape, name="A")
+ A = te.placeholder(shape=src_shape, name="A")
tensor_l = topi.split(A, indices_or_sections, axis=axis)
def check_device(device):
ctx = tvm.context(device, 0)
def verify_expand_like(in_shape, out_shape, axis):
- A = tvm.placeholder(shape=in_shape, name="A")
- B = tvm.placeholder(shape=out_shape, name="B")
+ A = te.placeholder(shape=in_shape, name="A")
+ B = te.placeholder(shape=out_shape, name="B")
C = topi.expand_like(A, B, axis)
- s = tvm.create_schedule([C.op])
+ s = te.create_schedule([C.op])
def check_device(device):
if not tvm.runtime.enabled(device):
check_device(device)
def verify_flip(in_shape, axis):
- A = tvm.placeholder(shape=in_shape, name="A")
+ A = te.placeholder(shape=in_shape, name="A")
B = topi.flip(A, axis) + 1
def check_device(device):
ctx = tvm.context(device, 0)
src_dtype = "float32"
indices_dtype = "int32"
indices_src = np.array(indices_src, dtype=indices_dtype)
- A = tvm.placeholder(shape=src_shape, dtype=src_dtype, name="A")
- indices = tvm.placeholder(shape=indices_src.shape, dtype=indices_dtype, name="indices")
+ A = te.placeholder(shape=src_shape, dtype=src_dtype, name="A")
+ indices = te.placeholder(shape=indices_src.shape, dtype=indices_dtype, name="indices")
if axis is None:
out_tensor = topi.take(a=A, indices=indices, mode=mode)
else:
check_device(device)
def verify_strided_slice(in_shape, begin, end, strides=None):
- A = tvm.placeholder(shape=in_shape, name="A")
+ A = te.placeholder(shape=in_shape, name="A")
strides = [1,1,1] if strides is None else strides
B = topi.strided_slice(A, begin, end, strides) + 1
check_device(device)
def verify_strided_set(in_shape, v_shape, begin, end, strides=None):
- A = tvm.placeholder(shape=in_shape, name="A")
- V = tvm.placeholder(shape=v_shape, name="V")
- b = tvm.placeholder(shape=(len(begin),), name="b", dtype='int32')
- e = tvm.placeholder(shape=(len(end),), name="e", dtype='int32')
+ A = te.placeholder(shape=in_shape, name="A")
+ V = te.placeholder(shape=v_shape, name="V")
+ b = te.placeholder(shape=(len(begin),), name="b", dtype='int32')
+ e = te.placeholder(shape=(len(end),), name="e", dtype='int32')
if strides is not None:
- st = tvm.placeholder(shape=(len(strides),), name="st", dtype='int32')
+ st = te.placeholder(shape=(len(strides),), name="st", dtype='int32')
B = topi.strided_set(A, V, b, e, st) + 1
else:
B = topi.strided_set(A, V, b, e) + 1
def verify_gather_nd(src_shape, indices_src, indices_dtype):
src_dtype = "float32"
indices_src = np.array(indices_src, dtype=indices_dtype)
- A = tvm.placeholder(shape=src_shape, dtype=src_dtype, name="A")
- indices = tvm.placeholder(shape=indices_src.shape, dtype=indices_dtype, name="indices")
+ A = te.placeholder(shape=src_shape, dtype=src_dtype, name="A")
+ indices = te.placeholder(shape=indices_src.shape, dtype=indices_dtype, name="indices")
out_tensor = topi.gather_nd(a=A, indices=indices)
def check_device(device):
check_device(device)
def verify_repeat(in_shape, repeats, axis):
- A = tvm.placeholder(shape=in_shape, name="A")
+ A = te.placeholder(shape=in_shape, name="A")
B = topi.repeat(A, repeats, axis)
def check_device(device):
ctx = tvm.context(device, 0)
check_device(device)
def verify_tile(in_shape, reps):
- A = tvm.placeholder(shape=in_shape, name="A")
+ A = te.placeholder(shape=in_shape, name="A")
B = topi.tile(A, reps)
def check_device(device):
ctx = tvm.context(device, 0)
check_device(device)
def verify_where(in_shape):
- Cond = tvm.placeholder(shape=in_shape, name="cond")
+ Cond = te.placeholder(shape=in_shape, name="cond")
dtype = Cond.dtype
- A = tvm.placeholder(shape=in_shape, name="A")
- B = tvm.placeholder(shape=in_shape, name="B")
+ A = te.placeholder(shape=in_shape, name="A")
+ B = te.placeholder(shape=in_shape, name="B")
C = topi.where(Cond, A, B)
def check_device(device):
ctx = tvm.context(device, 0)
check_device(device)
def verify_one_hot(indices_shape, depth, on_value, off_value, axis, dtype):
- indices = tvm.placeholder(shape=indices_shape, name="indices", dtype="int32")
- on_value_const = tvm.const(on_value, dtype)
- off_value_const = tvm.const(off_value, dtype)
+ indices = te.placeholder(shape=indices_shape, name="indices", dtype="int32")
+ on_value_const = tvm.tir.const(on_value, dtype)
+ off_value_const = tvm.tir.const(off_value, dtype)
one_hot_result = topi.transform.one_hot(indices, on_value_const, off_value_const, depth, axis, dtype)
def check_device(device):
ctx = tvm.context(device, 0)
verify_squeeze((1, 1, 1, 1), None)
# a special case to trigger inline let expression
- A = tvm.placeholder((2,), 'float32', 'A')
+ A = te.placeholder((2,), 'float32', 'A')
E = topi.squeeze(A)
- C = tvm.compute((1,), lambda i: E[(2 * A[0] - 1).astype('int32')])
+ C = te.compute((1,), lambda i: E[(2 * A[0] - 1).astype('int32')])
for device in ['cuda', 'opencl']:
ctx = tvm.context(device, 0)
if ctx.exist:
def test_layout_transform():
in_shape = (1, 32, 8, 8)
- A = tvm.placeholder(shape=in_shape, dtype="float32", name="A")
+ A = te.placeholder(shape=in_shape, dtype="float32", name="A")
B = topi.layout_transform(A, "NCHW", "NCHW16c")
input = np.random.uniform(size=in_shape).astype(A.dtype)
def test_shape():
in_shape = (8, 7, 13)
dtype = "int32"
- A = tvm.placeholder(shape=in_shape, dtype="float32", name="A")
+ A = te.placeholder(shape=in_shape, dtype="float32", name="A")
B = topi.shape(A, dtype)
input = np.random.uniform(size=in_shape).astype(A.dtype)
for mask_value in [0.0, 1.0]:
max_length = in_shape[axis]
batch_size = in_shape[1 - axis]
- A = tvm.placeholder(shape=in_shape, dtype="float32", name="A")
- B = tvm.placeholder(shape=(batch_size,), dtype="int32", name="B")
+ A = te.placeholder(shape=in_shape, dtype="float32", name="A")
+ B = te.placeholder(shape=(batch_size,), dtype="int32", name="B")
C = topi.sequence_mask(A, B, axis=axis, mask_value=mask_value)
A_data = np.random.normal(0, 1, in_shape).astype(np.float32)
B_data = np.random.randint(1, max_length, (batch_size,)).astype(np.int32)
def test_ndarray_size():
in_shape = (5, 11, 7)
dtype = "int32"
- A = tvm.placeholder(shape=in_shape, dtype="float32", name="A")
+ A = te.placeholder(shape=in_shape, dtype="float32", name="A")
B = topi.ndarray_size(A, dtype)
input = np.random.uniform(size=in_shape).astype(A.dtype)
return
print("Running on target: %s" % device)
conv2d_compute, conv2d_schedule = topi.testing.get_conv2d_nchw_implement(device)
- data = tvm.placeholder((2, 1, 2, 4), 'int8', 'data')
- w = tvm.placeholder((3, 1, 2, 2), 'int8', 'w')
+ data = te.placeholder((2, 1, 2, 4), 'int8', 'data')
+ w = te.placeholder((3, 1, 2, 2), 'int8', 'w')
conv1 = conv2d_compute(data, w, 1, 0, 1, 'int32')
- zeros = topi.full((2, 3, 1, 3), 'int32', tvm.const(0, dtype='int32'))
+ zeros = topi.full((2, 3, 1, 3), 'int32', tvm.tir.const(0, dtype='int32'))
gt = topi.greater_equal(conv1, zeros)
- one = topi.full((2, 3, 1, 3), 'int32', tvm.const(1, dtype='int32'))
- two = topi.full((2, 3, 1, 3), 'int32', tvm.const(2, dtype='int32'))
+ one = topi.full((2, 3, 1, 3), 'int32', tvm.tir.const(1, dtype='int32'))
+ two = topi.full((2, 3, 1, 3), 'int32', tvm.tir.const(2, dtype='int32'))
where = topi.where(gt, one, two)
add = topi.add(conv1, where)
outs = [add]
"""Test code for upsampling"""
import numpy as np
import tvm
+from tvm import te
import topi
import topi.testing
import math
layout='NCHW', method="nearest_neighbor",
in_batch_block = 0, in_channel_block = 0):
if layout == 'NCHW':
- A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A')
+ A = te.placeholder((batch, in_channel, in_height, in_width), name='A')
dtype = A.dtype
out_shape = (batch, in_channel, int(round(in_height*scale_h)), int(round(in_width*scale_w)))
a_np = np.random.uniform(size=(batch, in_channel, in_height, in_width)).astype(dtype)
elif nchw_pack_layout(layout):
- A = tvm.placeholder((batch, in_channel, in_height, in_width, in_batch_block, in_channel_block),
+ A = te.placeholder((batch, in_channel, in_height, in_width, in_batch_block, in_channel_block),
name='A')
dtype = A.dtype
out_shape = (batch, in_channel, int(round(in_height*scale_h)), int(round(in_width*scale_w)),
a_np = np.random.uniform(size=(batch, in_channel, in_height, in_width,
in_batch_block, in_channel_block)).astype(dtype)
elif layout == 'NHWC':
- A = tvm.placeholder((batch, in_height, in_width, in_channel), name='A')
+ A = te.placeholder((batch, in_height, in_width, in_channel), name='A')
dtype = A.dtype
out_shape = (batch, int(round(in_height*scale_h)), int(round(in_width*scale_w)), in_channel)
a_np = np.random.uniform(size=(batch, in_height, in_width, in_channel)).astype(dtype)
def verify_upsampling3d(batch, in_channel, in_depth, in_height, in_width, scale_d, scale_h, scale_w,
layout='NCDHW', method="nearest_neighbor"):
if layout == 'NCDHW':
- A = tvm.placeholder((batch, in_channel, in_depth, in_height, in_width), name='A')
+ A = te.placeholder((batch, in_channel, in_depth, in_height, in_width), name='A')
dtype = A.dtype
out_shape = (batch, in_channel, int(round(in_depth*scale_d)), int(round(in_height*scale_h)),
int(round(in_width*scale_w)))
a_np = np.random.uniform(size=(batch, in_channel, in_depth, in_height, in_width)).astype(dtype)
elif layout == 'NDHWC':
- A = tvm.placeholder((batch, in_depth, in_height, in_width, in_channel), name='A')
+ A = te.placeholder((batch, in_depth, in_height, in_width, in_channel), name='A')
dtype = A.dtype
out_shape = (batch, int(round(in_depth*scale_d)), int(round(in_height*scale_h)),
int(round(in_width*scale_w)), in_channel)
import math
import numpy as np
import tvm
+from tvm import te
import topi
import topi.testing
print("Running on target: %s" % device)
with tvm.target.create(device):
fcompute, fschedule = topi.testing.dispatch(device, _get_valid_counts_implement)
- data = tvm.placeholder(dshape, name="data", dtype=dtype)
+ data = te.placeholder(dshape, name="data", dtype=dtype)
outs = fcompute(data, score_threshold, id_index, score_index)
s = fschedule(outs)
dshape = np_data.shape
batch, num_anchors, _ = dshape
indices_dshape = (batch, num_anchors)
- data = tvm.placeholder(dshape, name="data")
- valid_count = tvm.placeholder((batch,), dtype="int32", name="valid_count")
+ data = te.placeholder(dshape, name="data")
+ valid_count = te.placeholder((batch,), dtype="int32", name="valid_count")
def check_device(device):
ctx = tvm.context(device, 0)
def verify_multibox_prior(dshape, sizes=(1,), ratios=(1,), steps=(-1, -1), offsets=(0.5, 0.5), clip=False):
- data = tvm.placeholder(dshape, name="data")
+ data = te.placeholder(dshape, name="data")
dtype = data.dtype
input_data = np.random.uniform(size=dshape).astype(dtype)
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
-
+
fcompute, fschedule = topi.testing.dispatch(device, _multibox_prior_implement)
with tvm.target.create(device):
out = fcompute(data, sizes, ratios, steps, offsets, clip)
batch_size = 1
num_anchors = 3
num_classes = 3
- cls_prob = tvm.placeholder((batch_size, num_anchors, num_classes), name="cls_prob")
- loc_preds = tvm.placeholder((batch_size, num_anchors * 4), name="loc_preds")
- anchors = tvm.placeholder((1, num_anchors, 4), name="anchors")
+ cls_prob = te.placeholder((batch_size, num_anchors, num_classes), name="cls_prob")
+ loc_preds = te.placeholder((batch_size, num_anchors * 4), name="loc_preds")
+ anchors = te.placeholder((1, num_anchors, 4), name="anchors")
# Manually create test case
np_cls_prob = np.array([[[0.2, 0.5, 0.3], [0.25, 0.3, 0.45], [0.7, 0.1, 0.2]]])
a_shape = (batch, in_channel, in_size, in_size)
rois_shape = (num_roi, 5)
- a = tvm.placeholder(a_shape)
- rois = tvm.placeholder(rois_shape)
+ a = te.placeholder(a_shape)
+ rois = te.placeholder(rois_shape)
@memoize("topi.tests.test_topi_vision.verify_roi_align")
def get_ref_data():
a_shape = (batch, in_channel, in_size, in_size)
rois_shape = (num_roi, 5)
- a = tvm.placeholder(a_shape)
- rois = tvm.placeholder(rois_shape)
+ a = te.placeholder(a_shape)
+ rois = te.placeholder(rois_shape)
@memoize("topi.tests.test_topi_vision.verify_roi_pool")
def get_ref_data():
def verify_proposal(np_cls_prob, np_bbox_pred, np_im_info, np_out, attrs):
- cls_prob = tvm.placeholder(np_cls_prob.shape)
- bbox_pred = tvm.placeholder(np_bbox_pred.shape)
- im_info = tvm.placeholder(np_im_info.shape)
+ cls_prob = te.placeholder(np_cls_prob.shape)
+ bbox_pred = te.placeholder(np_bbox_pred.shape)
+ im_info = te.placeholder(np_im_info.shape)
def check_device(device):
ctx = tvm.context(device, 0)
import numpy as np
import tvm
+from tvm import te
import topi
from topi.testing import conv2d_nchw_python
def conv2d_no_batching(N, H, W, CO, CI, KH, KW, stride, padding):
assert N == 1, "Only consider batch_size = 1 in this template"
- data = tvm.placeholder((N, CI, H, W), name='data')
- kernel = tvm.placeholder((CO, CI, KH, KW), name='kernel')
+ data = te.placeholder((N, CI, H, W), name='data')
+ kernel = te.placeholder((CO, CI, KH, KW), name='kernel')
conv = topi.nn.conv2d_nchw(data, kernel, stride, padding, dilation=1, out_dtype='float32')
- s = tvm.create_schedule([conv.op])
+ s = te.create_schedule([conv.op])
##### space definition begin #####
n, f, y, x = s[conv].op.axis
bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x)
kernel_scope = n # this is the scope to attach global config inside this kernel
- s[output].bind(bf, tvm.thread_axis("blockIdx.z"))
- s[output].bind(by, tvm.thread_axis("blockIdx.y"))
- s[output].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[output].bind(vf, tvm.thread_axis("vthread"))
- s[output].bind(vy, tvm.thread_axis("vthread"))
- s[output].bind(vx, tvm.thread_axis("vthread"))
- s[output].bind(tf, tvm.thread_axis("threadIdx.z"))
- s[output].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[output].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[output].bind(bf, te.thread_axis("blockIdx.z"))
+ s[output].bind(by, te.thread_axis("blockIdx.y"))
+ s[output].bind(bx, te.thread_axis("blockIdx.x"))
+ s[output].bind(vf, te.thread_axis("vthread"))
+ s[output].bind(vy, te.thread_axis("vthread"))
+ s[output].bind(vx, te.thread_axis("vthread"))
+ s[output].bind(tf, te.thread_axis("threadIdx.z"))
+ s[output].bind(ty, te.thread_axis("threadIdx.y"))
+ s[output].bind(tx, te.thread_axis("threadIdx.x"))
s[output].reorder(n, bf, by, bx, vf, vy, vx, tf, ty, tx, fi, yi, xi)
s[OL].compute_at(s[output], tx)
tz, fused = s[load].split(fused, nparts=cfg["tile_f"].size[2])
ty, fused = s[load].split(fused, nparts=cfg["tile_y"].size[2])
tx, fused = s[load].split(fused, nparts=cfg["tile_x"].size[2])
- s[load].bind(tz, tvm.thread_axis("threadIdx.z"))
- s[load].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[load].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[load].bind(tz, te.thread_axis("threadIdx.z"))
+ s[load].bind(ty, te.thread_axis("threadIdx.y"))
+ s[load].bind(tx, te.thread_axis("threadIdx.x"))
# tune unroll
s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val)
import numpy as np
import tvm
+from tvm import te
from tvm import autotvm
from tvm import relay
import tvm.relay.testing
import numpy as np
import tvm
+from tvm import te
from tvm import autotvm
from tvm import relay
import tvm.relay.testing
import numpy as np
import tvm
+from tvm import te
from tvm import autotvm
from tvm import relay
import tvm.relay.testing
import numpy as np
import tvm
+from tvm import te
from tvm import autotvm
from tvm import relay
from tvm.relay import testing
import numpy as np
import tvm
+from tvm import te
# the module is called `autotvm`
from tvm import autotvm
# Matmul V0: Constant tiling factor
def matmul_v0(N, L, M, dtype):
- A = tvm.placeholder((N, L), name='A', dtype=dtype)
- B = tvm.placeholder((L, M), name='B', dtype=dtype)
+ A = te.placeholder((N, L), name='A', dtype=dtype)
+ B = te.placeholder((L, M), name='B', dtype=dtype)
- k = tvm.reduce_axis((0, L), name='k')
- C = tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k), name='C')
- s = tvm.create_schedule(C.op)
+ k = te.reduce_axis((0, L), name='k')
+ C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name='C')
+ s = te.create_schedule(C.op)
# schedule
y, x = s[C].op.axis
# Matmul V1: List candidate values
@autotvm.register_customized_task("tutorial/matmul_v1") # 1. use a decorator
def matmul_v1(N, L, M, dtype):
- A = tvm.placeholder((N, L), name='A', dtype=dtype)
- B = tvm.placeholder((L, M), name='B', dtype=dtype)
+ A = te.placeholder((N, L), name='A', dtype=dtype)
+ B = te.placeholder((L, M), name='B', dtype=dtype)
- k = tvm.reduce_axis((0, L), name='k')
- C = tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k), name='C')
- s = tvm.create_schedule(C.op)
+ k = te.reduce_axis((0, L), name='k')
+ C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name='C')
+ s = te.create_schedule(C.op)
# schedule
y, x = s[C].op.axis
@autotvm.register_customized_task("tutorial/matmul")
def matmul(N, L, M, dtype):
- A = tvm.placeholder((N, L), name='A', dtype=dtype)
- B = tvm.placeholder((L, M), name='B', dtype=dtype)
+ A = te.placeholder((N, L), name='A', dtype=dtype)
+ B = te.placeholder((L, M), name='B', dtype=dtype)
- k = tvm.reduce_axis((0, L), name='k')
- C = tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k), name='C')
- s = tvm.create_schedule(C.op)
+ k = te.reduce_axis((0, L), name='k')
+ C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name='C')
+ s = te.create_schedule(C.op)
# schedule
y, x = s[C].op.axis
import numpy as np
import tvm
+from tvm import te
from tvm import rpc
from tvm.contrib import util
-n = tvm.convert(1024)
-A = tvm.placeholder((n,), name='A')
-B = tvm.compute((n,), lambda i: A[i] + 1.0, name='B')
-s = tvm.create_schedule(B.op)
+n = tvm.runtime.convert(1024)
+A = te.placeholder((n,), name='A')
+B = te.compute((n,), lambda i: A[i] + 1.0, name='B')
+s = te.create_schedule(B.op)
######################################################################
# Then we cross compile the kernel.
opencl_device_port = 9090
# create schedule for the above "add one" compute declaration
- s = tvm.create_schedule(B.op)
+ s = te.create_schedule(B.op)
xo, xi = s[B].split(B.op.axis[0], factor=32)
- s[B].bind(xo, tvm.thread_axis("blockIdx.x"))
- s[B].bind(xi, tvm.thread_axis("threadIdx.x"))
+ s[B].bind(xo, te.thread_axis("blockIdx.x"))
+ s[B].bind(xi, te.thread_axis("threadIdx.x"))
func = tvm.build(s, [A, B], "opencl", target_host=target_host)
remote = rpc.connect(opencl_device_host, opencl_device_port)
from __future__ import absolute_import, print_function
import tvm
+from tvm import te
import numpy as np
######################################################################
# our customized lowering pass to manipulate the IR directly instead of using schedule primitives.
#
-n = tvm.const(128, "int32")
-a = tvm.placeholder((n, ), name="a")
-b = tvm.placeholder((n, ), name="b")
-c = tvm.compute((n, ), lambda i: a[i] + b[i], name='c')
+n = tvm.tir.const(128, "int32")
+a = te.placeholder((n, ), name="a")
+b = te.placeholder((n, ), name="b")
+c = te.compute((n, ), lambda i: a[i] + b[i], name='c')
-sch = tvm.create_schedule(c.op)
+sch = te.create_schedule(c.op)
ir = tvm.lower(sch, [a, b, c], simple_mode=True)
print(ir)
#
# IR Visitor
# ~~~~~~~~~~
-# We can use ``tvm.ir_pass.PostOrderVisit(stmt, func)`` to gather information from the Halide IR.
+# We can use ``tvm.tir.ir_pass.PostOrderVisit(stmt, func)`` to gather information from the Halide IR.
# ``func`` is a function callback. This function will be called before exiting the current IR node,
# i.e. post-order visit. Then we leverage side effects to store the result of IR visit, because the
# return value of ``func`` will be ignored.
if op in loops:
extent = op.extent.value
name = op.loop_var.name
- lo, li = tvm.var(name + '.outer'), tvm.var(name + '.inner')
- body = tvm.ir_pass.Substitute(op.body, {op.loop_var: lo * 8 + li})
+ lo, li = te.var(name + '.outer'), te.var(name + '.inner')
+ body = tvm.tir.ir_pass.Substitute(op.body, {op.loop_var: lo * 8 + li})
body = tvm.tir.For(li, 0, 8, tvm.tir.For.Vectorized, 0, body)
body = tvm.tir.For(lo, 0, extent // 8, tvm.tir.For.Serial, 0, body)
return body
def vectorize(stmt):
global loops
- tvm.ir_pass.PostOrderVisit(stmt, find_width8)
+ tvm.tir.ir_pass.PostOrderVisit(stmt, find_width8)
if not loops:
return stmt
# The last list argument indicates what kinds of nodes will be transformed.
# Thus, in this case only `For` nodes will call `vectorize8`
- stmt = tvm.ir_pass.IRTransform(stmt, None, vectorize8, ['For'])
+ stmt = tvm.tir.ir_pass.IRTransform(stmt, None, vectorize8, ['For'])
return stmt
# Thus, a good place to put this transformation pass is just after Phase 1.
#
-with tvm.build_config(add_lower_pass=[(1, vectorize)]) as cfg:
+with tvm.target.build_config(add_lower_pass=[(1, vectorize)]) as cfg:
print(tvm.lower(sch, [a, b, c], simple_mode=True))
#####################################################################
# Quick View
# ----------
# This tutorial gives a quick view of writing a customized IR transformation pass:
-# - Use ``tvm.ir_pass.PostOrderVisit`` to gather information on each IR nodes.
-# - Use ``tvm.ir_pass.IRTransform`` to transform IR nodes.
+# - Use ``tvm.tir.ir_pass.PostOrderVisit`` to gather information on each IR node.
+# - Use ``tvm.tir.ir_pass.IRTransform`` to transform IR nodes.
# - Wrap up two above to write an IR-transformation function.
-# - Use ``tvm.build_config`` to put this function to TVM lowering pass
+# - Use ``tvm.target.build_config`` to add this function to the TVM lowering passes.
#
import numpy as np
import tvm
+from tvm import te
import tvm.relay as relay
###############################################################################
from tvm import relay
from tvm.contrib import graph_runtime
import tvm
+from tvm import te
def GraphConv(layer_name,
input_dim,
import keras
from keras.applications.mobilenet_v2 import MobileNetV2
import tvm
+from tvm import te
import tvm.relay as relay
from tvm import rpc
from tvm.contrib import util, ndk, graph_runtime as runtime
"""
import tvm
+from tvm import te
import tvm.relay as relay
from tvm import rpc
from tvm.contrib import util, graph_runtime as runtime
"""
import tvm
+from tvm import te
from tvm import relay
import mxnet as mx
from tvm.contrib.download import download_testdata
We will use GluonCV pre-trained SSD model and convert it to Relay IR
"""
import tvm
+from tvm import te
from matplotlib import pyplot as plt
from tvm.relay.testing.config import ctx_list
#
# To get best inference performance on Intel graphics,
# change target argument to :code:`opencl -device=intel_graphics`.
-# But when using Intel graphics on Mac, target needs to
+# But when using Intel graphics on Mac, target needs to
# be set to `opencl` only for the reason that Intel subgroup
# extension is not supported on Mac.
#
# ---------------
# The process is no different from other examples.
import tvm
+from tvm import te
from tvm.contrib import graph_runtime
# context x86 CPU, use tvm.gpu(0) if you run on GPU
ctx = tvm.cpu(0)
https://github.com/apple/coremltools
"""
import tvm
+from tvm import te
import tvm.relay as relay
from tvm.contrib.download import download_testdata
import coremltools as cm
# tvm, relay
import tvm
+from tvm import te
from tvm import relay
from ctypes import *
from tvm.contrib.download import download_testdata
https://keras.io/#installation
"""
import tvm
+from tvm import te
import tvm.relay as relay
from tvm.contrib.download import download_testdata
import keras
# some standard imports
import mxnet as mx
import tvm
+from tvm import te
import tvm.relay as relay
import numpy as np
import onnx
import numpy as np
import tvm
+from tvm import te
import tvm.relay as relay
from tvm.contrib.download import download_testdata
# tvm, relay
import tvm
+from tvm import te
from tvm import relay
# os and numpy
# Execute on TVM
# --------------
import tvm
+from tvm import te
from tvm.contrib import graph_runtime as runtime
# Create a runtime executor module
To begin with, we import Relay and TVM.
"""
import tvm
+from tvm import te
import numpy as np
from tvm.contrib import graph_runtime as runtime
from tvm import relay
from __future__ import absolute_import, print_function
import tvm
+from tvm import te
import numpy as np
from tvm.contrib import cblas
######################################################################
# Use Extern Tensor Function
# --------------------------
-# In the example below, we use :any:`tvm.extern` to add an extern
+# In the example below, we use :any:`te.extern` to add an extern
# array function call. In the extern call, we declare the shape
# of output tensors. In the second argument we provide the list of inputs.
#
n = 1024
l = 128
m = 235
-bias = tvm.var('bias', dtype=tvm.float32)
-A = tvm.placeholder((n, l), name='A')
-B = tvm.placeholder((l, m), name='B')
-C = tvm.extern((n, m), [A, B],
- lambda ins, outs: tvm.call_packed(
+bias = te.var('bias', dtype="float32")
+A = te.placeholder((n, l), name='A')
+B = te.placeholder((l, m), name='B')
+C = te.extern((n, m), [A, B],
+ lambda ins, outs: tvm.tir.call_packed(
"tvm.contrib.cblas.matmul",
ins[0], ins[1], outs[0], False, False), name="C")
-D = tvm.compute(C.shape, lambda i, j: C[i,j] + bias, name="D")
-s = tvm.create_schedule(D.op)
+D = te.compute(C.shape, lambda i, j: C[i,j] + bias, name="D")
+s = te.create_schedule(D.op)
######################################################################
# Verify the Result
#
from tvm.contrib import cblas
C = cblas.matmul(A, B)
-D = tvm.compute(C.shape, lambda i, j: C[i,j] + bias, name="D")
-s = tvm.create_schedule(D.op)
+D = te.compute(C.shape, lambda i, j: C[i,j] + bias, name="D")
+s = te.create_schedule(D.op)
######################################################################
# Hook Python Function as Extern
print("my_tvm_addone signatures: %s, %s" % (type(x), type(y)))
tvm.nd.array(x.asnumpy() + 1).copyto(y)
-A = tvm.placeholder((n,), name='A')
-B = tvm.extern(A.shape, [A], lambda ins, outs: tvm.call_packed(
+A = te.placeholder((n,), name='A')
+B = te.extern(A.shape, [A], lambda ins, outs: tvm.tir.call_packed(
"tvm.contrib.my_tvm_addone", ins[0], outs[0]), name="C")
-s = tvm.create_schedule(B.op)
+s = te.create_schedule(B.op)
f = tvm.build(s, [A, B], "llvm")
a = tvm.nd.array(np.random.uniform(size=(n,)).astype(A.dtype), ctx)
b = tvm.nd.array(np.random.uniform(size=(n,)).astype(B.dtype), ctx)
######################################################################
# Summary
# -------
-# - TVM calls extern tensor function via :any:`tvm.extern`
+# - TVM calls extern tensor function via :any:`te.extern`
# - Use contrib wrappers for short sugars of extern tensor calls.
# - We can hook front-end function as extern tensor callbacks.
#
from __future__ import absolute_import, print_function
import tvm
+from tvm import te
import numpy as np
######################################################################
# -------------------------------
# The most straight-forward way to call target specific function is via
# extern function call construct in tvm.
-# In the following example, we use :any:`tvm.call_pure_extern` to call
+# In the following example, we use :any:`tvm.tir.call_pure_extern` to call
# :code:`__expf` function, which is only available under CUDA.
#
-n = tvm.var("n")
-A = tvm.placeholder((n,), name='A')
-B = tvm.compute(A.shape,
- lambda i: tvm.call_pure_extern("float32", "__expf", A[i]),
+n = te.var("n")
+A = te.placeholder((n,), name='A')
+B = te.compute(A.shape,
+ lambda i: tvm.tir.call_pure_extern("float32", "__expf", A[i]),
name="B")
-s = tvm.create_schedule(B.op)
+s = te.create_schedule(B.op)
num_thread = 64
bx, tx = s[B].split(B.op.axis[0], factor=num_thread)
-s[B].bind(bx, tvm.thread_axis("blockIdx.x"))
-s[B].bind(tx, tvm.thread_axis("threadIdx.x"))
+s[B].bind(bx, te.thread_axis("blockIdx.x"))
+s[B].bind(tx, te.thread_axis("threadIdx.x"))
f = tvm.build(s, [A, B], "cuda", name="myexp")
print(f.imported_modules[0].get_source())
#
# TVM intrinsic provides the user a mechanism to achieve this, and this
# is the recommended way to solve the problem.
-# The following code use tvm.exp instead, which create an intrinsic call
-# :any:`tvm.exp` to do the exponential.
+# The following code uses te.exp instead, which creates an intrinsic call
+# :any:`te.exp` to do the exponential.
#
-n = tvm.var("n")
-A = tvm.placeholder((n,), name='A')
-B = tvm.compute(A.shape, lambda i: tvm.exp(A[i]), name="B")
-s = tvm.create_schedule(B.op)
+n = te.var("n")
+A = te.placeholder((n,), name='A')
+B = te.compute(A.shape, lambda i: te.exp(A[i]), name="B")
+s = te.create_schedule(B.op)
num_thread = 64
bx, tx = s[B].split(B.op.axis[0], factor=num_thread)
-s[B].bind(bx, tvm.thread_axis("blockIdx.x"))
-s[B].bind(tx, tvm.thread_axis("threadIdx.x"))
+s[B].bind(bx, te.thread_axis("blockIdx.x"))
+s[B].bind(tx, te.thread_axis("threadIdx.x"))
fcuda = tvm.build(s, [A, B], "cuda", name="myexp")
print(fcuda.imported_modules[0].get_source())
######################################################################
# We can find that the code works for both CUDA and opencl.
-# The same tvm.exp can also be used for float64 data types.
+# The same te.exp can also be used for float64 data types.
#
fopencl = tvm.build(s, [A, B], "opencl", name="myexp")
print(fopencl.imported_modules[0].get_source())
######################################################################
# Intrinsic Lowering Rule
# -----------------------
-# When :any:`tvm.exp` is called, TVM creates an intrinsic Call Expr.
+# When :any:`te.exp` is called, TVM creates an intrinsic Call Expr.
# TVM uses transformation rules to transform the intrinsic
# call to device specific extern calls.
#
assert isinstance(op, tvm.tir.Call)
if op.dtype == "float32":
# call float function
- return tvm.call_pure_extern("float32", "%sf" % op.name, op.args[0])
+ return tvm.tir.call_pure_extern("float32", "%sf" % op.name, op.args[0])
elif op.dtype == "float64":
# call double function
- return tvm.call_pure_extern("float32", op.name, op.args[0])
+ return tvm.tir.call_pure_extern("float32", op.name, op.args[0])
else:
# cannot do translation, return self.
return op
def mylog(x):
"""customized log intrinsic function"""
- return tvm.call_pure_intrin(x.dtype, "mylog", x)
+ return tvm.tir.call_pure_intrin(x.dtype, "mylog", x)
def my_cuda_mylog_rule(op):
"""CUDA lowering rule for log"""
if op.dtype == "float32":
- return tvm.call_pure_extern("float32", "logf", op.args[0])
+ return tvm.tir.call_pure_extern("float32", "logf", op.args[0])
elif op.dtype == "float64":
- return tvm.call_pure_extern("float64", "log", op.args[0])
+ return tvm.tir.call_pure_extern("float64", "log", op.args[0])
else:
return op
tvm.target.register_intrin_rule("cuda", "mylog", my_cuda_mylog_rule, override=True)
-n = tvm.var("n")
-A = tvm.placeholder((n,), name='A')
-B = tvm.compute(A.shape, lambda i: mylog(A[i]), name="B")
-s = tvm.create_schedule(B.op)
+n = te.var("n")
+A = te.placeholder((n,), name='A')
+B = te.compute(A.shape, lambda i: mylog(A[i]), name="B")
+s = te.create_schedule(B.op)
num_thread = 64
bx, tx = s[B].split(B.op.axis[0], factor=num_thread)
-s[B].bind(bx, tvm.thread_axis("blockIdx.x"))
-s[B].bind(tx, tvm.thread_axis("threadIdx.x"))
+s[B].bind(bx, te.thread_axis("blockIdx.x"))
+s[B].bind(tx, te.thread_axis("threadIdx.x"))
fcuda = tvm.build(s, [A, B], "cuda", name="mylog")
print(fcuda.imported_modules[0].get_source())
# -------
# - TVM can call extern target dependent math function.
# - Use intrinsics to define a unified interface for the functions.
-# - For more intrinsics available in tvm, take a look at :any:`tvm.intrin`
+# - For more intrinsics available in tvm, take a look at :any:`tvm.tir`
# - You can customize the intrinsic behavior by defining your own rules.
#
from __future__ import absolute_import, print_function
import tvm
+from tvm import te
import numpy as np
######################################################################
#
# The following lines describe the row sum operation.
# To create a reduction formula, we declare a reduction axis using
-# :any:`tvm.reduce_axis`. :any:`tvm.reduce_axis` takes in the range of reductions.
-# :any:`tvm.sum` takes in the expression to be reduced as well as the reduction
+# :any:`te.reduce_axis`. :any:`te.reduce_axis` takes in the range of reductions.
+# :any:`te.sum` takes in the expression to be reduced as well as the reduction
# axis and compute the sum of value over all k in the declared range.
#
# The equivalent C code is as follows:
# }
# }
#
-n = tvm.var("n")
-m = tvm.var("m")
-A = tvm.placeholder((n, m), name='A')
-k = tvm.reduce_axis((0, m), "k")
-B = tvm.compute((n,), lambda i: tvm.sum(A[i, k], axis=k), name="B")
+n = te.var("n")
+m = te.var("m")
+A = te.placeholder((n, m), name='A')
+k = te.reduce_axis((0, m), "k")
+B = te.compute((n,), lambda i: te.sum(A[i, k], axis=k), name="B")
######################################################################
# Schedule the Reduction
# There are several ways to schedule a reduction.
# Before doing anything, let us print out the IR code of default schedule.
#
-s = tvm.create_schedule(B.op)
+s = te.create_schedule(B.op)
print(tvm.lower(s, [A, B], simple_mode=True))
######################################################################
######################################################################
# If we are building a GPU kernel, we can bind the rows of B to GPU threads.
-s[B].bind(xo, tvm.thread_axis("blockIdx.x"))
-s[B].bind(xi, tvm.thread_axis("threadIdx.x"))
+s[B].bind(xo, te.thread_axis("blockIdx.x"))
+s[B].bind(xi, te.thread_axis("threadIdx.x"))
print(tvm.lower(s, [A, B], simple_mode=True))
######################################################################
# In the following schedule, the result of B is written to a temporary
# result B.rf. The factored dimension becomes the first dimension of B.rf.
#
-s = tvm.create_schedule(B.op)
+s = te.create_schedule(B.op)
ko, ki = s[B].split(B.op.reduce_axis[0], factor=16)
BF = s.rfactor(B, ki)
print(tvm.lower(s, [A, B], simple_mode=True))
# columns by threadIdx.x and finally do a cross thread reduction over threadIdx.x
#
xo, xi = s[B].split(s[B].op.axis[0], factor=32)
-s[B].bind(xo, tvm.thread_axis("blockIdx.x"))
-s[B].bind(xi, tvm.thread_axis("threadIdx.y"))
-tx = tvm.thread_axis("threadIdx.x")
+s[B].bind(xo, te.thread_axis("blockIdx.x"))
+s[B].bind(xi, te.thread_axis("threadIdx.y"))
+tx = te.thread_axis("threadIdx.x")
s[B].bind(s[B].op.reduce_axis[0], tx)
s[BF].compute_at(s[B], s[B].op.reduce_axis[0])
s[B].set_store_predicate(tx.var.equal(0))
# In TVM, we can describe convolution via 2D reduction in a simple way.
# Here is an example for 2D convolution with filter size = [3, 3] and strides = [1, 1].
#
-n = tvm.var('n')
-Input = tvm.placeholder((n, n), name='Input')
-Filter = tvm.placeholder((3, 3), name='Filter')
-di = tvm.reduce_axis((0, 3), name='di')
-dj = tvm.reduce_axis((0, 3), name='dj')
-Output = tvm.compute(
+n = te.var('n')
+Input = te.placeholder((n, n), name='Input')
+Filter = te.placeholder((3, 3), name='Filter')
+di = te.reduce_axis((0, 3), name='di')
+dj = te.reduce_axis((0, 3), name='dj')
+Output = te.compute(
(n - 2, n - 2),
- lambda i, j: tvm.sum(Input[i + di, j + dj] * Filter[di, dj], axis=[di, dj]),
+ lambda i, j: te.sum(Input[i + di, j + dj] * Filter[di, dj], axis=[di, dj]),
name='Output')
-s = tvm.create_schedule(Output.op)
+s = te.create_schedule(Output.op)
print(tvm.lower(s, [Input, Filter, Output], simple_mode=True))
######################################################################
#
# Define General Commutative Reduction Operation
# ----------------------------------------------
-# Besides the built-in reduction operations like :any:`tvm.sum`,
-# :any:`tvm.min` and :any:`tvm.max`, you can also define your
-# commutative reduction operation by :any:`tvm.comm_reducer`.
+# Besides the built-in reduction operations like :any:`te.sum`,
+# :any:`tvm.te.min` and :any:`tvm.te.max`, you can also define your
+# commutative reduction operation by :any:`te.comm_reducer`.
#
-n = tvm.var('n')
-m = tvm.var('m')
-product = tvm.comm_reducer(lambda x, y: x*y,
- lambda t: tvm.const(1, dtype=t), name="product")
-A = tvm.placeholder((n, m), name='A')
-k = tvm.reduce_axis((0, m), name='k')
-B = tvm.compute((n,), lambda i: product(A[i, k], axis=k), name='B')
+n = te.var('n')
+m = te.var('m')
+product = te.comm_reducer(lambda x, y: x*y,
+ lambda t: tvm.tir.const(1, dtype=t), name="product")
+A = te.placeholder((n, m), name='A')
+k = te.reduce_axis((0, m), name='k')
+B = te.compute((n,), lambda i: product(A[i, k], axis=k), name='B')
######################################################################
# .. note::
#
# - Describe reduction with reduce_axis.
# - Use rfactor to factor out axis if we need parallelism.
-# - Define new reduction operation by :any:`tvm.comm_reducer`
+# - Define new reduction operation by :any:`te.comm_reducer`
from __future__ import absolute_import, print_function
import tvm
+from tvm import te
import numpy as np
######################################################################
# The result of the scan is a tensor, giving the result of :code:`s_state` after the
# update over the time domain.
#
-m = tvm.var("m")
-n = tvm.var("n")
-X = tvm.placeholder((m, n), name="X")
-s_state = tvm.placeholder((m, n))
-s_init = tvm.compute((1, n), lambda _, i: X[0, i])
-s_update = tvm.compute((m, n), lambda t, i: s_state[t-1, i] + X[t, i])
-s_scan = tvm.scan(s_init, s_update, s_state, inputs=[X])
+m = te.var("m")
+n = te.var("n")
+X = te.placeholder((m, n), name="X")
+s_state = te.placeholder((m, n))
+s_init = te.compute((1, n), lambda _, i: X[0, i])
+s_update = te.compute((m, n), lambda t, i: s_state[t-1, i] + X[t, i])
+s_scan = tvm.te.scan(s_init, s_update, s_state, inputs=[X])
######################################################################
# Schedule the Scan Cell
# first iteration dimension of the update part.
# To split on the time iteration, user can schedule on scan_op.scan_axis instead.
#
-s = tvm.create_schedule(s_scan.op)
+s = te.create_schedule(s_scan.op)
num_thread = 256
-block_x = tvm.thread_axis("blockIdx.x")
-thread_x = tvm.thread_axis("threadIdx.x")
+block_x = te.thread_axis("blockIdx.x")
+thread_x = te.thread_axis("threadIdx.x")
xo, xi = s[s_init].split(s_init.op.axis[1], factor=num_thread)
s[s_init].bind(xo, block_x)
s[s_init].bind(xi, thread_x)
# The following lines demonstrate a scan with two stage operations
# in the scan cell.
#
-m = tvm.var("m")
-n = tvm.var("n")
-X = tvm.placeholder((m, n), name="X")
-s_state = tvm.placeholder((m, n))
-s_init = tvm.compute((1, n), lambda _, i: X[0, i])
-s_update_s1 = tvm.compute((m, n), lambda t, i: s_state[t-1, i] * 2, name="s1")
-s_update_s2 = tvm.compute((m, n), lambda t, i: s_update_s1[t, i] + X[t, i], name="s2")
-s_scan = tvm.scan(s_init, s_update_s2, s_state, inputs=[X])
+m = te.var("m")
+n = te.var("n")
+X = te.placeholder((m, n), name="X")
+s_state = te.placeholder((m, n))
+s_init = te.compute((1, n), lambda _, i: X[0, i])
+s_update_s1 = te.compute((m, n), lambda t, i: s_state[t-1, i] * 2, name="s1")
+s_update_s2 = te.compute((m, n), lambda t, i: s_update_s1[t, i] + X[t, i], name="s2")
+s_scan = tvm.te.scan(s_init, s_update_s2, s_state, inputs=[X])
######################################################################
# These intermediate tensors can also be scheduled normally.
# To ensure correctness, TVM creates a group constraint to forbid
# the body of scan to be compute_at locations outside the scan loop.
#
-s = tvm.create_schedule(s_scan.op)
+s = te.create_schedule(s_scan.op)
xo, xi = s[s_update_s2].split(s_update_s2.op.axis[1], factor=32)
s[s_update_s1].compute_at(s[s_update_s2], xo)
print(tvm.lower(s, [X, s_scan], simple_mode=True))
# recurrent state. Scan supports multiple recurrent states.
# The following example demonstrates how we can build recurrence with two states.
#
-m = tvm.var("m")
-n = tvm.var("n")
-l = tvm.var("l")
-X = tvm.placeholder((m, n), name="X")
-s_state1 = tvm.placeholder((m, n))
-s_state2 = tvm.placeholder((m, l))
-s_init1 = tvm.compute((1, n), lambda _, i: X[0, i])
-s_init2 = tvm.compute((1, l), lambda _, i: 0.0)
-s_update1 = tvm.compute((m, n), lambda t, i: s_state1[t-1, i] + X[t, i])
-s_update2 = tvm.compute((m, l), lambda t, i: s_state2[t-1, i] + s_state1[t-1, 0])
-s_scan1, s_scan2 = tvm.scan([s_init1, s_init2],
+m = te.var("m")
+n = te.var("n")
+l = te.var("l")
+X = te.placeholder((m, n), name="X")
+s_state1 = te.placeholder((m, n))
+s_state2 = te.placeholder((m, l))
+s_init1 = te.compute((1, n), lambda _, i: X[0, i])
+s_init2 = te.compute((1, l), lambda _, i: 0.0)
+s_update1 = te.compute((m, n), lambda t, i: s_state1[t-1, i] + X[t, i])
+s_update2 = te.compute((m, l), lambda t, i: s_state2[t-1, i] + s_state1[t-1, 0])
+s_scan1, s_scan2 = tvm.te.scan([s_init1, s_init2],
[s_update1, s_update2],
[s_state1, s_state2], inputs=[X])
-s = tvm.create_schedule(s_scan1.op)
+s = te.create_schedule(s_scan1.op)
print(tvm.lower(s, [X, s_scan1, s_scan2], simple_mode=True))
######################################################################
from __future__ import absolute_import, print_function
import tvm
+from tvm import te
import numpy as np
######################################################################
#
# declare some variables for use later
-n = tvm.var('n')
-m = tvm.var('m')
+n = te.var('n')
+m = te.var('m')
######################################################################
# A schedule can be created from a list of ops, by default the
# schedule computes tensor in a serial manner in a row-major order.
# declare a matrix element-wise multiply
-A = tvm.placeholder((m, n), name='A')
-B = tvm.placeholder((m, n), name='B')
-C = tvm.compute((m, n), lambda i, j: A[i, j] * B[i, j], name='C')
+A = te.placeholder((m, n), name='A')
+B = te.placeholder((m, n), name='B')
+C = te.compute((m, n), lambda i, j: A[i, j] * B[i, j], name='C')
-s = tvm.create_schedule([C.op])
+s = te.create_schedule([C.op])
# lower will transform the computation from definition to the real
# callable function. With argument `simple_mode=True`, it will
# return you a readable C like statement, we use it here to print the
# -----
# :code:`split` can split a specified axis into two axes by
# :code:`factor`.
-A = tvm.placeholder((m,), name='A')
-B = tvm.compute((m,), lambda i: A[i]*2, name='B')
+A = te.placeholder((m,), name='A')
+B = te.compute((m,), lambda i: A[i]*2, name='B')
-s = tvm.create_schedule(B.op)
+s = te.create_schedule(B.op)
xo, xi = s[B].split(B.op.axis[0], factor=32)
print(tvm.lower(s, [A, B], simple_mode=True))
######################################################################
# You can also split an axis by :code:`nparts`, which splits the axis
# contrary to :code:`factor`.
-A = tvm.placeholder((m,), name='A')
-B = tvm.compute((m,), lambda i: A[i], name='B')
+A = te.placeholder((m,), name='A')
+B = te.compute((m,), lambda i: A[i], name='B')
-s = tvm.create_schedule(B.op)
+s = te.create_schedule(B.op)
bx, tx = s[B].split(B.op.axis[0], nparts=32)
print(tvm.lower(s, [A, B], simple_mode=True))
# ----
# :code:`tile` helps you execute the computation tile by tile over two
# axes.
-A = tvm.placeholder((m, n), name='A')
-B = tvm.compute((m, n), lambda i, j: A[i, j], name='B')
+A = te.placeholder((m, n), name='A')
+B = te.compute((m, n), lambda i, j: A[i, j], name='B')
-s = tvm.create_schedule(B.op)
+s = te.create_schedule(B.op)
xo, yo, xi, yi = s[B].tile(B.op.axis[0], B.op.axis[1], x_factor=10, y_factor=5)
print(tvm.lower(s, [A, B], simple_mode=True))
# fuse
# ----
# :code:`fuse` can fuse two consecutive axes of one computation.
-A = tvm.placeholder((m, n), name='A')
-B = tvm.compute((m, n), lambda i, j: A[i, j], name='B')
+A = te.placeholder((m, n), name='A')
+B = te.compute((m, n), lambda i, j: A[i, j], name='B')
-s = tvm.create_schedule(B.op)
+s = te.create_schedule(B.op)
# tile to four axes first: (i.outer, j.outer, i.inner, j.inner)
xo, yo, xi, yi = s[B].tile(B.op.axis[0], B.op.axis[1], x_factor=10, y_factor=5)
# then fuse (i.inner, j.inner) into one axis: (i.inner.j.inner.fused)
# reorder
# -------
# :code:`reorder` can reorder the axes in the specified order.
-A = tvm.placeholder((m, n), name='A')
-B = tvm.compute((m, n), lambda i, j: A[i, j], name='B')
+A = te.placeholder((m, n), name='A')
+B = te.compute((m, n), lambda i, j: A[i, j], name='B')
-s = tvm.create_schedule(B.op)
+s = te.create_schedule(B.op)
# tile to four axes first: (i.outer, j.outer, i.inner, j.inner)
xo, yo, xi, yi = s[B].tile(B.op.axis[0], B.op.axis[1], x_factor=10, y_factor=5)
# then reorder the axes: (i.inner, j.outer, i.outer, j.inner)
# ----
# :code:`bind` can bind a specified axis with a thread axis, often used
# in gpu programming.
-A = tvm.placeholder((n,), name='A')
-B = tvm.compute(A.shape, lambda i: A[i] * 2, name='B')
+A = te.placeholder((n,), name='A')
+B = te.compute(A.shape, lambda i: A[i] * 2, name='B')
-s = tvm.create_schedule(B.op)
+s = te.create_schedule(B.op)
bx, tx = s[B].split(B.op.axis[0], factor=64)
-s[B].bind(bx, tvm.thread_axis("blockIdx.x"))
-s[B].bind(tx, tvm.thread_axis("threadIdx.x"))
+s[B].bind(bx, te.thread_axis("blockIdx.x"))
+s[B].bind(tx, te.thread_axis("threadIdx.x"))
print(tvm.lower(s, [A, B], simple_mode=True))
######################################################################
# ----------
# For a schedule that consists of multiple operators, TVM will compute
# tensors at the root separately by default.
-A = tvm.placeholder((m,), name='A')
-B = tvm.compute((m,), lambda i: A[i]+1, name='B')
-C = tvm.compute((m,), lambda i: B[i]*2, name='C')
+A = te.placeholder((m,), name='A')
+B = te.compute((m,), lambda i: A[i]+1, name='B')
+C = te.compute((m,), lambda i: B[i]*2, name='C')
-s = tvm.create_schedule(C.op)
+s = te.create_schedule(C.op)
print(tvm.lower(s, [A, B, C], simple_mode=True))
######################################################################
# :code:`compute_at` can move computation of `B` into the first axis
# of computation of `C`.
-A = tvm.placeholder((m,), name='A')
-B = tvm.compute((m,), lambda i: A[i]+1, name='B')
-C = tvm.compute((m,), lambda i: B[i]*2, name='C')
+A = te.placeholder((m,), name='A')
+B = te.compute((m,), lambda i: A[i]+1, name='B')
+C = te.compute((m,), lambda i: B[i]*2, name='C')
-s = tvm.create_schedule(C.op)
+s = te.create_schedule(C.op)
s[B].compute_at(s[C], C.op.axis[0])
print(tvm.lower(s, [A, B, C], simple_mode=True))
# :code:`compute_inline` can mark one stage as inline, then the body of
# computation will be expanded and inserted at the address where the
# tensor is required.
-A = tvm.placeholder((m,), name='A')
-B = tvm.compute((m,), lambda i: A[i]+1, name='B')
-C = tvm.compute((m,), lambda i: B[i]*2, name='C')
+A = te.placeholder((m,), name='A')
+B = te.compute((m,), lambda i: A[i]+1, name='B')
+C = te.compute((m,), lambda i: B[i]*2, name='C')
-s = tvm.create_schedule(C.op)
+s = te.create_schedule(C.op)
s[B].compute_inline()
print(tvm.lower(s, [A, B, C], simple_mode=True))
# compute_root
# ------------
# :code:`compute_root` can move computation of one stage to the root.
-A = tvm.placeholder((m,), name='A')
-B = tvm.compute((m,), lambda i: A[i]+1, name='B')
-C = tvm.compute((m,), lambda i: B[i]*2, name='C')
+A = te.placeholder((m,), name='A')
+B = te.compute((m,), lambda i: A[i]+1, name='B')
+C = te.compute((m,), lambda i: B[i]*2, name='C')
-s = tvm.create_schedule(C.op)
+s = te.create_schedule(C.op)
s[B].compute_at(s[C], C.op.axis[0])
s[B].compute_root()
print(tvm.lower(s, [A, B, C], simple_mode=True))
how to use TEDD and how to interpret the rendered graphs.
"""
-from __future__ import absolute_import, print_function
-
import tvm
+from tvm import te
import topi
from tvm.contrib import tedd
stride = 1
padding = "SAME"
dilation=1
-A = tvm.placeholder((in_size, in_size, in_channel, batch), name='A')
-W = tvm.placeholder((kernel, kernel, in_channel, num_filter), name='W')
-B = tvm.placeholder((1, num_filter, 1), name='bias')
+
+A = te.placeholder((in_size, in_size, in_channel, batch), name='A')
+W = te.placeholder((kernel, kernel, in_channel, num_filter), name='W')
+B = te.placeholder((1, num_filter, 1), name='bias')
+
with tvm.target.create("llvm"):
- t_conv = topi.nn.conv2d(A, W, stride, padding, dilation, layout='HWCN')
+ t_conv = topi.nn.conv2d_hwcn(A, W, stride, padding, dilation)
t_bias = topi.add(t_conv, B)
t_relu = topi.nn.relu(t_bias)
s = topi.generic.schedule_conv2d_hwcn([t_relu])
from __future__ import absolute_import, print_function
import tvm
+from tvm import te
import numpy as np
######################################################################
# The following lines describe the computation :code:`A * B^T` in TVM.
#
N, M, L = 1024, 512, 64
-A = tvm.placeholder((N, L), name='A')
-B = tvm.placeholder((M, L), name='B')
-k = tvm.reduce_axis((0, L), name='k')
-C = tvm.compute((N, M), lambda i, j:
- tvm.sum(A[i, k] * B[j, k], axis=k), name='C')
-s = tvm.create_schedule(C.op)
+A = te.placeholder((N, L), name='A')
+B = te.placeholder((M, L), name='B')
+k = te.reduce_axis((0, L), name='k')
+C = te.compute((N, M), lambda i, j:
+ te.sum(A[i, k] * B[j, k], axis=k), name='C')
+s = te.create_schedule(C.op)
print(tvm.lower(s, [A, B, C], simple_mode=True))
######################################################################
# which is done in :code:`intrin_func` below.
#
def intrin_gemv(m, l):
- a = tvm.placeholder((l,), name='a')
- b = tvm.placeholder((m, l), name='b')
- k = tvm.reduce_axis((0, l), name='k')
- c = tvm.compute((m,), lambda i: tvm.sum(a[k] * b[i, k], axis=k), name='c')
- Ab = tvm.decl_buffer(a.shape, a.dtype,
+ a = te.placeholder((l,), name='a')
+ b = te.placeholder((m, l), name='b')
+ k = te.reduce_axis((0, l), name='k')
+ c = te.compute((m,), lambda i: te.sum(a[k] * b[i, k], axis=k), name='c')
+ Ab = tvm.tir.decl_buffer(a.shape, a.dtype,
name="A",
offset_factor=1,
strides=[1])
- Bb = tvm.decl_buffer(b.shape, b.dtype,
+ Bb = tvm.tir.decl_buffer(b.shape, b.dtype,
name="B",
offset_factor=1,
- strides=[tvm.var("s1"), 1])
- Cb = tvm.decl_buffer(c.shape, c.dtype,
+ strides=[te.var("s1"), 1])
+ Cb = tvm.tir.decl_buffer(c.shape, c.dtype,
name="C",
offset_factor=1,
strides=[1])
def intrin_func(ins, outs):
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
aa, bb = ins
cc = outs[0]
- ib.emit(tvm.call_extern("int32", "gemv_update",
+ ib.emit(tvm.tir.call_extern("int32", "gemv_update",
cc.access_ptr("w"),
aa.access_ptr("r"),
bb.access_ptr("r"),
m, l, bb.strides[0]))
return ib.get()
- with tvm.build_config(offset_factor=1):
- return tvm.decl_tensor_intrin(c.op, intrin_func, binds={a: Ab, b: Bb, c: Cb})
+ with tvm.target.build_config(offset_factor=1):
+ return te.decl_tensor_intrin(c.op, intrin_func, binds={a: Ab, b: Bb, c: Cb})
######################################################################
-# Here :code:`tvm.decl_tensor_intrin` declares how to execute the computation :code:`c.op`.
+# Here :code:`te.decl_tensor_intrin` declares how to execute the computation :code:`c.op`.
# Our implementation simply takes the inputs and outputs,
# converts them to pointers and emit an external function call.
# Note that tensorization requires user to specify :code:`offset_factor`,
# For now :code:`bb.strides[0] == l`,
# but later we will see how they can differ with more complicated schedules.
#
-# Note that we use :code:`tvm.var("s1")` as the first stride dimension for :code:`B`.
+# Note that we use :code:`te.var("s1")` as the first stride dimension for :code:`B`.
# If the strides can be inferred
# - in this case, TVM knows tensor B is compact thus the strides are :code:`[L, 1]` -
# such placeholder can be put to let TVM automatically bind the inferred value for us.
return ll_code
def intrin_gemv(m, l):
- a = tvm.placeholder((l,), name='a')
- b = tvm.placeholder((m, l), name='b')
- k = tvm.reduce_axis((0, l), name='k')
- c = tvm.compute((m,), lambda i:
- tvm.sum(a[k] * b[i, k], axis=k), name='c')
- Ab = tvm.decl_buffer(a.shape, a.dtype,
+ a = te.placeholder((l,), name='a')
+ b = te.placeholder((m, l), name='b')
+ k = te.reduce_axis((0, l), name='k')
+ c = te.compute((m,), lambda i:
+ te.sum(a[k] * b[i, k], axis=k), name='c')
+ Ab = tvm.tir.decl_buffer(a.shape, a.dtype,
name="A",
offset_factor=1,
strides=[1])
- Bb = tvm.decl_buffer(b.shape, b.dtype,
+ Bb = tvm.tir.decl_buffer(b.shape, b.dtype,
name="B",
offset_factor=1,
- strides=[tvm.var("s1"), 1])
- Cb = tvm.decl_buffer(c.shape, c.dtype,
+ strides=[te.var("s1"), 1])
+ Cb = tvm.tir.decl_buffer(c.shape, c.dtype,
name="C",
offset_factor=1,
strides=[1])
aa, bb = ins
cc = outs[0]
def _body():
- ib = tvm.ir_builder.create()
- ib.emit(tvm.call_extern("int32", "gemv_update",
+ ib = tvm.tir.ir_builder.create()
+ ib.emit(tvm.tir.call_extern("int32", "gemv_update",
cc.access_ptr("w"),
aa.access_ptr("r"),
bb.access_ptr("r"),
m, l, bb.strides[0]))
return ib.get()
def _reduce_reset():
- ib = tvm.ir_builder.create()
- ib.emit(tvm.call_extern("int32", "gemv_reset", cc.access_ptr("w"), m))
+ ib = tvm.tir.ir_builder.create()
+ ib.emit(tvm.tir.call_extern("int32", "gemv_reset", cc.access_ptr("w"), m))
return ib.get()
def _reduce_update():
return _body()
return _body(), _reduce_reset(), _reduce_update()
- with tvm.build_config(offset_factor=1):
- return tvm.decl_tensor_intrin(c.op, intrin_func, binds={a: Ab, b: Bb, c: Cb})
+ with tvm.target.build_config(offset_factor=1):
+ return te.decl_tensor_intrin(c.op, intrin_func, binds={a: Ab, b: Bb, c: Cb})
######################################################################
# Note that :code:`intrin_func` now returns a triplet:
from __future__ import absolute_import, print_function
import tvm
+from tvm import te
import numpy as np
######################################################################
# Describe Batchwise Computation
# ------------------------------
# For operators which have the same shape, we can put them together as
-# the inputs of :any:`tvm.compute`, if we want them to be scheduled
+# the inputs of :any:`te.compute`, if we want them to be scheduled
# together in the next schedule procedure.
#
-n = tvm.var("n")
-m = tvm.var("m")
-A0 = tvm.placeholder((m, n), name='A0')
-A1 = tvm.placeholder((m, n), name='A1')
-B0, B1 = tvm.compute((m, n), lambda i, j: (A0[i, j] + 2, A1[i, j] * 3), name='B')
+n = te.var("n")
+m = te.var("m")
+A0 = te.placeholder((m, n), name='A0')
+A1 = te.placeholder((m, n), name='A1')
+B0, B1 = te.compute((m, n), lambda i, j: (A0[i, j] + 2, A1[i, j] * 3), name='B')
# The generated IR code would be:
-s = tvm.create_schedule(B0.op)
+s = te.create_schedule(B0.op)
print(tvm.lower(s, [A0, A1, B0, B1], simple_mode=True))
######################################################################
# operators, and the inputs will collaborate together, e.g. :code:`argmax`.
# In the reduction procedure, :code:`argmax` needs to compare the values of the
# operands and also needs to keep the index of each operand. It can be expressed
-# with :py:func:`tvm.comm_reducer` as below:
+# with :py:func:`te.comm_reducer` as below:
# x and y are the operands of the reduction; each of them is a tuple of index
# and value.
# our identity element also needs to be a tuple, so `fidentity` accepts
# two types as inputs.
def fidentity(t0, t1):
- return tvm.const(-1, t0), tvm.min_value(t1)
+ return tvm.tir.const(-1, t0), tvm.te.min_value(t1)
-argmax = tvm.comm_reducer(fcombine, fidentity, name='argmax')
+argmax = te.comm_reducer(fcombine, fidentity, name='argmax')
# describe the reduction computation
-m = tvm.var('m')
-n = tvm.var('n')
-idx = tvm.placeholder((m, n), name='idx', dtype='int32')
-val = tvm.placeholder((m, n), name='val', dtype='int32')
-k = tvm.reduce_axis((0, n), 'k')
-T0, T1 = tvm.compute((m, ), lambda i: argmax((idx[i, k], val[i, k]), axis=k), name='T')
+m = te.var('m')
+n = te.var('n')
+idx = te.placeholder((m, n), name='idx', dtype='int32')
+val = te.placeholder((m, n), name='val', dtype='int32')
+k = te.reduce_axis((0, n), 'k')
+T0, T1 = te.compute((m, ), lambda i: argmax((idx[i, k], val[i, k]), axis=k), name='T')
# the generated IR code would be:
-s = tvm.create_schedule(T0.op)
+s = te.create_schedule(T0.op)
print(tvm.lower(s, [idx, val, T0, T1], simple_mode=True))
######################################################################
# with one batch operation, but they can only be scheduled together
# in terms of operation.
-n = tvm.var("n")
-m = tvm.var("m")
-A0 = tvm.placeholder((m, n), name='A0')
-B0, B1 = tvm.compute((m, n), lambda i, j: (A0[i, j] + 2, A0[i, j] * 3), name='B')
-A1 = tvm.placeholder((m, n), name='A1')
-C = tvm.compute((m, n), lambda i, j: A1[i, j] + B0[i, j], name='C')
+n = te.var("n")
+m = te.var("m")
+A0 = te.placeholder((m, n), name='A0')
+B0, B1 = te.compute((m, n), lambda i, j: (A0[i, j] + 2, A0[i, j] * 3), name='B')
+A1 = te.placeholder((m, n), name='A1')
+C = te.compute((m, n), lambda i, j: A1[i, j] + B0[i, j], name='C')
-s = tvm.create_schedule(C.op)
+s = te.create_schedule(C.op)
s[B0].compute_at(s[C], C.op.axis[0])
# as you can see in the below generated IR code:
print(tvm.lower(s, [A0, A1, C], simple_mode=True))
import numpy as np
import tvm
+from tvm import te
# The sizes of inputs and filters
batch = 256
stride = 1
# Algorithm
-A = tvm.placeholder((in_size, in_size, in_channel, batch), name='A')
-W = tvm.placeholder((kernel, kernel, in_channel, out_channel), name='W')
+A = te.placeholder((in_size, in_size, in_channel, batch), name='A')
+W = te.placeholder((kernel, kernel, in_channel, out_channel), name='W')
out_size = (in_size - kernel + 2*pad) // stride + 1
# Pad input
-Apad = tvm.compute(
+Apad = te.compute(
(in_size + 2*pad, in_size + 2*pad, in_channel, batch),
- lambda yy, xx, cc, nn: tvm.if_then_else(
- tvm.all(yy >= pad, yy - pad < in_size,
+ lambda yy, xx, cc, nn: tvm.tir.if_then_else(
+ tvm.tir.all(yy >= pad, yy - pad < in_size,
xx >= pad, xx - pad < in_size),
- A[yy - pad, xx - pad, cc, nn], tvm.const(0., "float32")),
+ A[yy - pad, xx - pad, cc, nn], tvm.tir.const(0., "float32")),
name='Apad')
# Create reduction variables
-rc = tvm.reduce_axis((0, in_channel), name='rc')
-ry = tvm.reduce_axis((0, kernel), name='ry')
-rx = tvm.reduce_axis((0, kernel), name='rx')
+rc = te.reduce_axis((0, in_channel), name='rc')
+ry = te.reduce_axis((0, kernel), name='ry')
+rx = te.reduce_axis((0, kernel), name='rx')
# Compute the convolution
-B = tvm.compute(
+B = te.compute(
(out_size, out_size, out_channel, batch),
- lambda yy, xx, ff, nn: tvm.sum(
+ lambda yy, xx, ff, nn: te.sum(
Apad[yy * stride + ry, xx * stride + rx, rc, nn] * W[ry, rx, rc, ff],
axis=[ry, rx, rc]),
name='B')
#
# Designate the memory hierarchy
-s = tvm.create_schedule(B.op)
+s = te.create_schedule(B.op)
s[Apad].compute_inline() # compute Apad inline
AA = s.cache_read(Apad, 'shared', [B])
WW = s.cache_read(W, "shared", [B])
vthread = 2
# Get the GPU thread indices
-block_x = tvm.thread_axis("blockIdx.x")
-block_y = tvm.thread_axis("blockIdx.y")
-block_z = tvm.thread_axis("blockIdx.z")
-thread_x = tvm.thread_axis((0, num_thread), "threadIdx.x")
-thread_y = tvm.thread_axis((0, num_thread), "threadIdx.y")
-thread_xz = tvm.thread_axis((0, vthread), "vthread", name="vx")
-thread_yz = tvm.thread_axis((0, vthread), "vthread", name="vy")
+block_x = te.thread_axis("blockIdx.x")
+block_y = te.thread_axis("blockIdx.y")
+block_z = te.thread_axis("blockIdx.z")
+thread_x = te.thread_axis((0, num_thread), "threadIdx.x")
+thread_y = te.thread_axis((0, num_thread), "threadIdx.y")
+thread_xz = te.thread_axis((0, vthread), "vthread", name="vx")
+thread_yz = te.thread_axis((0, vthread), "vthread", name="vy")
# Split the workloads
hi, wi, fi, ni = s[B].op.axis
# NHWCnc memory layout. The following code defines the convolution algorithm in TVM.
import tvm
+from tvm import te
import numpy as np
from tvm.contrib import nvcc
block_size)
# Reduction axes
-kh = tvm.reduce_axis((0, kernel_h), name='kh')
-kw = tvm.reduce_axis((0, kernel_w), name='kw')
-ic = tvm.reduce_axis((0, in_channels // block_size), name='ic')
-ii = tvm.reduce_axis((0, block_size), name='ii')
+kh = te.reduce_axis((0, kernel_h), name='kh')
+kw = te.reduce_axis((0, kernel_w), name='kw')
+ic = te.reduce_axis((0, in_channels // block_size), name='ic')
+ii = te.reduce_axis((0, block_size), name='ii')
# Algorithm
-A = tvm.placeholder(data_shape, name='A', dtype="float16")
-W = tvm.placeholder(kernel_shape, name='W', dtype="float16")
-Apad = tvm.compute(
+A = te.placeholder(data_shape, name='A', dtype="float16")
+W = te.placeholder(kernel_shape, name='W', dtype="float16")
+Apad = te.compute(
(batch_size // block_size, height + 2 * pad_h, width + 2 * pad_w, in_channels // block_size, block_size,
block_size),
- lambda n, h, w, i, nn, ii: tvm.if_then_else(
- tvm.all(h >= pad_h, h - pad_h < height,
+ lambda n, h, w, i, nn, ii: tvm.tir.if_then_else(
+ tvm.tir.all(h >= pad_h, h - pad_h < height,
w >= pad_w, w - pad_w < width),
- A[n, h - pad_h, w - pad_w, i, nn, ii], tvm.const(0., "float16")),
+ A[n, h - pad_h, w - pad_w, i, nn, ii], tvm.tir.const(0., "float16")),
name='Apad')
-Conv = tvm.compute(output_shape,
- lambda n, h, w, o, nn, oo: tvm.sum(
+Conv = te.compute(output_shape,
+ lambda n, h, w, o, nn, oo: te.sum(
Apad[n, h * stride_h + kh, w * stride_w + kw, ic, nn, ii].astype("float32") *
W[kh, kw, ic, o, ii, oo].astype("float32"),
axis=[ic, kh, kw, ii]),
name="Conv")
-s = tvm.create_schedule(Conv.op)
+s = te.create_schedule(Conv.op)
s[Apad].compute_inline()
###############################################################################
def intrin_wmma_load_matrix(scope):
n = 16
- A = tvm.placeholder((n, n), name='A', dtype='float16')
- BA = tvm.decl_buffer(A.shape, A.dtype, scope='shared', data_alignment=32, offset_factor=256)
- C = tvm.compute((n, n), lambda i, j: A[i, j], name='C')
- BC = tvm.decl_buffer(C.shape, C.dtype, scope=scope, data_alignment=32, offset_factor=256)
+ A = te.placeholder((n, n), name='A', dtype='float16')
+ BA = tvm.tir.decl_buffer(A.shape, A.dtype, scope='shared', data_alignment=32, offset_factor=256)
+ C = te.compute((n, n), lambda i, j: A[i, j], name='C')
+ BC = tvm.tir.decl_buffer(C.shape, C.dtype, scope=scope, data_alignment=32, offset_factor=256)
def intrin_func(ins, outs):
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
BA = ins[0]
BC = outs[0]
- ib.emit(tvm.call_intrin('handle', 'tvm_load_matrix_sync',
+ ib.emit(tvm.tir.call_intrin('handle', 'tvm_load_matrix_sync',
BC.data, n, n, n, BC.elem_offset // 256,
BA.access_ptr('r'), n, 'row_major'))
return ib.get()
- return tvm.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, C: BC})
+ return te.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, C: BC})
def intrin_wmma_gemm():
n = 16
- A = tvm.placeholder((n, n), name='A', dtype='float16')
- B = tvm.placeholder((n, n), name='B', dtype='float16')
- k = tvm.reduce_axis((0, n), name="k")
- C = tvm.compute((n, n),
+ A = te.placeholder((n, n), name='A', dtype='float16')
+ B = te.placeholder((n, n), name='B', dtype='float16')
+ k = te.reduce_axis((0, n), name="k")
+ C = te.compute((n, n),
lambda ii, jj:
- tvm.sum(A[ii, k].astype('float') * B[k, jj].astype('float'), axis=k),
+ te.sum(A[ii, k].astype('float') * B[k, jj].astype('float'), axis=k),
name='C')
- BA = tvm.decl_buffer(A.shape, A.dtype, name='BA', scope='wmma.matrix_a', data_alignment=32, offset_factor=256)
- BB = tvm.decl_buffer(B.shape, B.dtype, name='BB', scope='wmma.matrix_b', data_alignment=32, offset_factor=256)
- BC = tvm.decl_buffer(C.shape, C.dtype, name='BC', scope='wmma.accumulator', data_alignment=32, offset_factor=256)
+ BA = tvm.tir.decl_buffer(A.shape, A.dtype, name='BA', scope='wmma.matrix_a', data_alignment=32, offset_factor=256)
+ BB = tvm.tir.decl_buffer(B.shape, B.dtype, name='BB', scope='wmma.matrix_b', data_alignment=32, offset_factor=256)
+ BC = tvm.tir.decl_buffer(C.shape, C.dtype, name='BC', scope='wmma.accumulator', data_alignment=32, offset_factor=256)
def intrin_func(ins, outs):
BA, BB = ins
BC, = outs
def init():
- ib = tvm.ir_builder.create()
- ib.emit(tvm.call_intrin('handle', 'tvm_fill_fragment', BC.data, n, n, n, BC.elem_offset // 256, 0.0))
+ ib = tvm.tir.ir_builder.create()
+ ib.emit(tvm.tir.call_intrin('handle', 'tvm_fill_fragment', BC.data, n, n, n, BC.elem_offset // 256, 0.0))
return ib.get()
def update():
- ib = tvm.ir_builder.create()
- ib.emit(tvm.call_intrin('handle', 'tvm_mma_sync',
+ ib = tvm.tir.ir_builder.create()
+ ib.emit(tvm.tir.call_intrin('handle', 'tvm_mma_sync',
BC.data, BC.elem_offset // 256,
BA.data, BA.elem_offset // 256,
BB.data, BB.elem_offset // 256,
return update(), init(), update()
- return tvm.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, B: BB, C: BC})
+ return te.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, B: BB, C: BC})
def intrin_wmma_store_matrix():
n = 16
- A = tvm.placeholder((n, n), name='A', dtype='float32')
- BA = tvm.decl_buffer(A.shape, A.dtype, scope='wmma.accumulator', data_alignment=32, offset_factor=256)
- C = tvm.compute((n, n), lambda i, j: A[i, j], name='C')
- BC = tvm.decl_buffer(C.shape, C.dtype, scope='global', data_alignment=32, offset_factor=256)
+ A = te.placeholder((n, n), name='A', dtype='float32')
+ BA = tvm.tir.decl_buffer(A.shape, A.dtype, scope='wmma.accumulator', data_alignment=32, offset_factor=256)
+ C = te.compute((n, n), lambda i, j: A[i, j], name='C')
+ BC = tvm.tir.decl_buffer(C.shape, C.dtype, scope='global', data_alignment=32, offset_factor=256)
def intrin_func(ins, outs):
- ib = tvm.ir_builder.create()
+ ib = tvm.tir.ir_builder.create()
BA = ins[0]
BC = outs[0]
- ib.emit(tvm.call_intrin('handle', 'tvm_store_matrix_sync',
+ ib.emit(tvm.tir.call_intrin('handle', 'tvm_store_matrix_sync',
BA.data, n, n, n, BA.elem_offset // 256,
BC.access_ptr('w'), n, 'row_major'))
return ib.get()
- return tvm.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, C: BC})
+ return te.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, C: BC})
###############################################################################
# Scheduling the Computation
warp_size = 32
chunk = 2
-block_x = tvm.thread_axis('blockIdx.x')
-block_y = tvm.thread_axis('blockIdx.y')
-block_z = tvm.thread_axis('blockIdx.z')
-thread_x = tvm.thread_axis('threadIdx.x')
-thread_y = tvm.thread_axis('threadIdx.y')
-thread_z = tvm.thread_axis('threadIdx.z')
+block_x = te.thread_axis('blockIdx.x')
+block_y = te.thread_axis('blockIdx.y')
+block_z = te.thread_axis('blockIdx.z')
+thread_x = te.thread_axis('threadIdx.x')
+thread_y = te.thread_axis('threadIdx.y')
+thread_z = te.thread_axis('threadIdx.z')
nc, hc, wc, oc, nnc, ooc = Conv.op.axis
block_k = s[Conv].fuse(hc, wc)
ctx = tvm.gpu(0)
if nvcc.have_tensorcore(ctx.compute_version):
- with tvm.build_config(auto_unroll_max_step=16):
+ with tvm.target.build_config(auto_unroll_max_step=16):
func = tvm.build(s, [A, W, Conv], 'cuda')
a_np = np.random.uniform(size=data_shape).astype(A.dtype)
w_np = np.random.uniform(size=kernel_shape).astype(W.dtype)
# Then we write a baseline implementation, the simplest way to write a matrix multiplication in TVM.
import tvm
+from tvm import te
import numpy
import timeit
answer = numpy.dot(a.asnumpy(), b.asnumpy())
# Algorithm
-k = tvm.reduce_axis((0, K), 'k')
-A = tvm.placeholder((M, K), name='A')
-B = tvm.placeholder((K, N), name='B')
-C = tvm.compute(
+k = te.reduce_axis((0, K), 'k')
+A = te.placeholder((M, K), name='A')
+B = te.placeholder((K, N), name='B')
+C = te.compute(
(M, N),
- lambda x, y: tvm.sum(A[x, k] * B[k, y], axis=k),
+ lambda x, y: te.sum(A[x, k] * B[k, y], axis=k),
name='C')
# Default schedule
-s = tvm.create_schedule(C.op)
+s = te.create_schedule(C.op)
func = tvm.build(s, [A, B, C], target=target, name='mmult')
assert func
# fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB (L1 data cache)
bn = 32
-s = tvm.create_schedule(C.op)
+s = te.create_schedule(C.op)
# Blocking by loop tiling
xo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)
#
# In this tutorial, we chose to vectorize the inner loop row data since it is cache friendly.
-s = tvm.create_schedule(C.op)
+s = te.create_schedule(C.op)
xo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)
k, = s[C].op.reduce_axis
ko, ki = s[C].split(k, factor=4)
# which is not cache friendly. If we change the nested loop order of ki and inner axes xi,
# the access pattern for A matrix is more cache friendly.
-s = tvm.create_schedule(C.op)
+s = te.create_schedule(C.op)
xo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)
k, = s[C].op.reduce_axis
ko, ki = s[C].split(k, factor=4)
#
# We have to re-write the algorithm slightly.
-packedB = tvm.compute((N / bn, K, bn), lambda x, y, z: B[y, x * bn + z], name='packedB')
-C = tvm.compute((M, N),
- lambda x, y: tvm.sum(A[x, k] * packedB[y // bn, k, tvm.indexmod(y, bn)], axis=k),
+packedB = te.compute((N / bn, K, bn), lambda x, y, z: B[y, x * bn + z], name='packedB')
+C = te.compute((M, N),
+ lambda x, y: te.sum(A[x, k] * packedB[y // bn, k, tvm.tir.indexmod(y, bn)], axis=k),
name = 'C')
-s = tvm.create_schedule(C.op)
+s = te.create_schedule(C.op)
xo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)
k, = s[C].op.reduce_axis
# write to C when all the block results are ready.
#
-s = tvm.create_schedule(C.op)
+s = te.create_schedule(C.op)
# Allocate write cache
CC = s.cache_write(C, 'global')
# --------
# Furthermore, we can also utilize multi-core processors to do thread-level parallelization.
-s = tvm.create_schedule(C.op)
+s = te.create_schedule(C.op)
CC = s.cache_write(C, 'global')
import numpy as np
import tvm
+from tvm import te
from tvm import autotvm
from tvm.contrib import nvcc
def matmul_nn(A, B, L, dtype='float16', layout='NN'):
- k = tvm.reduce_axis((0, L), name='k')
+ k = te.reduce_axis((0, L), name='k')
if dtype == 'float16':
out_type = 'float'
elif dtype == 'int8':
elif dtype == 'int4' or dtype == 'int1':
out_type = 'int'
if (layout == 'NN'):
- return tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k].astype(out_type) * B[k, j].astype(out_type), axis=k))
+ return te.compute((N, M), lambda i, j: te.sum(A[i, k].astype(out_type) * B[k, j].astype(out_type), axis=k))
if (layout == 'NT'):
- return tvm.compute((N, M), lambda i, j: tvm.sum(A[k, i].astype(out_type) * B[k, j].astype(out_type), axis=k))
+ return te.compute((N, M), lambda i, j: te.sum(A[k, i].astype(out_type) * B[k, j].astype(out_type), axis=k))
if (layout == 'TN'):
- return tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k].astype(out_type) * B[j, k].astype(out_type), axis=k))
+ return te.compute((N, M), lambda i, j: te.sum(A[i, k].astype(out_type) * B[j, k].astype(out_type), axis=k))
if (layout == 'TT'):
- return tvm.compute((N, M), lambda i, j: tvm.sum(A[k, i].astype(out_type) * B[j, k].astype(out_type), axis=k))
+ return te.compute((N, M), lambda i, j: te.sum(A[k, i].astype(out_type) * B[j, k].astype(out_type), axis=k))
###############################################################################
# Scheduling the Computation
# (2) The warp tile size is not 16x16x16 on CUDA9, or not one of {16x16x16, 32x8x16, 8x32x16} on CUDA version >= 10.0.
#
# In this schedule, storage_align is used to reduce bank conflicts of shared memory. Please refer to this
-# `doc <https://docs.tvm.ai/api/python/schedule.html#tvm.schedule.Stage.storage_align>`_
+# `doc <https://docs.tvm.ai/api/python/schedule.html#tvm.te.schedule.Stage.storage_align>`_
# for the usage of storage_align primitive. In short, we need to add an offset to some shared memory buffer
# to reduce bank conflicts.
# According to the `wmma doc <https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#wmma-description>`_,
else:
print ("Unsupported layout:", layout)
sys.exit(1);
- A = tvm.placeholder(shape_a, name='A', dtype=dtype)
- B = tvm.placeholder(shape_b, name='B', dtype=dtype)
+ A = te.placeholder(shape_a, name='A', dtype=dtype)
+ B = te.placeholder(shape_b, name='B', dtype=dtype)
C = matmul_nn(A, B, L, dtype, layout)
- s = tvm.create_schedule(C.op)
+ s = te.create_schedule(C.op)
y, x = s[C].op.axis
k = s[C].op.reduce_axis[0]
tz, xi = s[C].split(xi, WX)
tx, xi = s[C].split(xi, TX)
s[C].reorder(yo, xo, tz, ty, tx, yi, xi)
- s[C].bind(yo, tvm.thread_axis("blockIdx.y"))
- s[C].bind(xo, tvm.thread_axis("blockIdx.x"))
- s[C].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[C].bind(tz, tvm.thread_axis("threadIdx.z"))
- s[C].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[C].bind(yo, te.thread_axis("blockIdx.y"))
+ s[C].bind(xo, te.thread_axis("blockIdx.x"))
+ s[C].bind(ty, te.thread_axis("threadIdx.y"))
+ s[C].bind(tz, te.thread_axis("threadIdx.z"))
+ s[C].bind(tx, te.thread_axis("threadIdx.x"))
# schedule for CL stage
ko, ki = s[CL].split(k, step_k * warp_tile_k)
tx, vec = s[AA].split(tx, factor=v)
fused = s[AA].fuse(s[AA].op.axis[0], xo)
_, ty = s[AA].split(fused, factor=by)
- s[AA].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[AA].bind(tz, tvm.thread_axis("threadIdx.z"))
- s[AA].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[AA].bind(ty, te.thread_axis("threadIdx.y"))
+ s[AA].bind(tz, te.thread_axis("threadIdx.z"))
+ s[AA].bind(tx, te.thread_axis("threadIdx.x"))
# vectorization is very important for float16/int8 inputs
s[AA].vectorize(vec)
tx, vec = s[BB].split(tx, factor=v)
fused = s[BB].fuse(s[BB].op.axis[0], xo)
_, ty = s[BB].split(fused, factor=by)
- s[BB].bind(ty, tvm.thread_axis("threadIdx.y"))
- s[BB].bind(tz, tvm.thread_axis("threadIdx.z"))
- s[BB].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[BB].bind(ty, te.thread_axis("threadIdx.y"))
+ s[BB].bind(tz, te.thread_axis("threadIdx.z"))
+ s[BB].bind(tx, te.thread_axis("threadIdx.x"))
s[BB].vectorize(vec)
s[AL].compute_at(s[CL], kl)
print(best_config)
with autotvm.apply_history_best('matmul.log'):
with tvm.target.create("cuda"):
- with tvm.build_config():
+ with tvm.target.build_config():
s, arg_bufs = test_gemm(N, L, M, dtype, layout)
print(tvm.lower(s, arg_bufs, simple_mode=True))
func = tvm.build(s, arg_bufs)
from tvm import relay
from tvm.relay import testing
import tvm
+from tvm import te
from tvm.contrib import graph_runtime
######################################################################
from __future__ import absolute_import, print_function
import tvm
+from tvm import te
import numpy as np
# Global declarations of environment.
# No computation happens during this phase, as we are only declaring how
# the computation should be done.
#
-n = tvm.var("n")
-A = tvm.placeholder((n,), name='A')
-B = tvm.placeholder((n,), name='B')
-C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C")
+n = te.var("n")
+A = te.placeholder((n,), name='A')
+B = te.placeholder((n,), name='B')
+C = te.compute(A.shape, lambda i: A[i] + B[i], name="C")
print(type(C))
######################################################################
# C[i] = A[i] + B[i];
# }
#
-s = tvm.create_schedule(C.op)
+s = te.create_schedule(C.op)
######################################################################
# We used the split construct to split the first axis of C,
# to generate code that runs on GPU.
#
if tgt == "cuda" or tgt == "rocm" or tgt.startswith('opencl'):
- s[C].bind(bx, tvm.thread_axis("blockIdx.x"))
- s[C].bind(tx, tvm.thread_axis("threadIdx.x"))
+ s[C].bind(bx, te.thread_axis("blockIdx.x"))
+ s[C].bind(tx, te.thread_axis("threadIdx.x"))
######################################################################
# Compilation
# arrays with different shapes into fadd, an error will be raised.
#
# We can do more specializations. For example, we can write
-# :code:`n = tvm.convert(1024)` instead of :code:`n = tvm.var("n")`,
+# :code:`n = tvm.runtime.convert(1024)` instead of :code:`n = te.var("n")`,
# in the computation declaration. The generated function will
# only take vectors with length 1024.
#
from __future__ import absolute_import, print_function
import tvm
+from tvm import te
import topi
import numpy as np
# To compute the sum of rows of a two dimensional TVM tensor A, we should
# specify the symbolic operation as well as schedule as follows
#
-n = tvm.var("n")
-m = tvm.var("m")
-A = tvm.placeholder((n, m), name='A')
-k = tvm.reduce_axis((0, m), "k")
-B = tvm.compute((n,), lambda i: tvm.sum(A[i, k], axis=k), name="B")
-s = tvm.create_schedule(B.op)
+n = te.var("n")
+m = te.var("m")
+A = te.placeholder((n, m), name='A')
+k = te.reduce_axis((0, m), "k")
+B = te.compute((n,), lambda i: te.sum(A[i, k], axis=k), name="B")
+s = te.create_schedule(B.op)
######################################################################
# and to examine the IR code in human readable format, we can do
######################################################################
# However, for such a common operation we had to define the reduce axis ourselves as well as explicit computation with
-# :code:`tvm.compute`. Imagine for more complicated operations how much details we need to provide.
+# :code:`te.compute`. Imagine how many details we would need to provide for more complicated operations.
# Fortunately, we can replace those two lines with simple :code:`topi.sum` much like :code:`numpy.sum`
#
C = topi.sum(A, axis=1)
-ts = tvm.create_schedule(C.op)
+ts = te.create_schedule(C.op)
print(tvm.lower(ts, [A], simple_mode=True))
######################################################################
# Even shorter, TOPI provides operator overloading for such common operations. For example,
#
x, y = 100, 10
-a = tvm.placeholder((x, y, y), name="a")
-b = tvm.placeholder((y, y), name="b")
+a = te.placeholder((x, y, y), name="a")
+b = te.placeholder((y, y), name="b")
c = a + b # same as topi.broadcast_add
d = a * b # same as topi.broadcast_mul
######################################################################
# TOPI also provides common neural network operations such as _softmax_ with optimized schedules
#
-tarray = tvm.placeholder((512, 512), name="tarray")
+tarray = te.placeholder((512, 512), name="tarray")
softmax_topi = topi.nn.softmax(tarray)
with tvm.target.create("cuda"):
sst = topi.cuda.schedule_softmax(softmax_topi)
# compute declaration and schedule. TVM will choose the right function to call with
# the target information.
-data = tvm.placeholder((1, 3, 224, 224))
-kernel = tvm.placeholder((10, 3, 5, 5))
+data = te.placeholder((1, 3, 224, 224))
+kernel = te.placeholder((10, 3, 5, 5))
with tvm.target.create("cuda"):
conv = topi.cuda.conv2d_nchw(data, kernel, 1, 2, 1)
# under the License.
import tvm
+from tvm import te
import ctypes
import os.path as osp
from sys import platform
# under the License.
import tvm
+from tvm import te
import numpy as np
import tsim
import sys
"""
def slice(A, slice_width):
assert np.log2(slice_width) % 1 == 0, "only power of 2 is supported"
- dtype = type(A[0])
+ dtype = type(A[0])
row = 0
# currently only supports uint
if dtype is np.uint8: row = 8 // slice_width
else:
dtype = 'uint8'
- C = np.zeros((row, len(A))).astype(dtype) # sliced and transform
+ C = np.zeros((row, len(A))).astype(dtype) # sliced and transform
# create mask
slice_mask = 2**(slice_width)-1
def slice_mat(A, slice_width):
assert np.log2(slice_width) % 1 == 0, "only power of 2 is supported"
- dtype = type(A[0][0])
+ dtype = type(A[0][0])
row = 0
# currently only supports uint
if dtype is np.uint8: row = 8 // slice_width
dtype = 'uint8'
# 3d array (bits, row, clmn)
- C = np.zeros((row, A.shape[0], A.shape[1])).astype(dtype) # sliced and transform
+ C = np.zeros((row, A.shape[0], A.shape[1])).astype(dtype) # sliced and transform
# create mask
slice_mask = 2**(slice_width)-1
for i in range(len(a_arr)):
for j in range(len(b_arr)):
shift = np.uint8(i*i_width + j*w_width)
- if i == 0 and j == 0:
+ if i == 0 and j == 0:
cycles += f(b_arr[j], a_arr[i], shift, accum, np.uint32(1)) # reset accumulator
- else:
+ else:
cycles += f(b_arr[j], a_arr[i], shift, accum, np.uint32(0)) # no reset
return (accum.asnumpy(), cycles)
""" Matrix Generator
Parameters
-----------
+----------
dtype : String, datatype generated (supports only uint)
i_width : activation (input) bit slices (needs to be less than actual bit width)
w_width : weight bit slices (needs to be less than actual bit width)
def top_test(dtype, i_width, w_width):
# only supports positive values (up to 2**(bits-1))
- rmax = 127
+ rmax = 127
# (m,16) * (16,16) GEMM
- rrow = np.random.randint(7) + 1
+ rrow = np.random.randint(7) + 1
clmn = 16
A = np.random.randint(rmax, size=(rrow,clmn)).astype(dtype)
B = np.random.randint(rmax, size=(clmn,clmn)).astype(dtype)
for i in range(1):
# reg1 and reg2 bits in hardware/chisel/src/main/Compute.scala must be modified for slices greater than 8 bits
if sys.argv[1] == 'serial':
- # generates a random uint8 GEMM with 2-bit(8/4) input and 4-bit(8/2) weight
+ # generates a random uint8 GEMM with 2-bit(8/4) input and 4-bit(8/2) weight
top_test("uint8", 4, 2)
elif sys.argv[1] == 'parallel':
- # generates a random uint8 GEMM with 8-bit input and 8-bit weight (bit parallel)
+ # generates a random uint8 GEMM with 8-bit input and 8-bit weight (bit parallel)
top_test('uint8', 8, 8)
# under the License.
import tvm
+from tvm import te
import ctypes
import os.path as osp
from sys import platform
# under the License.
import tvm
+from tvm import te
import numpy as np
import tsim
# under the License.
import tvm
+from tvm import te
import numpy as np
import tsim
# under the License.
# pylint: disable=unused-argument
"""VTA specific builtin for runtime."""
-from __future__ import absolute_import as _abs
-
import tvm
from . import ir_pass
from .environment import get_env
def lift_coproc_scope(x):
"""Lift allocations to the scope beginning, then lift the coproc_scope attribute."""
x = ir_pass.lift_alloc_to_scope_begin(x)
- x = tvm.ir_pass.LiftAttrScope(x, "coproc_scope", False)
+ x = tvm.tir.ir_pass.LiftAttrScope(x, "coproc_scope", False)
return x
def early_rewrite(stmt):
"""Try to do storage rewrite in early pass."""
try:
- return tvm.ir_pass.StorageRewrite(stmt)
+ return tvm.tir.ir_pass.StorageRewrite(stmt)
except tvm.error.TVMError:
return stmt
"""
env = get_env()
def add_debug(stmt):
- debug = tvm.call_extern(
+ debug = tvm.tir.call_extern(
"int32", "VTASetDebugMode",
env.dev.command_handle,
debug_flag)
(1, ir_pass.inject_dma_intrin),
(1, ir_pass.inject_skip_copy),
(1, ir_pass.annotate_alu_coproc_scope),
- (1, lambda x: tvm.ir_pass.LiftAttrScope(x, "coproc_uop_scope", True)),
+ (1, lambda x: tvm.tir.ir_pass.LiftAttrScope(x, "coproc_uop_scope", True)),
(1, lift_coproc_scope),
(1, ir_pass.inject_coproc_sync),
(1, early_rewrite)]
if debug_flag:
pass_list.append((1, add_debug))
pass_list.append((2, ir_pass.inject_alu_intrin))
- pass_list.append((3, tvm.ir_pass.LowerStorageAccessInfo))
+ pass_list.append((3, tvm.tir.ir_pass.LowerStorageAccessInfo))
pass_list.append((3, ir_pass.fold_uop_loop))
pass_list.append((3, ir_pass.cpu_access_rewrite))
- return tvm.build_config(add_lower_pass=pass_list, **kwargs)
+ return tvm.target.build_config(add_lower_pass=pass_list, **kwargs)
def lower(*args, **kwargs):
import json
import copy
import tvm
+from tvm import te
from . import intrin
from .pkg_config import PkgConfig
QID_COMPUTE = 2
def __init__(self, env):
- self.vta_axis = tvm.thread_axis("vta")
+ self.vta_axis = te.thread_axis("vta")
self.vta_push_uop = tvm.tir.StringImm("VTAPushGEMMOp")
- ctx = tvm.call_extern("handle", "VTATLSCommandHandle")
+ ctx = tvm.tir.call_extern("handle", "VTATLSCommandHandle")
self.command_handle = tvm.tir.Call(
"handle", "tvm_thread_context", [ctx],
tvm.tir.Call.Intrinsic, None, 0)
@tvm.register_func("tvm.intrin.rule.default.vta.coproc_sync")
def coproc_sync(op):
_ = op
- return tvm.call_extern(
+ return tvm.tir.call_extern(
"int32", "VTASynchronize",
get_env().dev.command_handle, 1<<31)
@tvm.register_func("tvm.intrin.rule.default.vta.coproc_dep_push")
def coproc_dep_push(op):
- return tvm.call_extern(
+ return tvm.tir.call_extern(
"int32", "VTADepPush",
get_env().dev.command_handle,
op.args[0], op.args[1])
@tvm.register_func("tvm.intrin.rule.default.vta.coproc_dep_pop")
def coproc_dep_pop(op):
- return tvm.call_extern(
+ return tvm.tir.call_extern(
"int32", "VTADepPop",
get_env().dev.command_handle,
op.args[0], op.args[1])
from __future__ import absolute_import as _abs
import tvm
+from tvm import te
def gemm(env, mock=False):
"""Matrix-matrix multiply intrinsic
out_shape = (env.BATCH, env.BLOCK_OUT)
assert out_shape[0] * out_shape[1] == out_lanes
- wgt = tvm.placeholder((wgt_shape[0], wgt_shape[1]),
- dtype="int%d" % env.WGT_WIDTH,
- name=env.wgt_scope)
- inp = tvm.placeholder((inp_shape[0], inp_shape[1]),
- dtype="int%d" % env.INP_WIDTH,
- name=env.inp_scope)
- k = tvm.reduce_axis((0, wgt_shape[1]), name="k")
+ wgt = te.placeholder((wgt_shape[0], wgt_shape[1]),
+ dtype="int%d" % env.WGT_WIDTH,
+ name=env.wgt_scope)
+ inp = te.placeholder((inp_shape[0], inp_shape[1]),
+ dtype="int%d" % env.INP_WIDTH,
+ name=env.inp_scope)
+ k = te.reduce_axis((0, wgt_shape[1]), name="k")
out_dtype = "int%d" % env.ACC_WIDTH
- out = tvm.compute((out_shape[0], out_shape[1]),
- lambda i, j: tvm.sum(inp[i, k].astype(out_dtype) *
- wgt[j, k].astype(out_dtype),
- axis=[k]),
- name="out")
- wgt_layout = tvm.decl_buffer(
+ out = te.compute((out_shape[0], out_shape[1]),
+ lambda i, j: te.sum(inp[i, k].astype(out_dtype) *
+ wgt[j, k].astype(out_dtype),
+ axis=[k]),
+ name="out")
+ wgt_layout = tvm.tir.decl_buffer(
wgt.shape, wgt.dtype, env.wgt_scope,
scope=env.wgt_scope, offset_factor=wgt_lanes, data_alignment=wgt_lanes)
- inp_layout = tvm.decl_buffer(
+ inp_layout = tvm.tir.decl_buffer(
inp.shape, inp.dtype, env.inp_scope,
scope=env.inp_scope, offset_factor=inp_lanes, data_alignment=inp_lanes)
- out_layout = tvm.decl_buffer(
+ out_layout = tvm.tir.decl_buffer(
out.shape, out.dtype, env.acc_scope,
scope=env.acc_scope, offset_factor=out_lanes, data_alignment=out_lanes)
dout = outs[0]
def instr(index):
"""Generate matrix-matrix multiply VTA instruction"""
- irb = tvm.ir_builder.create()
+ irb = tvm.tir.ir_builder.create()
dev = env.dev
irb.scope_attr(dev.vta_axis, "coproc_scope",
dev.get_task_qid(dev.QID_COMPUTE))
irb.scope_attr(dev.vta_axis, "coproc_uop_scope",
dev.vta_push_uop)
if index in (0, 2):
- irb.emit(tvm.call_extern(
+ irb.emit(tvm.tir.call_extern(
"int32", "VTAUopPush",
0, 0,
dout.access_ptr("rw", "int32"),
dwgt.access_ptr("r", "int32"),
0, 0, 0))
else:
- irb.emit(tvm.call_extern(
+ irb.emit(tvm.tir.call_extern(
"int32", "VTAUopPush",
0, 1,
dout.access_ptr("rw", "int32"),
return (nop, nop, nop)
return (instr(0), instr(1), instr(2))
- return tvm.decl_tensor_intrin(out.op, intrin_func,
- name="GEMM",
- binds={inp: inp_layout,
- wgt: wgt_layout,
- out: out_layout})
+ return te.decl_tensor_intrin(out.op, intrin_func,
+ name="GEMM",
+ binds={inp: inp_layout,
+ wgt: wgt_layout,
+ out: out_layout})
"""Additional IR Pass for VTA"""
# pylint: disable=len-as-condition, no-else-return
import tvm
+from tvm import te
from topi import util
from .environment import get_env
fail[0] = True
return op
if gemm_offsets[i] is not None:
- if not tvm.ir_pass.Equal(m[0], gemm_offsets[i]):
+ if not tvm.tir.ir_pass.Equal(m[0], gemm_offsets[i]):
fail[0] = True
return op
args.append(m[1])
gemm_offsets[i] = m[0]
args.append(m[1])
args += op.args[base_args+3:]
- return tvm.call_extern("int32", "VTAUopPush", *args)
+ return tvm.tir.call_extern("int32", "VTAUopPush", *args)
if op.name not in ("VTATLSCommandHandle", "tvm_thread_context"):
raise RuntimeError("unexpected op %s" % op)
return op
- ret = tvm.ir_pass.IRTransform(
+ ret = tvm.tir.ir_pass.IRTransform(
stmt.body, None, _post_order, ["Call"])
if not fail[0] and all(x is not None for x in gemm_offsets):
def _visit(op):
if op.same_as(loop_var):
fail[0] = True
- tvm.ir_pass.PostOrderVisit(ret, _visit)
+ tvm.tir.ir_pass.PostOrderVisit(ret, _visit)
if not fail[0]:
- begin = tvm.call_extern(
+ begin = tvm.tir.call_extern(
"int32", "VTAUopLoopBegin", stmt.extent, *gemm_offsets)
- end = tvm.call_extern("int32", "VTAUopLoopEnd")
+ end = tvm.tir.call_extern("int32", "VTAUopLoopEnd")
return [begin, ret, end]
raise ValueError("Failed to fold the GEMM instructions..")
return tvm.tir.AttrStmt(
stmt.node, stmt.attr_key, stmt.value, body)
return None
- out = tvm.ir_pass.IRTransform(
+ out = tvm.tir.ir_pass.IRTransform(
stmt_in, _do_fold, None, ["AttrStmt"])
return out
return None
new_var = rw_info[buffer_var]
let_stmt = tvm.tir.LetStmt(
- new_var, tvm.call_extern(
+ new_var, tvm.tir.call_extern(
"handle", "VTABufferCPUPtr",
env.dev.command_handle,
buffer_var), op.body)
if isinstance(op, tvm.tir.Load):
buffer_var = op.buffer_var
if not buffer_var in rw_info:
- rw_info[buffer_var] = tvm.var(
+ rw_info[buffer_var] = te.var(
buffer_var.name + "_ptr", "handle")
new_var = rw_info[buffer_var]
return tvm.tir.Load(op.dtype, new_var, op.index)
if isinstance(op, tvm.tir.Store):
buffer_var = op.buffer_var
if not buffer_var in rw_info:
- rw_info[buffer_var] = tvm.var(
+ rw_info[buffer_var] = te.var(
buffer_var.name + "_ptr", "handle")
new_var = rw_info[buffer_var]
return tvm.tir.Store(new_var, op.value, op.index)
raise RuntimeError("not reached")
- stmt = tvm.ir_pass.IRTransform(
+ stmt = tvm.tir.ir_pass.IRTransform(
stmt_in, None, _post_order, ["Allocate", "Load", "Store"])
for buffer_var, new_var in rw_info.items():
stmt = tvm.tir.LetStmt(
- new_var, tvm.call_extern(
+ new_var, tvm.tir.call_extern(
"handle", "VTABufferCPUPtr",
env.dev.command_handle,
buffer_var), stmt)
if isinstance(op, tvm.tir.For):
return _merge_block(lift_stmt.pop() + [op], op.body)
raise RuntimeError("not reached")
- stmt = tvm.ir_pass.IRTransform(
+ stmt = tvm.tir.ir_pass.IRTransform(
stmt_in, _pre_order, _post_order, ["Allocate", "AttrStmt", "For"])
assert len(lift_stmt) == 1
return _merge_block(lift_stmt[0], stmt)
if _match_pragma(stmt, "skip_dma_copy"):
return tvm.tir.Evaluate(0)
return None
- return tvm.ir_pass.IRTransform(
+ return tvm.tir.ir_pass.IRTransform(
stmt_in, _do_fold, None, ["AttrStmt"])
op.loop_var, op.min, 2, op.for_type,
op.device_api, op.body)
return None
- stmt = tvm.ir_pass.IRTransform(
+ stmt = tvm.tir.ir_pass.IRTransform(
stmt_in, None, _do_fold, ["AttrStmt"])
- stmt = tvm.ir_pass.CoProcSync(stmt)
+ stmt = tvm.tir.ir_pass.CoProcSync(stmt)
return stmt
Transformed statement
"""
env = get_env()
- idxd = tvm.indexdiv
- idxm = tvm.indexmod
+ idxd = tvm.tir.indexdiv
+ idxm = tvm.tir.indexmod
def _check_compact(buf):
ndim = len(buf.shape)
- size = tvm.const(1, buf.shape[0].dtype)
+ size = tvm.tir.const(1, buf.shape[0].dtype)
for i in reversed(range(ndim)):
if not util.equal_const_int(size - buf.strides[i], 0):
raise RuntimeError(
break
x_size = x_size * buf.shape[k]
next_base = i + 1
- shape.append(tvm.ir_pass.Simplify(x_size))
+ shape.append(tvm.tir.ir_pass.Simplify(x_size))
strides.append(x_stride)
assert next_base != base
base = next_base
_check_compact(src)
x_size, y_size, x_stride, offset = _get_2d_pattern(
dst, elem_width, elem_bytes, data_type, src.scope, allow_fold=True)
- irb = tvm.ir_builder.create()
+ irb = tvm.tir.ir_builder.create()
irb.scope_attr(env.dev.vta_axis, "coproc_scope",
env.dev.get_task_qid(task_qid))
- irb.emit(tvm.call_extern(
+ irb.emit(tvm.tir.call_extern(
"int32", "VTAStoreBuffer2D",
env.dev.command_handle,
src.access_ptr("r", "int32"),
src, elem_width, elem_bytes, data_type,
dst.scope, allow_fold=allow_fold)
- irb = tvm.ir_builder.create()
+ irb = tvm.tir.ir_builder.create()
irb.scope_attr(env.dev.vta_axis, "coproc_scope",
env.dev.get_task_qid(task_qid))
- irb.emit(tvm.call_extern(
+ irb.emit(tvm.tir.call_extern(
"int32", "VTALoadBuffer2D",
env.dev.command_handle,
src.data, offset, x_size, y_size, x_stride,
else:
raise RuntimeError("Do not support copy %s->%s" % (src.scope, dst.scope))
- return tvm.ir_pass.InjectCopyIntrin(stmt_in, "dma_copy", _inject_copy)
+ return tvm.tir.ir_pass.InjectCopyIntrin(stmt_in, "dma_copy", _inject_copy)
def _get_gemm_intrin_buffer():
assert out_lanes == env.BATCH * env.BLOCK_OUT
out_shape = (env.BATCH, env.BLOCK_OUT)
assert out_shape[0] * out_shape[1] == out_lanes
- wgt = tvm.placeholder((wgt_shape[0], wgt_shape[1]),
- dtype="int%d" % env.WGT_WIDTH,
- name=env.wgt_scope)
- inp = tvm.placeholder((inp_shape[0], inp_shape[1]),
- dtype="int%d" % env.INP_WIDTH,
- name=env.inp_scope)
- k = tvm.reduce_axis((0, wgt_shape[1]), name="k")
+ wgt = te.placeholder((wgt_shape[0], wgt_shape[1]),
+ dtype="int%d" % env.WGT_WIDTH,
+ name=env.wgt_scope)
+ inp = te.placeholder((inp_shape[0], inp_shape[1]),
+ dtype="int%d" % env.INP_WIDTH,
+ name=env.inp_scope)
+ k = te.reduce_axis((0, wgt_shape[1]), name="k")
out_dtype = "int%d" % env.ACC_WIDTH
- out = tvm.compute((out_shape[0], out_shape[1]),
- lambda i, j: tvm.sum(inp[i, k].astype(out_dtype) *
- wgt[j, k].astype(out_dtype),
- axis=[k]),
- name="out")
- wgt_layout = tvm.decl_buffer(
+ out = te.compute((out_shape[0], out_shape[1]),
+ lambda i, j: te.sum(inp[i, k].astype(out_dtype) *
+ wgt[j, k].astype(out_dtype),
+ axis=[k]),
+ name="out")
+ wgt_layout = tvm.tir.decl_buffer(
wgt.shape, wgt.dtype, env.wgt_scope,
scope=env.wgt_scope, offset_factor=wgt_lanes, data_alignment=wgt_lanes)
- inp_layout = tvm.decl_buffer(
+ inp_layout = tvm.tir.decl_buffer(
inp.shape, inp.dtype, env.inp_scope,
scope=env.inp_scope, offset_factor=inp_lanes, data_alignment=inp_lanes)
- out_layout = tvm.decl_buffer(
+ out_layout = tvm.tir.decl_buffer(
out.shape, out.dtype, env.acc_scope,
scope=env.acc_scope, offset_factor=out_lanes, data_alignment=out_lanes)
def _do_fold(op):
if _match_pragma(op, "conv2d_transpose_gemm"):
is_init = ".init" in str(op)
- tvm.ir_pass.PostOrderVisit(op, _find_basics)
+ tvm.tir.ir_pass.PostOrderVisit(op, _find_basics)
if is_init:
# create inner most block
- irb = tvm.ir_builder.create()
+ irb = tvm.tir.ir_builder.create()
dev = env.dev
irb.scope_attr(dev.vta_axis, "coproc_scope", dev.get_task_qid(dev.QID_COMPUTE))
irb.scope_attr(dev.vta_axis, "coproc_uop_scope", dev.vta_push_uop)
- irb.emit(tvm.call_extern("int32", "VTAUopPush",
- 0, 1,
- dout.access_ptr("rw", "int32"),
- 0, 0,
- 0, 0, 0))
+ irb.emit(tvm.tir.call_extern("int32", "VTAUopPush",
+ 0, 1,
+ dout.access_ptr("rw", "int32"),
+ 0, 0,
+ 0, 0, 0))
inner = irb.get()
# TODO(@tmoreau89): This is only a temporary fix, please take a look.
body = op.body.body
- while isinstance(body, tvm.stmt.IfThenElse):
+ while isinstance(body, tvm.tir.IfThenElse):
body = body.then_case
args = body.args
res_tensor = body.func.output(0)
tpl = (args[0], 1, args[1], 1, args[2], 1, args[3], 1, 0, 1, 0, env.BLOCK_OUT)
inner = tvm.tir.AttrStmt(
[dout, res_tensor], 'buffer_bind_scope',
- tvm.call_intrin('handle', 'tvm_tuple', *tpl), inner)
+ tvm.tir.call_intrin('handle', 'tvm_tuple', *tpl), inner)
return inner
else:
conv_call, data_call, kernel_call = calls[-3:]
if selects:
condition = selects[0].condition
else:
- condition = tvm.const(1, 'int')
+ condition = tvm.tir.const(1, 'int')
# create inner most block
- irb = tvm.ir_builder.create()
+ irb = tvm.tir.ir_builder.create()
with irb.if_scope(condition):
dev = env.dev
irb.scope_attr(dev.vta_axis, "coproc_scope", dev.get_task_qid(dev.QID_COMPUTE))
irb.scope_attr(dev.vta_axis, "coproc_uop_scope", dev.vta_push_uop)
- irb.emit(tvm.call_extern("int32", "VTAUopPush",
- 0, 0,
- dout.access_ptr("rw", "int32"),
- dinp.access_ptr("r", "int32"),
- dwgt.access_ptr("r", "int32"),
- 0, 0, 0))
+ irb.emit(tvm.tir.call_extern("int32", "VTAUopPush",
+ 0, 0,
+ dout.access_ptr("rw", "int32"),
+ dinp.access_ptr("r", "int32"),
+ dwgt.access_ptr("r", "int32"),
+ 0, 0, 0))
inner = irb.get()
args = conv_call.args
1, 0, 1, 0, env.BLOCK_OUT)
inner = tvm.tir.AttrStmt(
[dout, res_tensor], 'buffer_bind_scope',
- tvm.call_intrin('handle', 'tvm_tuple', *tpl), inner)
+ tvm.tir.call_intrin('handle', 'tvm_tuple', *tpl), inner)
args = kernel_call.args
tpl = (args[0], 1, args[1], 1, args[2], 1, args[3],
1, 0, env.BLOCK_OUT, 0, env.BLOCK_IN)
inner = tvm.tir.AttrStmt(
[dwgt, kernel_tensor], 'buffer_bind_scope',
- tvm.call_intrin('handle', 'tvm_tuple', *tpl), inner)
+ tvm.tir.call_intrin('handle', 'tvm_tuple', *tpl), inner)
args = data_call.args
tpl = (args[0], 1, args[1], 1, args[2], 1, args[3],
1, 0, 1, 0, env.BLOCK_IN)
inner = tvm.tir.AttrStmt(
[dinp, pad_data_tensor], 'buffer_bind_scope',
- tvm.call_intrin('handle', 'tvm_tuple', *tpl), inner)
+ tvm.tir.call_intrin('handle', 'tvm_tuple', *tpl), inner)
return inner
return None
- ret = tvm.ir_pass.IRTransform(
+ ret = tvm.tir.ir_pass.IRTransform(
stmt_in, _do_fold, None, ["AttrStmt"])
return ret
env = get_env()
def _do_fold(stmt):
if _match_pragma(stmt, "alu"):
- irb = tvm.ir_builder.create()
+ irb = tvm.tir.ir_builder.create()
irb.scope_attr(env.dev.vta_axis, "coproc_scope",
env.dev.get_task_qid(env.dev.QID_COMPUTE))
irb.scope_attr(env.dev.vta_axis, "coproc_uop_scope",
return tvm.tir.Evaluate(0)
return stmt
- stmt_out = tvm.ir_pass.IRTransform(
+ stmt_out = tvm.tir.ir_pass.IRTransform(
stmt_in, None, _do_fold, ["AttrStmt"])
return stmt_out
Transformed statement
"""
env = get_env()
- idxm = tvm.indexmod
+ idxm = tvm.tir.indexmod
def _do_fold(stmt):
def _equal(x, y):
- return tvm.ir_pass.Equal(tvm.ir_pass.Simplify(x - y), 0)
+ return tvm.tir.ir_pass.Equal(tvm.tir.ir_pass.Simplify(x - y), 0)
def _flatten_loop(src_coeff, dst_coeff, extents):
src_coeff = list(src_coeff)
next_ext = extents.pop()
if _equal(next_src, vsrc * vext) and _equal(next_dst, vdst * vext):
- vext = tvm.ir_pass.Simplify(vext * next_ext)
+ vext = tvm.tir.ir_pass.Simplify(vext * next_ext)
else:
rev_src_coeff.append(vsrc)
rev_dst_coeff.append(vdst)
if loop_body.value.name == 'shift_left':
alu_opcode = env.dev.ALU_OPCODE_SHR
lhs = loop_body.value.args[0]
- rhs = tvm.ir_pass.Simplify(-loop_body.value.args[1])
+ rhs = tvm.tir.ir_pass.Simplify(-loop_body.value.args[1])
elif loop_body.value.name == 'shift_right':
alu_opcode = env.dev.ALU_OPCODE_SHR
lhs = loop_body.value.args[0]
elif isinstance(loop_body.value, tvm.tir.Load):
alu_opcode = env.dev.ALU_OPCODE_SHR
lhs = loop_body.value
- rhs = tvm.const(0, "int32")
+ rhs = tvm.tir.const(0, "int32")
else:
raise RuntimeError(
"Expression not recognized %s, %s, %s" % (
lhs_equal = True
rhs_equal = True
for i, coef in enumerate(dst_coeff):
- if not tvm.ir_pass.Equal(coef, src_lhs_coeff[i]):
+ if not tvm.tir.ir_pass.Equal(coef, src_lhs_coeff[i]):
lhs_equal = False
- if not tvm.ir_pass.Equal(coef, src_rhs_coeff[i]):
+ if not tvm.tir.ir_pass.Equal(coef, src_rhs_coeff[i]):
rhs_equal = False
# Make sure at least one of the source is identical to the
# destination (in-place computation)
assert len(src_coeff) > 1
assert len(dst_coeff) > 1
assert len(extents) != 0
- assert tvm.ir_pass.Equal(
- tvm.ir_pass.Simplify(
+ assert tvm.tir.ir_pass.Equal(
+ tvm.tir.ir_pass.Simplify(
idxm(src_coeff[-1], env.BATCH * env.BLOCK_OUT)), 0)
- assert tvm.ir_pass.Equal(
- tvm.ir_pass.Simplify(
+ assert tvm.tir.ir_pass.Equal(
+ tvm.tir.ir_pass.Simplify(
idxm(dst_coeff[-1], env.BATCH * env.BLOCK_OUT)), 0)
- assert tvm.ir_pass.Equal(src_coeff[-2], 1)
- assert tvm.ir_pass.Equal(dst_coeff[-2], 1)
+ assert tvm.tir.ir_pass.Equal(src_coeff[-2], 1)
+ assert tvm.tir.ir_pass.Equal(dst_coeff[-2], 1)
if env.BATCH > 1:
assert len(src_coeff) > 2
assert len(dst_coeff) > 2
assert len(extents) > 1
- assert tvm.ir_pass.Equal(src_coeff[-3], env.BLOCK_OUT)
- assert tvm.ir_pass.Equal(dst_coeff[-3], env.BLOCK_OUT)
+ assert tvm.tir.ir_pass.Equal(src_coeff[-3], env.BLOCK_OUT)
+ assert tvm.tir.ir_pass.Equal(dst_coeff[-3], env.BLOCK_OUT)
# Apply tensorization of the loop coefficients
src_offset = src_coeff[-1]
src_coeff.append(src_offset)
dst_coeff.append(dst_offset)
src_coeff = [
- tvm.ir_pass.Simplify(c // (env.BATCH * env.BLOCK_OUT)) for c in src_coeff]
+ tvm.tir.ir_pass.Simplify(c // (env.BATCH * env.BLOCK_OUT)) for c in src_coeff]
dst_coeff = [
- tvm.ir_pass.Simplify(c // (env.BATCH * env.BLOCK_OUT)) for c in dst_coeff]
+ tvm.tir.ir_pass.Simplify(c // (env.BATCH * env.BLOCK_OUT)) for c in dst_coeff]
# Flatten the outer loops
if extents:
src_coeff, dst_coeff, extents = _flatten_loop(src_coeff, dst_coeff, extents)
# Insert ALU micro-ops
- irb = tvm.ir_builder.create()
+ irb = tvm.tir.ir_builder.create()
for idx, extent in enumerate(extents):
- irb.emit(tvm.call_extern(
+ irb.emit(tvm.tir.call_extern(
"int32", "VTAUopLoopBegin",
extent, dst_coeff[idx], src_coeff[idx], 0))
use_imm = int(use_imm)
- irb.emit(tvm.call_extern(
+ irb.emit(tvm.tir.call_extern(
"int32", "VTAUopPush",
1, 0,
dst_coeff[len(dst_coeff)-1],
0,
alu_opcode, use_imm, imm_val))
for extent in extents:
- irb.emit(tvm.call_extern(
+ irb.emit(tvm.tir.call_extern(
"int32", "VTAUopLoopEnd"))
return irb.get()
return stmt
- stmt_out = tvm.ir_pass.IRTransform(
+ stmt_out = tvm.tir.ir_pass.IRTransform(
stmt_in, None, _do_fold, ["AttrStmt"])
return stmt_out
self.inp_mem_size = 1 << cfg["LOG_INP_BUFF_SIZE"] # bytes
self.inp_mem_banks = (inp_mem_bus_width + \
max_bus_width - 1) // \
- max_bus_width
+ max_bus_width
self.inp_mem_width = min(inp_mem_bus_width, max_bus_width)
self.inp_mem_depth = self.inp_mem_size * 8 // inp_mem_bus_width
self.inp_mem_axi_ratio = self.inp_mem_width // mem_bus_width
self.wgt_mem_size = 1 << cfg["LOG_WGT_BUFF_SIZE"] # bytes
self.wgt_mem_banks = (wgt_mem_bus_width + \
max_bus_width - 1) // \
- max_bus_width
+ max_bus_width
self.wgt_mem_width = min(wgt_mem_bus_width, max_bus_width)
self.wgt_mem_depth = self.wgt_mem_size * 8 // wgt_mem_bus_width
self.wgt_mem_axi_ratio = self.wgt_mem_width // mem_bus_width
self.out_mem_size = 1 << cfg["LOG_OUT_BUFF_SIZE"] # bytes
self.out_mem_banks = (out_mem_bus_width + \
max_bus_width - 1) // \
- max_bus_width
+ max_bus_width
self.out_mem_width = min(out_mem_bus_width, max_bus_width)
self.out_mem_depth = self.out_mem_size * 8 // out_mem_bus_width
self.out_mem_axi_ratio = self.out_mem_width // mem_bus_width
self.macro_defs.append("-DVTA_STORE_ADDR=%s" % (self.store_base_addr))
# IP register offsets
self.macro_defs.append("-DVTA_FETCH_INSN_COUNT_OFFSET=%s" % \
- (self.fetch_insn_count_offset))
+ (self.fetch_insn_count_offset))
self.macro_defs.append("-DVTA_FETCH_INSN_ADDR_OFFSET=%s" % \
- (self.fetch_insn_addr_offset))
+ (self.fetch_insn_addr_offset))
self.macro_defs.append("-DVTA_LOAD_INP_ADDR_OFFSET=%s" % \
- (self.load_inp_addr_offset))
+ (self.load_inp_addr_offset))
self.macro_defs.append("-DVTA_LOAD_WGT_ADDR_OFFSET=%s" % \
- (self.load_wgt_addr_offset))
+ (self.load_wgt_addr_offset))
self.macro_defs.append("-DVTA_COMPUTE_DONE_WR_OFFSET=%s" % \
- (self.compute_done_wr_offet))
+ (self.compute_done_wr_offet))
self.macro_defs.append("-DVTA_COMPUTE_DONE_RD_OFFSET=%s" % \
- (self.compute_done_rd_offet))
+ (self.compute_done_rd_offet))
self.macro_defs.append("-DVTA_COMPUTE_UOP_ADDR_OFFSET=%s" % \
- (self.compute_uop_addr_offset))
+ (self.compute_uop_addr_offset))
self.macro_defs.append("-DVTA_COMPUTE_BIAS_ADDR_OFFSET=%s" % \
- (self.compute_bias_addr_offset))
+ (self.compute_bias_addr_offset))
self.macro_defs.append("-DVTA_STORE_OUT_ADDR_OFFSET=%s" % \
- (self.store_out_addr_offset))
+ (self.store_out_addr_offset))
# Coherency
if coherent:
self.macro_defs.append("-DVTA_COHERENT_ACCESSES=true")
from __future__ import absolute_import as _abs
import tvm
+from tvm import te
from topi import util
from tvm.relay.op.op import register_compute, register_injective_schedule
def _bitpack(*indices):
ret = None
- mask = tvm.const((1 << bits) - 1, pack_type)
+ mask = tvm.tir.const((1 << bits) - 1, pack_type)
for k in range(lanes):
idx = list(indices)
idx[-1] = idx[-1] * lanes + k
if k == 0:
ret = elem & mask
else:
- val = (elem & mask) << tvm.const(k * bits, pack_type)
+ val = (elem & mask) << tvm.tir.const(k * bits, pack_type)
ret = ret | val
return ret
- return tvm.compute(
+ return te.compute(
oshape, _bitpack, name=name, tag='bitpack')
from __future__ import absolute_import as _abs
import tvm
+from tvm import te
import topi
from tvm.relay.op import op as reg
x = inputs[0]
a_min = attrs.a_min
a_max = attrs.a_max
- const_min = tvm.const(a_min, x.dtype)
- const_max = tvm.const(a_max, x.dtype)
- with tvm.tag_scope(topi.tag.ELEMWISE):
- x = tvm.compute(
- x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA")
- x = tvm.compute(
- x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB")
+ const_min = tvm.tir.const(a_min, x.dtype)
+ const_max = tvm.tir.const(a_max, x.dtype)
+ with tvm.te.tag_scope(topi.tag.ELEMWISE):
+ x = te.compute(
+ x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA")
+ x = te.compute(
+ x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB")
return [x]
def clip_strategy_vta(attrs, inputs, out_type, target):
import numpy as np
import tvm
+from tvm import te
from tvm import autotvm
import topi
ishape = topi.util.get_const_tuple(data.shape)
kshape = topi.util.get_const_tuple(kernel.shape)
- d_i = tvm.reduce_axis((0, kshape[2]), name='d_i')
- d_j = tvm.reduce_axis((0, kshape[3]), name='d_j')
- k_o = tvm.reduce_axis((0, ishape[1]), name='k_o')
- k_i = tvm.reduce_axis((0, ishape[-1]), name='k_i')
+ d_i = te.reduce_axis((0, kshape[2]), name='d_i')
+ d_j = te.reduce_axis((0, kshape[3]), name='d_j')
+ k_o = te.reduce_axis((0, ishape[1]), name='k_o')
+ k_i = te.reduce_axis((0, ishape[-1]), name='k_i')
hstride, wstride = strides
- res = tvm.compute(
+ res = te.compute(
oshape,
- lambda b_o, c_o, i, j, b_i, c_i: tvm.sum(
+ lambda b_o, c_o, i, j, b_i, c_i: te.sum(
pad_data[b_o, k_o, i*hstride+d_i, j*wstride+d_j, b_i, k_i].astype(out_dtype) *
kernel[c_o, k_o, d_i, d_j, c_i, k_i].astype(out_dtype),
axis=[k_o, d_i, d_j, k_i]),
else:
ewise_ops.append(op)
for tensor in op.input_tensors:
- if isinstance(tensor.op, tvm.tensor.PlaceholderOp):
+ if isinstance(tensor.op, tvm.te.PlaceholderOp):
ewise_inputs.append((op, tensor))
else:
_traverse(tensor.op)
_traverse(output.op)
assert len(conv2d_res) == 1
conv2d_stage = conv2d_res[0].output(0)
- s = tvm.create_schedule(output.op)
+ s = te.create_schedule(output.op)
##### space definition begin #####
b, c_o, x_i, x_j, _, _ = s[conv2d_stage].op.axis
###### space definition end ######
data, kernel = conv2d_stage.op.input_tensors
- if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag:
+ if isinstance(data.op, tvm.te.ComputeOp) and "pad" in data.op.tag:
temp = data.op.input_tensors[0]
pad_data = data
data = temp
if cfg['oc_nthread'].val > 1:
_, v_t = s[output].split(x_co0, factor=cfg['oc_nthread'].val)
s[output].reorder(v_t, x_bo)
- s[output].bind(v_t, tvm.thread_axis("cthread"))
+ s[output].bind(v_t, te.thread_axis("cthread"))
# virtual threading along spatial rows
if cfg['h_nthread'].val > 1:
_, v_t = s[output].split(x_i0, factor=cfg['h_nthread'].val)
s[output].reorder(v_t, x_bo)
- s[output].bind(v_t, tvm.thread_axis("cthread"))
+ s[output].bind(v_t, te.thread_axis("cthread"))
x_bo, x_co, x_i, x_j, x_bi, x_ci = s[conv2d_stage].op.axis
k_o, d_i, d_j, k_i = s[conv2d_stage].op.reduce_axis
import numpy as np
import tvm
+from tvm import te
from tvm import autotvm
import topi
from topi.util import get_const_tuple
out_h = (i_h - 1) * stride_h - fpad_top - fpad_bottom + k_h
out_w = (i_w - 1) * stride_w - fpad_left - fpad_right + k_w
oshape = (b, c_o, out_h, out_w, t_b, t_co)
- d_c = tvm.reduce_axis((0, c_i), name='d_c')
- d_h = tvm.reduce_axis((0, k_h), name='d_h')
- d_w = tvm.reduce_axis((0, k_w), name='d_w')
- d_ci = tvm.reduce_axis((0, t_ci), name='d_ci')
+ d_c = te.reduce_axis((0, c_i), name='d_c')
+ d_h = te.reduce_axis((0, k_h), name='d_h')
+ d_w = te.reduce_axis((0, k_w), name='d_w')
+ d_ci = te.reduce_axis((0, t_ci), name='d_ci')
- out = tvm.compute(
+ out = te.compute(
oshape,
- lambda i_n, i_c, i_h, i_w, j_n, j_c: tvm.sum(
+ lambda i_n, i_c, i_h, i_w, j_n, j_c: te.sum(
data_pad(i_n, d_c, i_h + d_h, i_w + d_w, j_n, d_ci).astype(out_dtype) *
kernel[i_c, d_c, d_h, d_w, j_c, d_ci].astype(out_dtype),
axis=[d_c, d_h, d_w, d_ci]),
if not op.same_as(output.op):
ewise_ops.append(op)
for tensor in op.input_tensors:
- if isinstance(tensor.op, tvm.tensor.PlaceholderOp):
+ if isinstance(tensor.op, tvm.te.PlaceholderOp):
ewise_inputs.append((op, tensor))
else:
_traverse(tensor.op)
_traverse(output.op)
assert len(conv2d_res) == 1
conv2d_stage = conv2d_res[0].output(0)
- s = tvm.create_schedule(output.op)
+ s = te.create_schedule(output.op)
##### space definition begin #####
b, c_o, x_i, x_j, _, c_i = s[conv2d_stage].op.axis
###### space definition end ######
data, kernel = conv2d_stage.op.input_tensors
- if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag:
+ if isinstance(data.op, tvm.te.ComputeOp) and "pad" in data.op.tag:
temp = data.op.input_tensors[0]
pad_data = data
data = temp
if cfg['oc_nthread'].val > 1:
_, v_t = s[output].split(x_co0, factor=cfg['oc_nthread'].val)
s[output].reorder(v_t, x_bo)
- s[output].bind(v_t, tvm.thread_axis("cthread"))
+ s[output].bind(v_t, te.thread_axis("cthread"))
# virtual threading along spatial rows
if cfg['h_nthread'].val > 1:
_, v_t = s[output].split(x_i0, factor=cfg['h_nthread'].val)
s[output].reorder(v_t, x_bo)
- s[output].bind(v_t, tvm.thread_axis("cthread"))
+ s[output].bind(v_t, te.thread_axis("cthread"))
x_bo, x_co, x_i, x_j, x_bi, x_ci = s[conv2d_stage].op.axis
k_o, d_i, d_j, k_i = s[conv2d_stage].op.reduce_axis
import numpy as np
import tvm
+from tvm import te
from tvm import autotvm
import topi
# Reduction axes (input channel)
assert ishape[1] == wshape[1]
assert ishape[3] == wshape[3]
- k_o = tvm.reduce_axis((0, ishape[1]), name='k_o')
- k_i = tvm.reduce_axis((0, ishape[3]), name='k_i')
- res = tvm.compute(
+ k_o = te.reduce_axis((0, ishape[1]), name='k_o')
+ k_i = te.reduce_axis((0, ishape[3]), name='k_i')
+ res = te.compute(
oshape,
- lambda b_o, c_o, b_i, c_i: tvm.sum(
+ lambda b_o, c_o, b_i, c_i: te.sum(
data[b_o, k_o, b_i, k_i].astype(out_dtype) *
weight[c_o, k_o, c_i, k_i].astype(out_dtype),
axis=[k_o, k_i]),
else:
ewise_ops.append(op)
for tensor in op.input_tensors:
- if isinstance(tensor.op, tvm.tensor.PlaceholderOp):
+ if isinstance(tensor.op, tvm.te.PlaceholderOp):
ewise_inputs.append((op, tensor))
else:
_traverse(tensor.op)
_traverse(output.op)
assert len(dense_res) == 1
dense_stage = dense_res[0].output(0)
- s = tvm.create_schedule(output.op)
+ s = te.create_schedule(output.op)
##### space definition begin #####
b, c_o, _, _ = s[dense_stage].op.axis
if cfg['oc_nthread'].val > 1:
_, v_t = s[output].split(x_co, factor=cfg['oc_nthread'].val)
s[output].reorder(v_t, x_bo)
- s[output].bind(v_t, tvm.thread_axis("cthread"))
+ s[output].bind(v_t, te.thread_axis("cthread"))
x_bo, x_co, x_bi, _ = s[dense_stage].op.axis
k_o, _ = s[dense_stage].op.reduce_axis
import numpy as np
import tvm
+from tvm import te
from tvm import autotvm
import topi
kshape = topi.util.get_const_tuple(kernel.shape)
assert group * kshape[1] == ishape[1]
assert kshape[0] % group == 0
- d_i = tvm.reduce_axis((0, kshape[2]), name='d_i')
- d_j = tvm.reduce_axis((0, kshape[3]), name='d_j')
- k_o = tvm.reduce_axis((0, kshape[1]), name='k_o')
- k_i = tvm.reduce_axis((0, kshape[-1]), name='k_i')
+ d_i = te.reduce_axis((0, kshape[2]), name='d_i')
+ d_j = te.reduce_axis((0, kshape[3]), name='d_j')
+ k_o = te.reduce_axis((0, kshape[1]), name='k_o')
+ k_i = te.reduce_axis((0, kshape[-1]), name='k_i')
hstride, wstride = strides
- out = tvm.compute(
+ out = te.compute(
oshape,
- lambda b_o, c_o, i, j, b_i, c_i: tvm.sum(
+ lambda b_o, c_o, i, j, b_i, c_i: te.sum(
pad_data[b_o, c_o // (kshape[0] // group) * kshape[1] + k_o, i * hstride + d_i,
j * wstride + d_j, b_i, k_i].astype(out_dtype) *
kernel[c_o, k_o, d_i, d_j, c_i, k_i].astype(out_dtype),
else:
ewise_ops.append(op)
for tensor in op.input_tensors:
- if isinstance(tensor.op, tvm.tensor.PlaceholderOp):
+ if isinstance(tensor.op, tvm.te.PlaceholderOp):
ewise_inputs.append((op, tensor))
else:
_traverse(tensor.op)
_traverse(output.op)
assert len(conv2d_res) == 1
conv2d_stage = conv2d_res[0].output(0)
- s = tvm.create_schedule(output.op)
+ s = te.create_schedule(output.op)
##### space definition begin #####
b, c_o, x_i, x_j, _, _ = s[conv2d_stage].op.axis
###### space definition end ######
data, kernel = conv2d_stage.op.input_tensors
- if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag:
+ if isinstance(data.op, tvm.te.ComputeOp) and "pad" in data.op.tag:
temp = data.op.input_tensors[0]
pad_data = data
data = temp
if cfg['oc_nthread'].val > 1:
_, v_t = s[output].split(x_co0, factor=cfg['oc_nthread'].val)
s[output].reorder(v_t, x_bo)
- s[output].bind(v_t, tvm.thread_axis("cthread"))
+ s[output].bind(v_t, te.thread_axis("cthread"))
# virtual threading along spatial rows
if cfg['h_nthread'].val > 1:
_, v_t = s[output].split(x_i0, factor=cfg['h_nthread'].val)
s[output].reorder(v_t, x_bo)
- s[output].bind(v_t, tvm.thread_axis("cthread"))
+ s[output].bind(v_t, te.thread_axis("cthread"))
x_bo, x_co, x_i, x_j, x_bi, x_ci = s[conv2d_stage].op.axis
k_o, d_i, d_j, k_i = s[conv2d_stage].op.reduce_axis
import os
import tvm
+from tvm import te
from tvm import autotvm
import topi
import vta
('resnet-18.C11', Workload(env.BATCH, 7, 7, 512, 512, 3, 3, 1, 1, 1, 1)),
]
-@tvm.tag_scope(tag=topi.tag.ELEMWISE)
+@tvm.te.tag_scope(tag=topi.tag.ELEMWISE)
def my_clip(x, a_min, a_max):
"""Unlike topi's current clip, put min and max into two stages."""
- const_min = tvm.const(a_min, x.dtype)
- const_max = tvm.const(a_max, x.dtype)
- x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA")
- x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB")
+ const_min = tvm.tir.const(a_min, x.dtype)
+ const_max = tvm.tir.const(a_max, x.dtype)
+ x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA")
+ x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB")
return x
def conv2d(N, CI, H, W, CO, KH, KW, strides, padding, dilation):
kernel_shape = (CO//env.BLOCK_OUT, CI//env.BLOCK_IN, KH, KW, env.BLOCK_OUT, env.BLOCK_IN)
bias_shape = (N//env.BATCH, CO//env.BLOCK_OUT, 1, 1, env.BATCH, env.BLOCK_OUT)
- data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype)
- kernel = tvm.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype)
- bias = tvm.placeholder(bias_shape, name="bias", dtype=env.acc_dtype)
+ data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype)
+ kernel = te.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype)
+ bias = te.placeholder(bias_shape, name="bias", dtype=env.acc_dtype)
with tvm.target.vta():
res = topi.nn.conv2d(
if tvm.target.Target.current().device_name == 'vta':
s = topi.generic.schedule_conv2d_nchw([res])
else:
- s = tvm.create_schedule([res.op])
+ s = te.create_schedule([res.op])
return s, [data, kernel, bias, res]
import os
import tvm
+from tvm import te
from tvm import autotvm
import topi
import vta
('DCGAN.CT3', Workload(env.BATCH, 16, 16, 256, 128, 4, 4, 1, 1, 2, 2)),
]
-@tvm.tag_scope(tag=topi.tag.ELEMWISE)
+@tvm.te.tag_scope(tag=topi.tag.ELEMWISE)
def my_clip(x, a_min, a_max):
"""Unlike topi's current clip, put min and max into two stages."""
- const_min = tvm.const(a_min, x.dtype)
- const_max = tvm.const(a_max, x.dtype)
- x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA")
- x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB")
+ const_min = tvm.tir.const(a_min, x.dtype)
+ const_max = tvm.tir.const(a_max, x.dtype)
+ x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA")
+ x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB")
return x
def conv2d_transpose(N, CI, H, W, CO, KH, KW, strides, padding):
data_shape = (N//env.BATCH, CI//env.BLOCK_IN, H, W, env.BATCH, env.BLOCK_IN)
kernel_shape = (CO//env.BLOCK_OUT, CI//env.BLOCK_IN, KH, KW, env.BLOCK_OUT, env.BLOCK_IN)
- data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype)
- kernel = tvm.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype)
+ data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype)
+ kernel = te.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype)
with tvm.target.vta():
res = topi.nn.conv2d_transpose_nchw(
if tvm.target.Target.current().device_name == 'vta':
s = topi.generic.schedule_conv2d_transpose_nchw([res])
else:
- s = tvm.create_schedule([res.op])
+ s = te.create_schedule([res.op])
return s, [data, kernel, res]
import os
import tvm
+from tvm import te
from tvm import autotvm
import topi
import vta
('lstm.dense.4', Workload(4, 256, 128)),
]
-@tvm.tag_scope(tag=topi.tag.ELEMWISE)
+@tvm.te.tag_scope(tag=topi.tag.ELEMWISE)
def my_clip(x, a_min, a_max):
"""Unlike topi's current clip, put min and max into two stages."""
- const_min = tvm.const(a_min, x.dtype)
- const_max = tvm.const(a_max, x.dtype)
- x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA")
- x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB")
+ const_min = tvm.tir.const(a_min, x.dtype)
+ const_max = tvm.tir.const(a_max, x.dtype)
+ x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA")
+ x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB")
return x
def dense(N, CI, CO):
data_shape = (N//env.BATCH, CI//env.BLOCK_IN, env.BATCH, env.BLOCK_IN)
kernel_shape = (CO//env.BLOCK_OUT, CI//env.BLOCK_IN, env.BLOCK_OUT, env.BLOCK_IN)
- data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype)
- kernel = tvm.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype)
+ data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype)
+ kernel = te.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype)
with tvm.target.vta():
res = topi.nn.dense(data, kernel, None, 'int32')
if tvm.target.Target.current().device_name == 'vta':
s = topi.generic.schedule_dense([res])
else:
- s = tvm.create_schedule([res.op])
+ s = te.create_schedule([res.op])
return s, [data, kernel, res]
import os
import tvm
+from tvm import te
from tvm import autotvm
import topi
import vta
('mobilenet.D9', Workload(env.BATCH, 7, 7, 1024, 1024, 64, 3, 3, 1, 1, 1, 1)),
]
-@tvm.tag_scope(tag=topi.tag.ELEMWISE)
+@tvm.te.tag_scope(tag=topi.tag.ELEMWISE)
def my_clip(x, a_min, a_max):
"""Unlike topi's current clip, put min and max into two stages."""
- const_min = tvm.const(a_min, x.dtype)
- const_max = tvm.const(a_max, x.dtype)
- x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA")
- x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB")
+ const_min = tvm.tir.const(a_min, x.dtype)
+ const_max = tvm.tir.const(a_max, x.dtype)
+ x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA")
+ x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB")
return x
def group_conv2d(N, CI, H, W, CO, KH, KW, strides, padding, dilation, group):
kernel_shape = (CO//env.BLOCK_OUT, CI_G//env.BLOCK_IN, KH, KW, env.BLOCK_OUT, env.BLOCK_IN)
bias_shape = (N//env.BATCH, CO//env.BLOCK_OUT, 1, 1, env.BATCH, env.BLOCK_OUT)
- data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype)
- kernel = tvm.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype)
- bias = tvm.placeholder(bias_shape, name="bias", dtype=env.acc_dtype)
+ data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype)
+ kernel = te.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype)
+ bias = te.placeholder(bias_shape, name="bias", dtype=env.acc_dtype)
with tvm.target.vta():
res = topi.nn.group_conv2d_nchw(
if tvm.target.Target.current().device_name == 'vta':
s = topi.generic.schedule_group_conv2d_nchw([res])
else:
- s = tvm.create_schedule([res.op])
+ s = te.create_schedule([res.op])
return s, [data, kernel, bias, res]
import topi
import tvm
+from tvm import te
from tvm import rpc, autotvm, relay
from tvm.autotvm.measure.measure_methods import request_remote
from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
def register_vta_tuning_tasks():
from tvm.autotvm.task.topi_integration import TaskExtractEnv, deserialize_args
- @tvm.tag_scope(tag=topi.tag.ELEMWISE)
+ @tvm.te.tag_scope(tag=topi.tag.ELEMWISE)
def my_clip(x, a_min, a_max):
"""Unlike topi's current clip, put min and max into two stages."""
- const_min = tvm.const(a_min, x.dtype)
- const_max = tvm.const(a_max, x.dtype)
- x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA")
- x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB")
+ const_min = tvm.tir.const(a_min, x.dtype)
+ const_max = tvm.tir.const(a_max, x.dtype)
+ x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA")
+ x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB")
return x
# init autotvm env to register VTA operator
if tvm.target.Target.current().device_name == 'vta':
s = topi.generic.schedule_conv2d_nchw([res])
else:
- s = tvm.create_schedule([res.op])
+ s = te.create_schedule([res.op])
return s, [A, W, res]
@autotvm.task.register("topi_nn_dense", override=True)
if tvm.target.Target.current().device_name == 'vta':
s = topi.generic.schedule_dense([res])
else:
- s = tvm.create_schedule([res.op])
+ s = te.create_schedule([res.op])
return s, [A, W, res]
# specific language governing permissions and limitations
# under the License.
import tvm
+from tvm import te
import numpy as np
from tvm.contrib import util
import vta.testing
# To compute number of ops, use a x2 factor for FMA
num_ops = 2 * channel * channel * batch_size
- ko = tvm.reduce_axis((0, channel // env.BLOCK_IN), name='ko')
- ki = tvm.reduce_axis((0, env.BLOCK_IN), name='ki')
+ ko = te.reduce_axis((0, channel // env.BLOCK_IN), name='ko')
+ ki = te.reduce_axis((0, env.BLOCK_IN), name='ki')
- data = tvm.placeholder(data_shape,
+ data = te.placeholder(data_shape,
name="data",
dtype=env.inp_dtype)
- weight = tvm.placeholder(weight_shape,
+ weight = te.placeholder(weight_shape,
name="weight",
dtype=env.wgt_dtype)
- data_buf = tvm.compute(data_shape,
+ data_buf = te.compute(data_shape,
lambda *i: data(*i),
"data_buf")
- weight_buf = tvm.compute(weight_shape,
+ weight_buf = te.compute(weight_shape,
lambda *i: weight(*i),
"weight_buf")
- res_gem = tvm.compute(res_shape,
- lambda bo, co, bi, ci: tvm.sum(
+ res_gem = te.compute(res_shape,
+ lambda bo, co, bi, ci: te.sum(
data_buf[bo, ko, bi, ki].astype(env.acc_dtype) *
weight_buf[co, ko, ci, ki].astype(env.acc_dtype),
axis=[ko, ki]),
name="res_gem")
- res_shf = tvm.compute(res_shape,
+ res_shf = te.compute(res_shape,
lambda *i: res_gem(*i)>>8,
name="res_shf")
- res_max = tvm.compute(res_shape,
- lambda *i: tvm.max(res_shf(*i), 0),
+ res_max = te.compute(res_shape,
+ lambda *i: tvm.te.max(res_shf(*i), 0),
"res_max") #relu
- res_min = tvm.compute(res_shape,
- lambda *i: tvm.min(res_max(*i), (1<<(env.INP_WIDTH-1))-1),
+ res_min = te.compute(res_shape,
+ lambda *i: tvm.te.min(res_max(*i), (1<<(env.INP_WIDTH-1))-1),
"res_min") #relu
- res = tvm.compute(res_shape,
+ res = te.compute(res_shape,
lambda *i: res_min(*i).astype(env.inp_dtype),
name="res")
store_out,
print_ir,
check_correctness):
- s = tvm.create_schedule(res.op)
+ s = te.create_schedule(res.op)
s[data_buf].set_scope(env.inp_scope)
s[weight_buf].set_scope(env.wgt_scope)
s[res_gem].set_scope(env.acc_scope)
from collections import namedtuple
import tvm
+from tvm import te
from tvm import relay
from tvm import autotvm
from tvm.contrib import util
]
# FIXME: we need a custom clip operator to circumvent a pattern detection limitation
-@tvm.tag_scope(tag=topi.tag.ELEMWISE)
+@tvm.te.tag_scope(tag=topi.tag.ELEMWISE)
def my_clip(x, a_min, a_max):
"""Unlike topi's current clip, put min and max into two stages."""
- const_min = tvm.const(a_min, x.dtype)
- const_max = tvm.const(a_max, x.dtype)
- x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA")
- x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB")
+ const_min = tvm.tir.const(a_min, x.dtype)
+ const_max = tvm.tir.const(a_max, x.dtype)
+ x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA")
+ x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB")
return x
def run_conv2d(env, remote, wl, target,
data_shape = a_shape
kernel_shape = w_shape
bias_shape = b_shape
- data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype)
- kernel = tvm.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype)
- bias = tvm.placeholder(bias_shape, name="bias", dtype=env.acc_dtype)
+ data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype)
+ kernel = te.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype)
+ bias = te.placeholder(bias_shape, name="bias", dtype=env.acc_dtype)
padding = relay.nn.get_pad_tuple2d((wl.hpad, wl.wpad))
# Define base computation schedule
from collections import namedtuple
import tvm
+from tvm import te
from tvm import relay
from tvm import autotvm
from tvm.contrib import util
]
# FIXME: we need a custom clip operator to circumvent a pattern detection limitation
-@tvm.tag_scope(tag=topi.tag.ELEMWISE)
+@tvm.te.tag_scope(tag=topi.tag.ELEMWISE)
def my_clip(x, a_min, a_max):
"""Unlike topi's current clip, put min and max into two stages."""
- const_min = tvm.const(a_min, x.dtype)
- const_max = tvm.const(a_max, x.dtype)
- x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA")
- x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB")
+ const_min = tvm.tir.const(a_min, x.dtype)
+ const_max = tvm.tir.const(a_max, x.dtype)
+ x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA")
+ x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB")
return x
# Helper function to get factors
else:
data_shape = a_shape
kernel_shape = w_shape
- data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype)
- kernel = tvm.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype)
+ data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype)
+ kernel = te.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype)
padding = relay.nn.get_pad_tuple2d((wl.hpad, wl.wpad))
# Define base computation schedule
import numpy as np
import tvm
+from tvm import te
from tvm import autotvm
from tvm.contrib import util
from tvm.contrib.pickle_memoize import memoize
from vta.testing import simulator
# FIXME: we need a custom clip operator to circumvent a pattern detection limitation
-@tvm.tag_scope(tag=topi.tag.ELEMWISE)
+@tvm.te.tag_scope(tag=topi.tag.ELEMWISE)
def my_clip(x, a_min, a_max):
"""Unlike topi's current clip, put min and max into two stages."""
- const_min = tvm.const(a_min, x.dtype)
- const_max = tvm.const(a_max, x.dtype)
- x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA")
- x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB")
+ const_min = tvm.tir.const(a_min, x.dtype)
+ const_max = tvm.tir.const(a_max, x.dtype)
+ x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA")
+ x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB")
return x
def run_gemm(env, remote, target,
kernel_shape = w_shape
fcompute = topi.x86.dense_nopack
fschedule = topi.x86.schedule_dense_nopack
- data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype)
- kernel = tvm.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype)
+ data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype)
+ kernel = te.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype)
# Define base computation schedule
with target:
from collections import namedtuple
import tvm
+from tvm import te
from tvm import relay
from tvm import autotvm
from tvm.contrib import util
]
# FIXME: we need a custom clip operator to circumvent a pattern detection limitation
-@tvm.tag_scope(tag=topi.tag.ELEMWISE)
+@tvm.te.tag_scope(tag=topi.tag.ELEMWISE)
def my_clip(x, a_min, a_max):
"""Unlike topi's current clip, put min and max into two stages."""
- const_min = tvm.const(a_min, x.dtype)
- const_max = tvm.const(a_max, x.dtype)
- x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA")
- x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB")
+ const_min = tvm.tir.const(a_min, x.dtype)
+ const_max = tvm.tir.const(a_max, x.dtype)
+ x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA")
+ x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB")
return x
def run_group_conv2d(env, remote, wl, target,
data_shape = a_shape
kernel_shape = w_shape
bias_shape = b_shape
- data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype)
- kernel = tvm.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype)
- bias = tvm.placeholder(bias_shape, name="bias", dtype=env.acc_dtype)
+ data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype)
+ kernel = te.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype)
+ bias = te.placeholder(bias_shape, name="bias", dtype=env.acc_dtype)
padding = relay.nn.get_pad_tuple2d((wl.hpad, wl.wpad))
# Define base computation schedule
# under the License.
import os
import tvm
+from tvm import te
from tvm import rpc
from vta import get_bitstream_path, download_bitstream, program_fpga, reconfig_runtime
# under the License.
"""Unit test VTA's instructions """
import tvm
+from tvm import te
import numpy as np
import topi
from tvm.contrib import util
"""Test save/store output command"""
def _run(env, remote):
n = 6
- x = tvm.placeholder(
+ x = te.placeholder(
(n, n, env.BATCH, env.BLOCK_OUT),
name="x",
dtype=env.acc_dtype)
- x_buf = tvm.compute(
+ x_buf = te.compute(
(n, n, env.BATCH, env.BLOCK_OUT),
lambda *i: x(*i), "x_buf")
# insert no-op that won't be optimized away
- y_buf = tvm.compute(
+ y_buf = te.compute(
(n, n, env.BATCH, env.BLOCK_OUT),
lambda *i: x_buf(*i)>>0, "y_buf")
- y = tvm.compute(
+ y = te.compute(
(n, n, env.BATCH, env.BLOCK_OUT),
lambda *i: y_buf(*i).astype(env.inp_dtype), "y")
# schedule
- s = tvm.create_schedule(y.op)
+ s = te.create_schedule(y.op)
s[x_buf].set_scope(env.acc_scope)
s[x_buf].pragma(x_buf.op.axis[0], env.dma_copy)
s[y_buf].set_scope(env.acc_scope)
# declare
n = 3
m = 5
- x = tvm.placeholder(
+ x = te.placeholder(
(n, m, env.BATCH, env.BLOCK_OUT),
name="x",
dtype=env.acc_dtype)
x_buf = topi.nn.pad(x, pad_before, pad_after, name="y")
# insert no-op that won't be optimized away
- y_buf = tvm.compute((n + pad_before[0] + pad_after[0],
+ y_buf = te.compute((n + pad_before[0] + pad_after[0],
m + pad_before[1] + pad_after[1],
env.BATCH,
env.BLOCK_OUT), lambda *i: x_buf(*i)>>0, "y_buf")
- y = tvm.compute((n + pad_before[0] + pad_after[0],
+ y = te.compute((n + pad_before[0] + pad_after[0],
m + pad_before[1] + pad_after[1],
env.BATCH,
env.BLOCK_OUT), lambda *i: y_buf(*i).astype(env.inp_dtype), "y")
# schedule
- s = tvm.create_schedule(y.op)
+ s = te.create_schedule(y.op)
s[x_buf].set_scope(env.acc_scope)
s[x_buf].pragma(x_buf.op.axis[0], env.dma_copy)
s[y_buf].set_scope(env.acc_scope)
o = 4
n = 1
m = 4
- x = tvm.placeholder((o, n, env.BATCH, env.BLOCK_IN), name="x", dtype=env.inp_dtype)
- w = tvm.placeholder((m, n, env.BLOCK_OUT, env.BLOCK_IN), name="w", dtype=env.wgt_dtype)
- x_buf = tvm.compute((o, n, env.BATCH, env.BLOCK_IN), lambda *i: x(*i), "x_buf")
- w_buf = tvm.compute((m, n, env.BLOCK_OUT, env.BLOCK_IN), lambda *i: w(*i), "w_buf")
- ko = tvm.reduce_axis((0, n), name="ko")
- ki = tvm.reduce_axis((0, env.BLOCK_IN), name="ki")
- y_gem = tvm.compute(
+ x = te.placeholder((o, n, env.BATCH, env.BLOCK_IN), name="x", dtype=env.inp_dtype)
+ w = te.placeholder((m, n, env.BLOCK_OUT, env.BLOCK_IN), name="w", dtype=env.wgt_dtype)
+ x_buf = te.compute((o, n, env.BATCH, env.BLOCK_IN), lambda *i: x(*i), "x_buf")
+ w_buf = te.compute((m, n, env.BLOCK_OUT, env.BLOCK_IN), lambda *i: w(*i), "w_buf")
+ ko = te.reduce_axis((0, n), name="ko")
+ ki = te.reduce_axis((0, env.BLOCK_IN), name="ki")
+ y_gem = te.compute(
(o, m, env.BATCH, env.BLOCK_OUT),
lambda bo, co, bi, ci:
- tvm.sum(x_buf[bo, ko, bi, ki].astype(env.acc_dtype) *
+ te.sum(x_buf[bo, ko, bi, ki].astype(env.acc_dtype) *
w_buf[co, ko, ci, ki].astype(env.acc_dtype),
axis=[ko, ki]),
name="y_gem")
- y_shf = tvm.compute(
+ y_shf = te.compute(
(o, m, env.BATCH, env.BLOCK_OUT),
lambda *i: y_gem(*i)>>8,
name="y_shf")
- y_max = tvm.compute(
+ y_max = te.compute(
(o, m, env.BATCH, env.BLOCK_OUT),
- lambda *i: tvm.max(y_shf(*i), 0),
+ lambda *i: tvm.te.max(y_shf(*i), 0),
"y_max") #relu
- y_min = tvm.compute(
+ y_min = te.compute(
(o, m, env.BATCH, env.BLOCK_OUT),
- lambda *i: tvm.min(y_max(*i), (1<<(env.INP_WIDTH-1))-1),
+ lambda *i: tvm.te.min(y_max(*i), (1<<(env.INP_WIDTH-1))-1),
"y_min") #relu
- y = tvm.compute(
+ y = te.compute(
(o, m, env.BATCH, env.BLOCK_OUT),
lambda *i: y_min(*i).astype(env.inp_dtype),
name="y")
def test_schedule1():
# default schedule with no smt
- s = tvm.create_schedule(y.op)
+ s = te.create_schedule(y.op)
# set the scope of the SRAM buffers
s[x_buf].set_scope(env.inp_scope)
s[w_buf].set_scope(env.wgt_scope)
def test_smt():
# test smt schedule
- s = tvm.create_schedule(y.op)
+ s = te.create_schedule(y.op)
s[x_buf].set_scope(env.inp_scope)
s[w_buf].set_scope(env.wgt_scope)
s[y_gem].set_scope(env.acc_scope)
s[y_min].set_scope(env.acc_scope)
abo, aco, abi, aci = s[y].op.axis
abo1, abo2 = s[y].split(abo, nparts=2)
- s[y].bind(abo1, tvm.thread_axis("cthread"))
+ s[y].bind(abo1, te.thread_axis("cthread"))
s[y_gem].compute_at(s[y], abo1)
s[y_shf].compute_at(s[y], abo1)
s[y_max].compute_at(s[y], abo1)
n = 8
imm = np.random.randint(1,5)
# compute
- a = tvm.placeholder(
+ a = te.placeholder(
(m, n, env.BATCH, env.BLOCK_OUT),
name="a",
dtype=env.acc_dtype)
- a_buf = tvm.compute(
+ a_buf = te.compute(
(m, n, env.BATCH, env.BLOCK_OUT),
lambda *i: a(*i),
"a_buf") #DRAM->SRAM
if use_imm:
- res_buf = tvm.compute(
+ res_buf = te.compute(
(m, n, env.BATCH, env.BLOCK_OUT),
lambda *i: tvm_op(a_buf(*i), imm),
"res_buf") #compute
else:
- b = tvm.placeholder(
+ b = te.placeholder(
(m, n, env.BATCH, env.BLOCK_OUT),
name="b",
dtype=env.acc_dtype)
- b_buf = tvm.compute(
+ b_buf = te.compute(
(m, n, env.BATCH, env.BLOCK_OUT),
lambda *i: b(*i),
"b_buf") #DRAM->SRAM
- res_buf = tvm.compute(
+ res_buf = te.compute(
(m, n, env.BATCH, env.BLOCK_OUT),
lambda *i: tvm_op(a_buf(*i), b_buf(*i)),
"res_buf") #compute5B
- res = tvm.compute(
+ res = te.compute(
(m, n, env.BATCH, env.BLOCK_OUT),
lambda *i: res_buf(*i).astype(env.inp_dtype),
"res") #SRAM->DRAM
# schedule
- s = tvm.create_schedule(res.op)
+ s = te.create_schedule(res.op)
s[a_buf].set_scope(env.acc_scope) # SRAM
s[a_buf].pragma(a_buf.op.axis[0], env.dma_copy) # DRAM->SRAM
s[res_buf].set_scope(env.acc_scope) # SRAM
print("\t{:<16}: {:>16}".format(k, v))
check_alu(lambda x, y: x << y, np.left_shift, use_imm=True, test_name="SHL")
- check_alu(tvm.max, np.maximum, use_imm=True, test_name="MAX")
- check_alu(tvm.max, np.maximum, test_name="MAX")
+ check_alu(tvm.te.max, np.maximum, use_imm=True, test_name="MAX")
+ check_alu(tvm.te.max, np.maximum, test_name="MAX")
check_alu(lambda x, y: x + y, use_imm=True, test_name="ADD")
check_alu(lambda x, y: x + y, test_name="ADD")
check_alu(lambda x, y: x >> y, np.right_shift, use_imm=True, test_name="SHR")
m = 8
n = 10
# compute
- a = tvm.placeholder(
+ a = te.placeholder(
(m, n, env.BATCH, env.BLOCK_OUT),
name="a",
dtype=env.acc_dtype)
- a_buf = tvm.compute(
+ a_buf = te.compute(
(m, n, env.BATCH, env.BLOCK_OUT),
lambda *i: a(*i),
"a_buf") # DRAM->SRAM
- max_buf = tvm.compute(
+ max_buf = te.compute(
(m, n, env.BATCH, env.BLOCK_OUT),
- lambda *i: tvm.max(a_buf(*i), 0),
+ lambda *i: tvm.te.max(a_buf(*i), 0),
"res_buf") # relu
- min_buf = tvm.compute(
+ min_buf = te.compute(
(m, n, env.BATCH, env.BLOCK_OUT),
- lambda *i: tvm.min(max_buf(*i), (1<<(env.INP_WIDTH-1))-1),
+ lambda *i: tvm.te.min(max_buf(*i), (1<<(env.INP_WIDTH-1))-1),
"max_buf") # relu
- res = tvm.compute(
+ res = te.compute(
(m, n, env.BATCH, env.BLOCK_OUT),
lambda *i: min_buf(*i).astype(env.inp_dtype),
"min_buf") # SRAM->DRAM
# schedule
- s = tvm.create_schedule(res.op)
+ s = te.create_schedule(res.op)
s[a_buf].set_scope(env.acc_scope) # SRAM
s[a_buf].pragma(a_buf.op.axis[0], env.dma_copy) # DRAM->SRAM
s[max_buf].set_scope(env.acc_scope) # SRAM
imm_shift = np.random.randint(0,8)
imm_scale = np.random.randint(1,5)
# compute
- a = tvm.placeholder(
+ a = te.placeholder(
(m, n, env.BATCH, env.BLOCK_OUT),
name="a", dtype=env.acc_dtype)
- a_buf = tvm.compute(
+ a_buf = te.compute(
(m, n, env.BATCH, env.BLOCK_OUT),
lambda *i: a(*i),
"a_buf") # DRAM->SRAM
- res_shift = tvm.compute(
+ res_shift = te.compute(
(m, n, env.BATCH, env.BLOCK_OUT),
lambda *i: a_buf(*i)+imm_shift,
"res_shift") # compute
- res_scale = tvm.compute(
+ res_scale = te.compute(
(m, n, env.BATCH, env.BLOCK_OUT),
lambda *i: res_shift(*i)>>imm_scale,
"res_scale") # compute
- res = tvm.compute(
+ res = te.compute(
(m, n, env.BATCH, env.BLOCK_OUT),
lambda *i: res_scale(*i).astype(env.inp_dtype),
"res") # SRAM->DRAM
# schedule
- s = tvm.create_schedule(res.op)
+ s = te.create_schedule(res.op)
s[a_buf].set_scope(env.acc_scope) # SRAM
s[res_shift].set_scope(env.acc_scope) # SRAM
s[res_scale].set_scope(env.acc_scope) # SRAM
import topi
import tvm
+from tvm import te
from tvm import rpc, autotvm, relay
from tvm.contrib import graph_runtime, util, download
from tvm.autotvm.measure.measure_methods import request_remote
def register_vta_tuning_tasks():
from tvm.autotvm.task import TaskExtractEnv
- @tvm.tag_scope(tag=topi.tag.ELEMWISE)
+ @tvm.te.tag_scope(tag=topi.tag.ELEMWISE)
def my_clip(x, a_min, a_max):
"""Unlike topi's current clip, put min and max into two stages."""
- const_min = tvm.const(a_min, x.dtype)
- const_max = tvm.const(a_max, x.dtype)
- x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA")
- x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB")
+ const_min = tvm.tir.const(a_min, x.dtype)
+ const_max = tvm.tir.const(a_max, x.dtype)
+ x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA")
+ x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB")
return x
# init autotvm env to register VTA operator
if tvm.target.Target.current().device_name == 'vta':
s = vta.top.schedule_conv2d_packed([res])
else:
- s = tvm.create_schedule([res.op])
+ s = te.create_schedule([res.op])
return s, [A, W, res]
from matplotlib import pyplot as plt
import tvm
+from tvm import te
from tvm import rpc, autotvm, relay
from tvm.contrib import graph_runtime, util, download
from tvm.contrib.debugger import debug_runtime
import os
import tvm
+from tvm import te
import vta
import numpy as np
from tvm import rpc
# Batch factor o (we use single batch inference)
o = 1
# A placeholder tensor in tiled data format
-A = tvm.placeholder((o, n, env.BATCH, env.BLOCK_IN), name="A", dtype=env.inp_dtype)
+A = te.placeholder((o, n, env.BATCH, env.BLOCK_IN), name="A", dtype=env.inp_dtype)
# B placeholder tensor in tiled data format
-B = tvm.placeholder((m, n, env.BLOCK_OUT, env.BLOCK_IN), name="B", dtype=env.wgt_dtype)
+B = te.placeholder((m, n, env.BLOCK_OUT, env.BLOCK_IN), name="B", dtype=env.wgt_dtype)
# A copy buffer
-A_buf = tvm.compute((o, n, env.BATCH, env.BLOCK_IN), lambda *i: A(*i), "A_buf")
+A_buf = te.compute((o, n, env.BATCH, env.BLOCK_IN), lambda *i: A(*i), "A_buf")
# B copy buffer
-B_buf = tvm.compute((m, n, env.BLOCK_OUT, env.BLOCK_IN), lambda *i: B(*i), "B_buf")
+B_buf = te.compute((m, n, env.BLOCK_OUT, env.BLOCK_IN), lambda *i: B(*i), "B_buf")
######################################################################
# Matrix Multiplication
# In order to implement matrix multiplication, the lambda function needs to
# include a reduction formula over the input channel dimension axes.
# To create a reduction formula, we can declare a reduction axis using
-# :code:`tvm.reduce_axis`, which takes in the range of reductions.
-# :code:`tvm.sum` takes in the expression to be reduced as well as
+# :code:`te.reduce_axis`, which takes in the range of reductions.
+# :code:`te.sum` takes in the expression to be reduced as well as
# the reduction axes to compute the sum of value over all k in the declared
# ranges.
#
# the computation should be done.
# Outer input feature reduction axis
-ko = tvm.reduce_axis((0, n), name="ko")
+ko = te.reduce_axis((0, n), name="ko")
# Inner input feature reduction axis
-ki = tvm.reduce_axis((0, env.BLOCK_IN), name="ki")
+ki = te.reduce_axis((0, env.BLOCK_IN), name="ki")
# Describe the in-VTA matrix multiplication
-C_buf = tvm.compute(
+C_buf = te.compute(
(o, m, env.BATCH, env.BLOCK_OUT),
lambda bo, co, bi, ci:
- tvm.sum(A_buf[bo, ko, bi, ki].astype(env.acc_dtype) *
+ te.sum(A_buf[bo, ko, bi, ki].astype(env.acc_dtype) *
B_buf[co, ko, ci, ki].astype(env.acc_dtype),
axis=[ko, ki]),
name="C_buf")
# input activation data format.
# Cast to output type, and send to main memory
-C = tvm.compute(
+C = te.compute(
(o, m, env.BATCH, env.BLOCK_OUT),
lambda *i: C_buf(*i).astype(env.inp_dtype),
name="C")
# :code:`C` in the following way:
# Let's take a look at the generated schedule
-s = tvm.create_schedule(C.op)
+s = te.create_schedule(C.op)
print(tvm.lower(s, [A, B, C], simple_mode=True))
######################################################################
import os
import tvm
+from tvm import te
import vta
import numpy as np
env.BLOCK_OUT)
# Convolution reduction axes
-dy = tvm.reduce_axis((0, kernel_h), name='dy')
-dx = tvm.reduce_axis((0, kernel_w), name='dx')
-ic = tvm.reduce_axis((0, in_channels // env.BLOCK_IN), name='ic')
-ic_tns = tvm.reduce_axis((0, env.BLOCK_IN), name='ic_tns')
+dy = te.reduce_axis((0, kernel_h), name='dy')
+dx = te.reduce_axis((0, kernel_w), name='dx')
+ic = te.reduce_axis((0, in_channels // env.BLOCK_IN), name='ic')
+ic_tns = te.reduce_axis((0, env.BLOCK_IN), name='ic_tns')
# Input placeholder tensors
-data = tvm.placeholder(data_shape,
+data = te.placeholder(data_shape,
name="data",
dtype=env.inp_dtype)
-kernel = tvm.placeholder(kernel_shape,
+kernel = te.placeholder(kernel_shape,
name="kernel",
dtype=env.wgt_dtype)
data_buf = topi.nn.pad(data,
[0, 0, pad_h, pad_w, 0, 0],
name="data_buf")
-kernel_buf = tvm.compute(kernel_shape, lambda *i: kernel(*i), "kernel_buf")
+kernel_buf = te.compute(kernel_shape, lambda *i: kernel(*i), "kernel_buf")
# Declare 2D convolution
-res_conv = tvm.compute(
+res_conv = te.compute(
output_shape,
- lambda bo, co, i, j, bi, ci: tvm.sum(
+ lambda bo, co, i, j, bi, ci: te.sum(
data_buf[bo, ic, i*stride_h+dy, j*stride_w+dx, bi, ic_tns].astype(env.acc_dtype) *
kernel_buf[co, ic, dy, dx, ci, ic_tns].astype(env.acc_dtype),
axis=[ic, dy, dx, ic_tns]),
name="res_conv")
# Add shift stage for fix-point normalization
-res_shr = tvm.compute(output_shape,
+res_shr = te.compute(output_shape,
lambda *i: res_conv(*i) >> 8,
name="res_shr")
# Apply clipping between (0, input max value)
inp_max = (1 << (env.INP_WIDTH - 1)) - 1
-res_max = tvm.compute(output_shape,
- lambda *i: tvm.max(res_shr(*i), 0),
+res_max = te.compute(output_shape,
+ lambda *i: tvm.te.max(res_shr(*i), 0),
"res_max")
-res_min = tvm.compute(output_shape,
- lambda *i: tvm.min(res_max(*i), inp_max),
+res_min = te.compute(output_shape,
+ lambda *i: tvm.te.min(res_max(*i), inp_max),
"res_min")
# Result Tensor
-res = tvm.compute(output_shape,
+res = te.compute(output_shape,
lambda *i: res_min(*i).astype(env.inp_dtype),
name="res")
# - Lowering to VTA hardware intrinsics
# Create TVM schedule
-s = tvm.create_schedule(res.op)
+s = te.create_schedule(res.op)
# Let's look at the default TVM schedule
print(tvm.lower(s, [data, kernel, res], simple_mode=True))
# Perform virtual thread split along output channel outer axis
_, tx = s[res].split(oc_out, factor=v_threads)
s[res].reorder(tx, b_out)
-s[res].bind(tx, tvm.thread_axis("cthread"))
+s[res].bind(tx, te.thread_axis("cthread"))
# Let's look at the current TVM schedule after blocking and virtual threading
print(tvm.lower(s, [data, kernel, res], simple_mode=True))
import os
import tvm
+from tvm import te
import vta
import numpy as np
from tvm import rpc
num_ops = in_channels * out_channels * batch_size * 2
# Reduction axes
-ic = tvm.reduce_axis((0, in_channels // env.BLOCK_IN), name='ic')
-ic_tns = tvm.reduce_axis((0, env.BLOCK_IN), name='ic_tns')
+ic = te.reduce_axis((0, in_channels // env.BLOCK_IN), name='ic')
+ic_tns = te.reduce_axis((0, env.BLOCK_IN), name='ic_tns')
# Input placeholder tensors
-data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype)
-weight = tvm.placeholder(weight_shape, name="weight", dtype=env.wgt_dtype)
+data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype)
+weight = te.placeholder(weight_shape, name="weight", dtype=env.wgt_dtype)
# Copy buffers
-data_buf = tvm.compute(data_shape,
+data_buf = te.compute(data_shape,
lambda *i: data(*i),
"data_buf")
-weight_buf = tvm.compute(weight_shape,
+weight_buf = te.compute(weight_shape,
lambda *i: weight(*i),
"weight_buf")
# Declare matrix multiply computation
-res_gemm = tvm.compute(output_shape,
- lambda bo, co, bi, ci: tvm.sum(
+res_gemm = te.compute(output_shape,
+ lambda bo, co, bi, ci: te.sum(
data_buf[bo, ic, bi, ic_tns].astype(env.acc_dtype) *
weight_buf[co, ic, ci, ic_tns].astype(env.acc_dtype),
axis=[ic, ic_tns]),
name="res_gem")
# Add shift stage for fix-point normalization
-res_shr = tvm.compute(output_shape,
+res_shr = te.compute(output_shape,
lambda *i: res_gemm(*i) >> env.INP_WIDTH,
name="res_shr")
# Apply clipping between (0, input max value)
inp_max = (1<<(env.INP_WIDTH-1))-1
-res_max = tvm.compute(output_shape,
- lambda *i: tvm.max(res_shr(*i), 0),
+res_max = te.compute(output_shape,
+ lambda *i: tvm.te.max(res_shr(*i), 0),
"res_max")
-res_min = tvm.compute(output_shape,
- lambda *i: tvm.min(res_max(*i), inp_max),
+res_min = te.compute(output_shape,
+ lambda *i: tvm.te.min(res_max(*i), inp_max),
"res_min")
# Apply typecast to input data type before sending results back
-res = tvm.compute(output_shape,
+res = te.compute(output_shape,
lambda *i: res_min(*i).astype(env.inp_dtype),
name="res")
# Create TVM schedule
-s = tvm.create_schedule(res.op)
+s = te.create_schedule(res.op)
# Let's look at the default TVM schedule
print(tvm.lower(s, [data, weight, res], simple_mode=True))
import os
import tvm
+from tvm import te
import vta
import numpy as np
# Batch factor o - total 1 x 1 = 1
o = 1
# A placeholder tensor in tiled data format
-A = tvm.placeholder((o, m, env.BATCH, env.BLOCK_OUT), name="A", dtype=env.acc_dtype)
+A = te.placeholder((o, m, env.BATCH, env.BLOCK_OUT), name="A", dtype=env.acc_dtype)
# B placeholder tensor in tiled data format
-B = tvm.placeholder((o, m, env.BATCH, env.BLOCK_OUT), name="B", dtype=env.acc_dtype)
+B = te.placeholder((o, m, env.BATCH, env.BLOCK_OUT), name="B", dtype=env.acc_dtype)
######################################################################
# Copy Buffers
# This can later be interpreted by the compiler as a cached read operation.
# A copy buffer
-A_buf = tvm.compute((o, m, env.BATCH, env.BLOCK_OUT), lambda *i: A(*i), "A_buf")
+A_buf = te.compute((o, m, env.BATCH, env.BLOCK_OUT), lambda *i: A(*i), "A_buf")
# B copy buffer
-B_buf = tvm.compute((o, m, env.BATCH, env.BLOCK_OUT), lambda *i: B(*i), "B_buf")
+B_buf = te.compute((o, m, env.BATCH, env.BLOCK_OUT), lambda *i: B(*i), "B_buf")
######################################################################
# Vector Addition
# the computation should be done.
# Describe the in-VTA vector addition
-C_buf = tvm.compute(
+C_buf = te.compute(
(o, m, env.BATCH, env.BLOCK_OUT),
lambda *i: A_buf(*i).astype(env.acc_dtype) + B_buf(*i).astype(env.acc_dtype),
name="C_buf")
# input activation data format.
# Cast to output type, and send to main memory
-C = tvm.compute(
+C = te.compute(
(o, m, env.BATCH, env.BLOCK_OUT),
lambda *i: C_buf(*i).astype(env.inp_dtype),
name="C")
# :code:`C` in the following way:
# Let's take a look at the generated schedule
-s = tvm.create_schedule(C.op)
+s = te.create_schedule(C.op)
print(tvm.lower(s, [A, B, C], simple_mode=True))