Move the related target modules into tvm.target.
API changes:
- tvm.target.current_target -> tvm.target.Target.current
- tvm.datatype -> tvm.target.datatype
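For downstream code the migration is mechanical; a minimal before/after sketch (the posit registration is purely illustrative):

    # before this change
    target = tvm.target.current_target(allow_none=False)
    tvm.datatype.register("posit", 150)

    # after this change
    target = tvm.target.Target.current(allow_none=False)
    tvm.target.datatype.register("posit", 150)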
ndarray
error
ir
+ target
intrin
tensor
schedule
- target
build
function
autotvm
----------
.. automodule:: tvm.target
:members:
+ :imported-members:
from . import stmt
from . import make
from . import ir_pass
-from . import codegen
from . import schedule
from . import ir_builder
from . import hybrid
from . import testing
from . import error
-from . import datatype
from .api import *
import json
import numpy as np
from .base import _LIB, check_call
-from .. import _api_internal
tvm_shape_index_t = ctypes.c_int64
_fields_ = [("data", ctypes.POINTER(ctypes.c_byte)),
("size", ctypes.c_size_t)]
+
class DataType(ctypes.Structure):
"""TVM datatype structure"""
_fields_ = [("type_code", ctypes.c_uint8),
bits = 64
head = ""
elif head.startswith("custom"):
+ # pylint: disable=import-outside-toplevel
+ import tvm.runtime._ffi_api
low, high = head.find('['), head.find(']')
if not low or not high or low >= high:
raise ValueError("Badly formatted custom type string %s" % type_str)
type_name = head[low + 1:high]
- self.type_code = _api_internal._datatype_get_type_code(type_name)
+ self.type_code = tvm.runtime._ffi_api._datatype_get_type_code(type_name)
head = head[high+1:]
else:
raise ValueError("Do not know how to handle type %s" % type_str)
def __repr__(self):
+ # pylint: disable=import-outside-toplevel
if self.bits == 1 and self.lanes == 1:
return "bool"
if self.type_code in DataType.CODE2STR:
type_name = DataType.CODE2STR[self.type_code]
else:
+ import tvm.runtime._ffi_api
type_name = "custom[%s]" % \
- _api_internal._datatype_get_type_name(self.type_code)
+ tvm.runtime._ffi_api._datatype_get_type_name(self.type_code)
x = "%s%d" % (type_name, self.bits)
if self.lanes != 1:
x += "x%d" % self.lanes
self.device_type = device_type
self.device_id = device_id
+ def _GetDeviceAttr(self, device_type, device_id, attr_id):
+ """Internal helper function to invoke runtime.GetDeviceAttr"""
+ # pylint: disable=import-outside-toplevel
+ import tvm.runtime._ffi_api
+ return tvm.runtime._ffi_api.GetDeviceAttr(
+ device_type, device_id, attr_id)
+
@property
def exist(self):
"""Whether this device exist."""
- return _api_internal._GetDeviceAttr(
+ return self._GetDeviceAttr(
self.device_type, self.device_id, 0) != 0
@property
def max_threads_per_block(self):
"""Maximum number of threads on each block."""
- return _api_internal._GetDeviceAttr(
+ return self._GetDeviceAttr(
self.device_type, self.device_id, 1)
@property
def warp_size(self):
"""Number of threads that executes in concurrent."""
- return _api_internal._GetDeviceAttr(
+ return self._GetDeviceAttr(
self.device_type, self.device_id, 2)
@property
def max_shared_memory_per_block(self):
"""Total amount of shared memory per block in bytes."""
- return _api_internal._GetDeviceAttr(
+ return self._GetDeviceAttr(
self.device_type, self.device_id, 3)
@property
version : str
The version string in `major.minor` format.
"""
- return _api_internal._GetDeviceAttr(
+ return self._GetDeviceAttr(
self.device_type, self.device_id, 4)
@property
def device_name(self):
"""Return the string name of device."""
- return _api_internal._GetDeviceAttr(
+ return self._GetDeviceAttr(
self.device_type, self.device_id, 5)
@property
def max_clock_rate(self):
"""Return the max clock frequency of device."""
- return _api_internal._GetDeviceAttr(
+ return self._GetDeviceAttr(
self.device_type, self.device_id, 6)
@property
def multi_processor_count(self):
"""Return the number of compute units of device."""
- return _api_internal._GetDeviceAttr(
+ return self._GetDeviceAttr(
self.device_type, self.device_id, 7)
@property
dims: List of int
The maximum length of threadIdx.x, threadIdx.y, threadIdx.z
"""
- return json.loads(_api_internal._GetDeviceAttr(
+ return json.loads(self._GetDeviceAttr(
self.device_type, self.device_id, 8))
def sync(self):
def _alter_conv2d_layout(attrs, inputs, tinfo):
workload = get_conv2d_workload(...)
dispatch_ctx = autotvm.task.DispatchContext.current
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
config = dispatch_ctx.query(target, workload)
# Get conv2d_NCHWc workload from config
def dispatch_func(func, *args, **kwargs):
"""The wrapped dispatch function"""
- tgt = _target.current_target()
+ tgt = _target.Target.current()
workload = func(*args, **kwargs)
cfg = DispatchContext.current.query(tgt, workload)
if cfg.is_fallback and not cfg.template_key:
from tvm.runtime import Object, ndarray
from tvm.ir import container
+from tvm.target import codegen
+
from . import api
from . import _api_internal
from . import tensor
from . import expr
from . import ir_pass
from . import stmt as _stmt
-from . import codegen
from . import target as _target
from . import make
from .stmt import LoweredFunc
"LoweredFunc.")
if not isinstance(inputs, (dict, container.Map)):
- target = _target.current_target() if target is None else target
+ target = _target.Target.current() if target is None else target
target = target if target else "llvm"
target_flist = {target: flist}
else:
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Code generation related functions."""
-import tvm._ffi
-
-def build_module(lowered_func, target):
- """Build lowered_func into Module.
-
- Parameters
- ----------
- lowered_func : LoweredFunc
- The lowered function
-
- target : str
- The target module type.
-
- Returns
- -------
- module : Module
- The corressponding module.
- """
- return _Build(lowered_func, target)
-
-tvm._ffi._init_api("tvm.codegen")
# under the License.
"""Util to invoke clang in the system."""
# pylint: disable=invalid-name
-from __future__ import absolute_import as _abs
import subprocess
-from .._ffi.base import py_str
-from .. import codegen
+from tvm._ffi.base import py_str
+import tvm.target
from . import util
matches the major llvm version that built with tvm
"""
cc_list = []
- if hasattr(codegen, "llvm_version_major"):
- major = codegen.llvm_version_major()
+ major = tvm.target.codegen.llvm_version_major(allow_none=True)
+ if major is not None:
cc_list += ["clang-%d.0" % major]
cc_list += ["clang-%d" % major]
cc_list += ["clang"]
"""Utility for ROCm backend"""
import subprocess
from os.path import join, exists
+
+from tvm._ffi.base import py_str
+import tvm.target
+
from . import util
-from .._ffi.base import py_str
-from .. import codegen
from ..api import register_func, convert
def find_lld(required=True):
matches the major llvm version that built with tvm
"""
lld_list = []
- if hasattr(codegen, "llvm_version_major"):
- major = codegen.llvm_version_major()
+ major = tvm.target.codegen.llvm_version_major(allow_none=True)
+ if major is not None:
lld_list += ["ld.lld-%d.0" % major]
lld_list += ["ld.lld-%d" % major]
lld_list += ["ld.lld"]
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Custom datatype functionality"""
-import tvm._ffi
-
-from . import make as _make
-from .api import convert
-from .expr import Call as _Call, Cast as _Cast, FloatImm as _FloatImm
-from ._ffi.runtime_ctypes import DataType
-from . import _api_internal
-
-
-def register(type_name, type_code):
- """Register a custom datatype with the given type name and type code
- Currently, the type code is manually allocated by the user, and the
- user must ensure that no two custom types share the same code.
- Generally, this should be straightforward, as the user will be
- manually registering all of their custom types.
-
- Parameters
- ----------
- type_name : str
- The name of the custom datatype
-
- type_code : int
- The type's code, which should be >= kCustomBegin
- """
- _api_internal._datatype_register(type_name, type_code)
-
-
-def get_type_name(type_code):
- """Get the type name from the type code
-
- Parameters
- ----------
- type_code : int
- The type code
- """
- return _api_internal._datatype_get_type_name(type_code)
-
-
-def get_type_code(type_name):
- """Get the type code from the type name
-
- Parameters
- ----------
- type_name : str
- The type name
- """
- return _api_internal._datatype_get_type_code(type_name)
-
-
-def get_type_registered(type_code):
- """Get a boolean representing whether the type is registered
-
- Parameters
- ----------
- type_code: int
- The type code
- """
- return _api_internal._datatype_get_type_registered(type_code)
-
-
-def register_op(lower_func, op_name, target, type_name, src_type_name=None):
- """Register an external function which computes the given op.
-
- Currently, this will only work with Casts and binary expressions
- whose arguments are named `a` and `b`.
- TODO(gus) figure out what other special cases must be handled by
- looking through expr.py.
-
- Parameters
- ----------
- lower_func : function
- The lowering function to call. See create_lower_func.
-
- op_name : str
- The name of the operation which the function computes, given by its
- Halide::Internal class name (e.g. Add, LE, Cast).
-
- target : str
- The name of codegen target.
-
- type_name : str
- The name of the custom datatype, e.g. posit (but not custom[posit]8).
-
- src_type_name : str
- If op_name is "Cast", then this should be set to the source datatype of
- the argument to the Cast. If op_name is not "Cast", this is unused.
- """
-
- if op_name == "Cast":
- assert src_type_name is not None
- lower_func_name = "tvm.datatype.lower." + target + "." + op_name + "." \
- + type_name + "." + src_type_name
- else:
- lower_func_name = "tvm.datatype.lower." + target + "." + op_name + "." \
- + type_name
- tvm._ffi.register_func(lower_func_name, lower_func)
-
-
-def create_lower_func(extern_func_name):
- """Returns a function which lowers an operation to a function call.
-
- Parameters
- ----------
- extern_func_name : str
- The name of the extern "C" function to lower to
- """
-
- def lower(op):
- """
- Takes an op---either a Cast or a binary op (e.g. an Add) and returns a
- call to the specified external function, passing the op's argument
- (Cast) or arguments (a binary op). The return type of the call depends
- on the type of the op: if it is a custom type, then a uint of the same
- width as the custom type is returned. Otherwise, the type is
- unchanged."""
- dtype = op.dtype
- t = DataType(dtype)
- if get_type_registered(t.type_code):
- dtype = "uint" + str(t.bits)
- if t.lanes > 1:
- dtype += "x" + str(t.lanes)
- if isinstance(op, (_Cast, _FloatImm)):
- return _make.Call(dtype, extern_func_name, convert([op.value]),
- _Call.Extern, None, 0)
- return _make.Call(dtype, extern_func_name, convert([op.a, op.b]),
- _Call.Extern, None, 0)
-
- return lower
_internal_assert(func_id == "max_num_threads", "This function cannot be directly invoked!")
_internal_assert(args.__len__() <= 1, "At most one argument accepted!")
if args.__len__() == 0:
- res = _tgt.current_target().max_num_threads
+ res = _tgt.Target.current().max_num_threads
else:
_internal_assert(isinstance(args[0], _expr.IntImm), "In tvm bool should be uint")
- res = _tgt.current_target(args[0].value).max_num_threads
+ res = _tgt.Target.current(args[0].value).max_num_threads
return _api.convert(res)
def max_num_threads(allow_none=True):
"""Get max number of threads for GPU targets."""
- return target.current_target(allow_none).max_num_threads
+ return target.Target.current(allow_none).max_num_threads
HYBRID_GLOBALS = {
"""Expression Intrinsics and math functions in TVM."""
# pylint: disable=redefined-builtin
import tvm._ffi
-import tvm.codegen
+import tvm.target.codegen
from . import make as _make
from .api import convert, const
call : Expr
The call expression.
"""
- llvm_id = tvm.codegen.llvm_lookup_intrinsic_id(name)
+ llvm_id = tvm.target.codegen.llvm_lookup_intrinsic_id(name)
assert llvm_id != 0, "%s is not an LLVM intrinsic" % name
return call_pure_intrin(dtype, 'llvm_intrin', tvm.const(llvm_id, 'uint32'), *args)
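For context, the call-side contract of call_llvm_intrin is unchanged by the import move; a hedged usage sketch (llvm.sqrt.f32 is just an illustrative intrinsic, and the leading uint32 constant is the intrinsic's signature-argument count):

    x = tvm.var("x", dtype="float32")
    y = tvm.call_llvm_intrin("float32", "llvm.sqrt.f32",
                             tvm.const(1, "uint32"), x)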
def _update_target(self, target):
"""Update target."""
- target = target if target else tvm.target.current_target()
+ target = target if target else tvm.target.Target.current()
if target is None:
raise ValueError("Target is not set in env or passed as argument.")
tgts = {}
from .backend.vm import VMExecutor
def _update_target(target):
- target = target if target else _target.current_target()
+ target = target if target else _target.Target.current()
if target is None:
raise ValueError("Target is not set in env or passed as argument.")
def is_fast_int8_on_intel():
""" Checks whether the hardware has support for fast Int8 arithmetic operations. """
- target = tvm.target.current_target(allow_none=False)
+ target = tvm.target.Target.current(allow_none=False)
intel_supported_arches = {'-mcpu=skylake-avx512', '-mcpu=cascadelake'}
return intel_supported_arches.intersection(set(target.options))
def is_fast_int8_on_arm():
""" Checks whether the hardware has support for fast Int8 arithmetic operations. """
- target = tvm.target.current_target(allow_none=False)
+ target = tvm.target.Target.current(allow_none=False)
return '+v8.2a,+dotprod' in ' '.join(target.options)
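Both helpers require an active target scope (allow_none=False raises otherwise); a small sketch, with an illustrative mcpu flag:

    with tvm.target.create("llvm -mcpu=cascadelake"):
        assert is_fast_int8_on_intel()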
########################
func = mod['main']
func = _quantize.CreateStatsCollector(func)
- if tvm.target.current_target():
- target = tvm.target.current_target()
+ if tvm.target.Target.current():
+ target = tvm.target.Target.current()
ctx = tvm.context(target.target_name)
else:
target = 'llvm'
# under the License.
#pylint: disable=unused-argument,inconsistent-return-statements
"""Internal module for registering attribute for annotation."""
-from __future__ import absolute_import
-
-from ... import target as _target
+import tvm
from .. import expr as _expr
from .. import analysis as _analysis
from ..base import register_relay_node
@register_partition_function("add")
def add_partition_function(ref_call, new_args, ctx):
"""Rewrite function for ewise add for partition"""
- target = _target.current_target()
+ target = tvm.target.Target.current()
if target and 'cuda' in target.keys:
#TODO(wuwei/ziheng) cuda specific rules
return add_partition_generic(ref_call, new_args, ctx)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Target management API of TVM.
-
-TVM's target string is in fomat ``<target_name> [-option=value]...``.
-
-Note
-----
-The list of options include:
-
-- **-device=<device name>**
-
- The device name.
-
-- **-mtriple=<target triple>** or **-target**
-
- Specify the target triple, which is useful for cross
- compilation.
-
-- **-mcpu=<cpuname>**
-
- Specify a specific chip in the current architecture to
- generate code for. By default this is infered from the
- target triple and autodetected to the current architecture.
-
-- **-mattr=a1,+a2,-a3,...**
-
- Override or control specific attributes of the target,
- such as whether SIMD operations are enabled or not. The
- default set of attributes is set by the current CPU.
-
-- **-system-lib**
-
- Build TVM system library module. System lib is a global module that contains
- self registered functions in program startup. User can get the module using
- :any:`tvm.runtime.system_lib`.
- It is useful in environments where dynamic loading api like dlopen is banned.
- The system lib will be available as long as the result code is linked by the program.
-
-We can use :any:`tvm.target.create` to create a tvm.target.Target from the target string.
-We can also use other specific function in this module to create specific targets.
-"""
-import warnings
-import tvm._ffi
-
-from tvm.runtime import Object
-from ._ffi.base import _LIB_NAME
-from . import _api_internal
-
-try:
- from decorator import decorate
-except ImportError as err_msg:
- # Allow decorator to be missing in runtime
- if _LIB_NAME != "libtvm_runtime.so":
- raise err_msg
-
-def _merge_opts(opts, new_opts):
- """Helper function to merge options"""
- if isinstance(new_opts, str):
- new_opts = new_opts.split()
- if new_opts:
- opt_set = set(opts)
- new_opts = [opt for opt in new_opts if opt not in opt_set]
- return opts + new_opts
- return opts
-
-
-@tvm._ffi.register_object
-class Target(Object):
- """Target device information, use through TVM API.
-
- Note
- ----
- Do not use class constructor, you can create target using the following functions
-
- - :any:`tvm.target.create` create target from string
- - :any:`tvm.target.arm_cpu` create arm_cpu target
- - :any:`tvm.target.cuda` create CUDA target
- - :any:`tvm.target.rocm` create ROCM target
- - :any:`tvm.target.mali` create Mali target
- - :any:`tvm.target.intel_graphics` create Intel Graphics target
- """
- def __new__(cls):
- # Always override new to enable class
- obj = Object.__new__(cls)
- obj._keys = None
- obj._options = None
- obj._libs = None
- return obj
-
- @property
- def keys(self):
- if not self._keys:
- self._keys = [k.value for k in self.keys_array]
- return self._keys
-
- @property
- def options(self):
- if not self._options:
- self._options = [o.value for o in self.options_array]
- return self._options
-
- @property
- def libs(self):
- if not self._libs:
- self._libs = [l.value for l in self.libs_array]
- return self._libs
-
- @property
- def model(self):
- for opt in self.options_array:
- if opt.value.startswith('-model='):
- return opt.value[7:]
- return 'unknown'
-
- @property
- def mcpu(self):
- """Returns the mcpu from the target if it exists."""
- mcpu = ''
- if self.options is not None:
- for opt in self.options:
- if 'mcpu' in opt:
- mcpu = opt.split('=')[1]
- return mcpu
-
- def __enter__(self):
- _api_internal._EnterTargetScope(self)
- return self
-
- def __exit__(self, ptype, value, trace):
- _api_internal._ExitTargetScope(self)
-
-
-@tvm._ffi.register_object
-class GenericFunc(Object):
- """GenericFunc node reference. This represents a generic function
- that may be specialized for different targets. When this object is
- called, a specialization is chosen based on the current target.
-
- Note
- ----
- Do not construct an instance of this object, it should only ever be
- used as a return value from calling into C++.
- """
- def __call__(self, *args):
- return _api_internal._GenericFuncCallFunc(self, *args)
-
- def set_default(self, func, allow_override=False):
- """Set the default function to be used if no specializations match
- the current target.
-
- Parameters
- ----------
- func : function
- The default function
-
- allow_override : bool
- Whether to allow the current default to be overridden
- """
- _api_internal._GenericFuncSetDefault(self, func, allow_override)
-
- def register(self, func, key_list, allow_override=False):
- """Register a specialization for this GenericFunc.
-
- Parameters
- ----------
- func : function
- The function to be registered.
-
- key : str or list of str
- The key to be registered.
-
- allow_override : bool, optional
- Whether to allow existing keys to be overridden.
- """
- key_list = [key_list] if isinstance(key_list, str) else key_list
- _api_internal._GenericFuncRegisterFunc(self, func, key_list, allow_override)
-
-
-def get_native_generic_func(name):
- """Get a generic function from the global registry. If no
- function is registered under the given name, a new generic
- function is created.
-
- Parameters
- ----------
- name : string
- The name of the generic function to get
-
- Returns
- -------
- func : GenericFunc
- The generic function for the given name
- """
- return _api_internal._GenericFuncGetGlobal(name)
-
-
-def override_native_generic_func(func_name):
- """Override a generic function defined in C++
-
- Generic function allows registration of further functions
- that can be dispatched on current target context.
- If no registered dispatch is matched, the fdefault will be called.
-
- Parameters
- ----------
- func_name : string
- The name of the generic func to be overridden
-
- Returns
- -------
- fgeneric : function
- A wrapped generic function.
-
- Example
- -------
- .. code-block:: python
-
- import tvm
- # wrap function as target generic
- @tvm.target.override_native_generic_func("my_func")
- def my_func(a):
- return a + 1
- # register specialization of my_func under target cuda
- @my_func.register("cuda")
- def my_func_cuda(a):
- return a + 2
- # displays 3, because my_func is called
- print(my_func(2))
- # displays 4, because my_func_cuda is called
- with tvm.target.cuda():
- print(my_func(2))
- """
- generic_func_node = get_native_generic_func(func_name)
-
- def fdecorate(fdefault):
- """Wrap a target generic function, overriding the previous
- default that was set for the generic function.
-
- Parameters
- ----------
- fdefault : function
- The default function.
-
- Returns
- -------
- fgeneric : function
- A wrapped generic function.
-
- """
- generic_func_node.set_default(fdefault, allow_override=True)
-
- def register(key, func=None, override=True):
- """Register function to be the dispatch function.
-
- Parameters
- ----------
- key : str or list of str
- The key to be registered.
-
- func : function
- The function to be registered.
-
- override : bool, optional
- Whether override existing registration.
-
- Returns
- -------
- The register function is necessary.
- """
- def _do_reg(myf):
- generic_func_node.register(myf, key, override)
- return myf
- if func:
- return _do_reg(func)
- return _do_reg
-
- def dispatch_func(func, *args, **kwargs):
- #pylint: disable=unused-argument
- """The wrapped dispath function"""
- if kwargs:
- raise RuntimeError(
- "Keyword arguments cannot be used when invoking generic_func %s" % func_name)
- return generic_func_node(*args)
- fresult = decorate(fdefault, dispatch_func)
- fresult.fdefault = fdefault
- fresult.register = register
- return fresult
- return fdecorate
-
-def generic_func(fdefault):
- """Wrap a target generic function.
-
- Generic function allows registration of further functions
- that can be dispatched on current target context.
- If no registered dispatch is matched, the fdefault will be called.
-
- Parameters
- ----------
- fdefault : function
- The default function.
-
- Returns
- -------
- fgeneric : function
- A wrapped generic function.
-
- Example
- -------
- .. code-block:: python
-
- import tvm
- # wrap function as target generic
- @tvm.target.generic_func
- def my_func(a):
- return a + 1
- # register specialization of my_func under target cuda
- @my_func.register("cuda")
- def my_func_cuda(a):
- return a + 2
- # displays 3, because my_func is called
- print(my_func(2))
- # displays 4, because my_func_cuda is called
- with tvm.target.cuda():
- print(my_func(2))
- """
- dispatch_dict = {}
- func_name = fdefault.__name__
-
- def register(key, func=None, override=False):
- """Register function to be the dispatch function.
-
- Parameters
- ----------
- key : str or list of str
- The key to be registered.
-
- func : function
- The function to be registered.
-
- override : bool
- Whether override existing registration.
-
- Returns
- -------
- The register function is necessary.
- """
- def _do_reg(myf):
- key_list = [key] if isinstance(key, str) else key
- for k in key_list:
- if k in dispatch_dict and not override:
- raise ValueError(
- "Key is already registered for %s" % func_name)
- dispatch_dict[k] = myf
- return myf
- if func:
- return _do_reg(func)
- return _do_reg
-
- def dispatch_func(func, *args, **kwargs):
- """The wrapped dispath function"""
- target = current_target()
- if target is None:
- return func(*args, **kwargs)
- for k in target.keys:
- if k in dispatch_dict:
- return dispatch_dict[k](*args, **kwargs)
- return func(*args, **kwargs)
- fdecorate = decorate(fdefault, dispatch_func)
- fdecorate.register = register
- fdecorate.fdefault = fdefault
- return fdecorate
-
-
-def cuda(model='unknown', options=None):
- """Returns a cuda target.
-
- Parameters
- ----------
- model: str
- The model of cuda device (e.g. 1080ti)
- options : str or list of str
- Additional options
- """
- opts = _merge_opts(['-model=%s' % model], options)
- return _api_internal._TargetCreate("cuda", *opts)
-
-
-def rocm(model='unknown', options=None):
- """Returns a ROCM target.
-
- Parameters
- ----------
- model: str
- The model of this device
- options : str or list of str
- Additional options
- """
- opts = _merge_opts(["-model=%s" % model], options)
- return _api_internal._TargetCreate("rocm", *opts)
-
-
-def mali(model='unknown', options=None):
- """Returns a ARM Mali GPU target.
-
- Parameters
- ----------
- model: str
- The model of this device
- options : str or list of str
- Additional options
- """
- opts = ["-device=mali", '-model=%s' % model]
- opts = _merge_opts(opts, options)
- return _api_internal._TargetCreate("opencl", *opts)
-
-
-def intel_graphics(model='unknown', options=None):
- """Returns an Intel Graphics target.
-
- Parameters
- ----------
- model: str
- The model of this device
- options : str or list of str
- Additional options
- """
- opts = ["-device=intel_graphics", '-model=%s' % model]
- opts = _merge_opts(opts, options)
- return _api_internal._TargetCreate("opencl", *opts)
-
-
-def opengl(model='unknown', options=None):
- """Returns a OpenGL target.
-
- Parameters
- ----------
- options : str or list of str
- Additional options
- """
- opts = _merge_opts(["-model=%s" % model], options)
- return _api_internal._TargetCreate("opengl", *opts)
-
-
-def arm_cpu(model='unknown', options=None):
- """Returns a ARM CPU target.
- This function will also download pre-tuned op parameters when there is none.
-
- Parameters
- ----------
- model: str
- SoC name or phone name of the arm board.
- options : str or list of str
- Additional options
- """
- trans_table = {
- "pixel2": ["-model=snapdragon835", "-target=arm64-linux-android -mattr=+neon"],
- "mate10": ["-model=kirin970", "-target=arm64-linux-android -mattr=+neon"],
- "mate10pro": ["-model=kirin970", "-target=arm64-linux-android -mattr=+neon"],
- "p20": ["-model=kirin970", "-target=arm64-linux-android -mattr=+neon"],
- "p20pro": ["-model=kirin970", "-target=arm64-linux-android -mattr=+neon"],
- "rasp3b": ["-model=bcm2837", "-target=armv7l-linux-gnueabihf -mattr=+neon"],
- "rasp4b": ["-model=bcm2711", "-target=arm-linux-gnueabihf -mattr=+neon"],
- "rk3399": ["-model=rk3399", "-target=aarch64-linux-gnu -mattr=+neon"],
- "pynq": ["-model=pynq", "-target=armv7a-linux-eabi -mattr=+neon"],
- "ultra96": ["-model=ultra96", "-target=aarch64-linux-gnu -mattr=+neon"],
- }
- pre_defined_opt = trans_table.get(model, ["-model=%s" % model])
-
- opts = ["-device=arm_cpu"] + pre_defined_opt
- opts = _merge_opts(opts, options)
- return _api_internal._TargetCreate("llvm", *opts)
-
-
-def rasp(options=None):
- """Return a Raspberry 3b target.
-
- Parameters
- ----------
- options : str or list of str
- Additional options
- """
- warnings.warn('tvm.target.rasp() is going to be deprecated. '
- 'Please use tvm.target.arm_cpu("rasp3b")')
- return arm_cpu('rasp3b', options)
-
-
-def vta(model='unknown', options=None):
- opts = ["-device=vta", '-keys=cpu', '-model=%s' % model]
- opts = _merge_opts(opts, options)
- ret = _api_internal._TargetCreate("ext_dev", *opts)
- return ret
-
-
-def bifrost(model='unknown', options=None):
- """Return an ARM Mali GPU target (Bifrost architecture).
-
- Parameters
- ----------
- options : str or list of str
- Additional options
- """
- opts = ["-device=bifrost", '-model=%s' % model]
- opts = _merge_opts(opts, options)
- return _api_internal._TargetCreate("opencl", *opts)
-
-
-def create(target_str):
- """Get a target given target string.
-
- Parameters
- ----------
- target_str : str
- The target string.
-
- Returns
- -------
- target : Target
- The target object
-
- Note
- ----
- See the note on :any:`tvm.target` on target string format.
- """
- if isinstance(target_str, Target):
- return target_str
- if not isinstance(target_str, str):
- raise ValueError("target_str has to be string type")
-
- return _api_internal._TargetFromString(target_str)
-
-
-def current_target(allow_none=True):
- """Returns the current target.
-
- Parameters
- ----------
- allow_none : bool
- Whether allow the current target to be none
-
- Raises
- ------
- ValueError if current target is not set.
- """
- return _api_internal._GetCurrentTarget(allow_none)
--- /dev/null
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Target description and codgen module.
+
+TVM's target string is in fomat ``<target_name> [-option=value]...``.
+
+Note
+----
+The list of options include:
+
+- **-device=<device name>**
+
+ The device name.
+
+- **-mtriple=<target triple>** or **-target**
+
+ Specify the target triple, which is useful for cross
+ compilation.
+
+- **-mcpu=<cpuname>**
+
+ Specify a specific chip in the current architecture to
+   generate code for. By default this is inferred from the
+ target triple and autodetected to the current architecture.
+
+- **-mattr=a1,+a2,-a3,...**
+
+ Override or control specific attributes of the target,
+ such as whether SIMD operations are enabled or not. The
+ default set of attributes is set by the current CPU.
+
+- **-system-lib**
+
+ Build TVM system library module. System lib is a global module that contains
+ self registered functions in program startup. User can get the module using
+ :any:`tvm.runtime.system_lib`.
+   It is useful in environments where dynamic loading APIs like dlopen are banned.
+ The system lib will be available as long as the result code is linked by the program.
+
+We can use :py:func:`~tvm.target.create` to create a tvm.target.Target from the target string.
+We can also use other specific functions in this module to create specific targets.
+"""
+from .target import Target, create
+from .target import cuda, rocm, mali, intel_graphics, opengl, arm_cpu, rasp, vta, bifrost
+from .generic_func import GenericFunc
+from .generic_func import generic_func, get_native_generic_func, override_native_generic_func
+from . import datatype
+from . import codegen
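A quick sketch of the package surface after the move (the mcpu value is illustrative):

    import tvm.target

    tgt = tvm.target.create("llvm -mcpu=core-avx2")
    with tgt:
        # Target.current() replaces the old tvm.target.current_target()
        assert str(tvm.target.Target.current()) == str(tgt)
    # the submodules ride along with the package
    from tvm.target import codegen, datatype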
--- /dev/null
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""FFI APIs for tvm.target"""
+import tvm._ffi
+
+
+tvm._ffi._init_api("target", __name__)
--- /dev/null
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Code generation related functions."""
+from . import _ffi_api
+
+
+def build_module(lowered_func, target):
+ """Build lowered_func into Module.
+
+ Parameters
+ ----------
+ lowered_func : LoweredFunc
+ The lowered function
+
+ target : str
+ The target module type.
+
+ Returns
+ -------
+ module : runtime.Module
+        The corresponding module.
+ """
+ return _ffi_api.Build(lowered_func, target)
+
+
+def llvm_lookup_intrinsic_id(name):
+ """Lookup LLVM intrinsic id by name.
+
+ Parameters
+ ----------
+ name : str
+ The name of the intrinsic.
+
+ Returns
+ -------
+ intrin_id : int
+ The intrinsic id.
+ """
+ return _ffi_api.llvm_lookup_intrinsic_id(name)
+
+
+def llvm_version_major(allow_none=False):
+ """Get the major LLVM version.
+
+ Parameters
+ ----------
+ allow_none : bool
+        Whether to allow returning None when LLVM is not available.
+
+ Returns
+ -------
+ major : int
+ The major LLVM version.
+ """
+ try:
+ return _ffi_api.llvm_version_major()
+ except AttributeError:
+ if allow_none:
+ return None
+ raise RuntimeError(
+ "LLVM version is not available, please check if you build with LLVM")
--- /dev/null
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Custom datatype functionality"""
+import tvm._ffi
+
+import tvm.runtime._ffi_api
+from tvm.runtime import convert, DataType
+from tvm.expr import Call as _Call, Cast as _Cast, FloatImm as _FloatImm
+
+
+def register(type_name, type_code):
+ """Register a custom datatype with the given type name and type code
+ Currently, the type code is manually allocated by the user, and the
+ user must ensure that no two custom types share the same code.
+ Generally, this should be straightforward, as the user will be
+ manually registering all of their custom types.
+
+ Parameters
+ ----------
+ type_name : str
+ The name of the custom datatype
+
+ type_code : int
+ The type's code, which should be >= kCustomBegin
+ """
+ tvm.runtime._ffi_api._datatype_register(type_name, type_code)
+
+
+def get_type_name(type_code):
+ """Get the type name from the type code
+
+ Parameters
+ ----------
+ type_code : int
+ The type code
+ """
+ return tvm.runtime._ffi_api._datatype_get_type_name(type_code)
+
+
+def get_type_code(type_name):
+ """Get the type code from the type name
+
+ Parameters
+ ----------
+ type_name : str
+ The type name
+ """
+ return tvm.runtime._ffi_api._datatype_get_type_code(type_name)
+
+
+def get_type_registered(type_code):
+ """Get a boolean representing whether the type is registered
+
+ Parameters
+ ----------
+ type_code: int
+ The type code
+ """
+ return tvm.runtime._ffi_api._datatype_get_type_registered(type_code)
+
+
+def register_op(lower_func, op_name, target, type_name, src_type_name=None):
+ """Register an external function which computes the given op.
+
+ Currently, this will only work with Casts and binary expressions
+ whose arguments are named `a` and `b`.
+ TODO(gus) figure out what other special cases must be handled by
+ looking through expr.py.
+
+ Parameters
+ ----------
+ lower_func : function
+ The lowering function to call. See create_lower_func.
+
+ op_name : str
+ The name of the operation which the function computes, given by its
+ Halide::Internal class name (e.g. Add, LE, Cast).
+
+ target : str
+ The name of codegen target.
+
+ type_name : str
+ The name of the custom datatype, e.g. posit (but not custom[posit]8).
+
+ src_type_name : str
+ If op_name is "Cast", then this should be set to the source datatype of
+ the argument to the Cast. If op_name is not "Cast", this is unused.
+ """
+
+ if op_name == "Cast":
+ assert src_type_name is not None
+ lower_func_name = "tvm.datatype.lower." + target + "." + op_name + "." \
+ + type_name + "." + src_type_name
+ else:
+ lower_func_name = "tvm.datatype.lower." + target + "." + op_name + "." \
+ + type_name
+ tvm._ffi.register_func(lower_func_name, lower_func)
+
+
+def create_lower_func(extern_func_name):
+ """Returns a function which lowers an operation to a function call.
+
+ Parameters
+ ----------
+ extern_func_name : str
+ The name of the extern "C" function to lower to
+ """
+
+ def lower(op):
+ """
+ Takes an op---either a Cast or a binary op (e.g. an Add) and returns a
+ call to the specified external function, passing the op's argument
+ (Cast) or arguments (a binary op). The return type of the call depends
+ on the type of the op: if it is a custom type, then a uint of the same
+ width as the custom type is returned. Otherwise, the type is
+ unchanged."""
+ dtype = op.dtype
+ t = DataType(dtype)
+ if get_type_registered(t.type_code):
+ dtype = "uint" + str(t.bits)
+ if t.lanes > 1:
+ dtype += "x" + str(t.lanes)
+ if isinstance(op, (_Cast, _FloatImm)):
+ return _Call(dtype, extern_func_name, convert([op.value]),
+ _Call.Extern, None, 0)
+ return _Call(dtype, extern_func_name, convert([op.a, op.b]),
+ _Call.Extern, None, 0)
+
+ return lower
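Apart from the import path, usage is unchanged; a sketch assuming a hypothetical posit library whose extern "C" symbol FloatToPosit performs the cast (the type name and the code 150 are illustrative):

    from tvm.target import datatype

    datatype.register("posit", 150)
    assert datatype.get_type_code("posit") == 150
    # lower Cast(float -> posit) on llvm via the assumed extern symbol
    datatype.register_op(datatype.create_lower_func("FloatToPosit"),
                         "Cast", "llvm", "posit", src_type_name="float")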
--- /dev/null
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Generic function."""
+
+import tvm._ffi
+from tvm._ffi.base import _LIB_NAME
+
+try:
+    from decorator import decorate
+except ImportError as err_msg:
+    # Allow decorator to be missing in runtime
+    if _LIB_NAME != "libtvm_runtime.so":
+        raise err_msg
+
+from tvm.runtime import Object
+from .target import Target
+from . import _ffi_api
+
+
+@tvm._ffi.register_object
+class GenericFunc(Object):
+ """GenericFunc node reference. This represents a generic function
+ that may be specialized for different targets. When this object is
+ called, a specialization is chosen based on the current target.
+
+ Note
+ ----
+ Do not construct an instance of this object, it should only ever be
+ used as a return value from calling into C++.
+ """
+ def __call__(self, *args):
+ return _ffi_api.GenericFuncCallFunc(self, *args)
+
+ def set_default(self, func, allow_override=False):
+ """Set the default function to be used if no specializations match
+ the current target.
+
+ Parameters
+ ----------
+ func : function
+ The default function
+
+ allow_override : bool
+ Whether to allow the current default to be overridden
+ """
+ _ffi_api.GenericFuncSetDefault(self, func, allow_override)
+
+ def register(self, func, key_list, allow_override=False):
+ """Register a specialization for this GenericFunc.
+
+ Parameters
+ ----------
+ func : function
+ The function to be registered.
+
+        key_list : str or list of str
+ The key to be registered.
+
+ allow_override : bool, optional
+ Whether to allow existing keys to be overridden.
+ """
+ key_list = [key_list] if isinstance(key_list, str) else key_list
+ _ffi_api.GenericFuncRegisterFunc(self, func, key_list, allow_override)
+
+
+def get_native_generic_func(name):
+ """Get a generic function from the global registry. If no
+ function is registered under the given name, a new generic
+ function is created.
+
+ Parameters
+ ----------
+ name : string
+ The name of the generic function to get
+
+ Returns
+ -------
+ func : GenericFunc
+ The generic function for the given name
+ """
+ return _ffi_api.GenericFuncGetGlobal(name)
+
+
+def override_native_generic_func(func_name):
+ """Override a generic function defined in C++
+
+ Generic function allows registration of further functions
+ that can be dispatched on current target context.
+ If no registered dispatch is matched, the fdefault will be called.
+
+ Parameters
+ ----------
+ func_name : string
+ The name of the generic func to be overridden
+
+ Returns
+ -------
+ fgeneric : function
+ A wrapped generic function.
+
+ Example
+ -------
+ .. code-block:: python
+
+ import tvm
+ # wrap function as target generic
+ @tvm.target.override_native_generic_func("my_func")
+ def my_func(a):
+ return a + 1
+ # register specialization of my_func under target cuda
+ @my_func.register("cuda")
+ def my_func_cuda(a):
+ return a + 2
+ # displays 3, because my_func is called
+ print(my_func(2))
+ # displays 4, because my_func_cuda is called
+ with tvm.target.cuda():
+ print(my_func(2))
+ """
+ generic_func_node = get_native_generic_func(func_name)
+
+ def fdecorate(fdefault):
+ """Wrap a target generic function, overriding the previous
+ default that was set for the generic function.
+
+ Parameters
+ ----------
+ fdefault : function
+ The default function.
+
+ Returns
+ -------
+ fgeneric : function
+ A wrapped generic function.
+
+ """
+ generic_func_node.set_default(fdefault, allow_override=True)
+
+ def register(key, func=None, override=True):
+ """Register function to be the dispatch function.
+
+ Parameters
+ ----------
+ key : str or list of str
+ The key to be registered.
+
+ func : function
+ The function to be registered.
+
+ override : bool, optional
+ Whether override existing registration.
+
+ Returns
+ -------
+            The register function if func is not specified.
+ """
+ def _do_reg(myf):
+ generic_func_node.register(myf, key, override)
+ return myf
+ if func:
+ return _do_reg(func)
+ return _do_reg
+
+ def dispatch_func(func, *args, **kwargs):
+ #pylint: disable=unused-argument
+ """The wrapped dispath function"""
+ if kwargs:
+ raise RuntimeError(
+ "Keyword arguments cannot be used when invoking generic_func %s" % func_name)
+ return generic_func_node(*args)
+ fresult = decorate(fdefault, dispatch_func)
+ fresult.fdefault = fdefault
+ fresult.register = register
+ return fresult
+ return fdecorate
+
+def generic_func(fdefault):
+ """Wrap a target generic function.
+
+ Generic function allows registration of further functions
+ that can be dispatched on current target context.
+ If no registered dispatch is matched, the fdefault will be called.
+
+ Parameters
+ ----------
+ fdefault : function
+ The default function.
+
+ Returns
+ -------
+ fgeneric : function
+ A wrapped generic function.
+
+ Example
+ -------
+ .. code-block:: python
+
+ import tvm
+ # wrap function as target generic
+ @tvm.target.generic_func
+ def my_func(a):
+ return a + 1
+ # register specialization of my_func under target cuda
+ @my_func.register("cuda")
+ def my_func_cuda(a):
+ return a + 2
+ # displays 3, because my_func is called
+ print(my_func(2))
+ # displays 4, because my_func_cuda is called
+ with tvm.target.cuda():
+ print(my_func(2))
+ """
+ dispatch_dict = {}
+ func_name = fdefault.__name__
+
+ def register(key, func=None, override=False):
+ """Register function to be the dispatch function.
+
+ Parameters
+ ----------
+ key : str or list of str
+ The key to be registered.
+
+ func : function
+ The function to be registered.
+
+ override : bool
+ Whether override existing registration.
+
+ Returns
+ -------
+        The register function if func is not specified.
+ """
+ def _do_reg(myf):
+ key_list = [key] if isinstance(key, str) else key
+ for k in key_list:
+ if k in dispatch_dict and not override:
+ raise ValueError(
+ "Key is already registered for %s" % func_name)
+ dispatch_dict[k] = myf
+ return myf
+ if func:
+ return _do_reg(func)
+ return _do_reg
+
+ def dispatch_func(func, *args, **kwargs):
+ """The wrapped dispath function"""
+ target = Target.current()
+ if target is None:
+ return func(*args, **kwargs)
+ for k in target.keys:
+ if k in dispatch_dict:
+ return dispatch_dict[k](*args, **kwargs)
+ return func(*args, **kwargs)
+ fdecorate = decorate(fdefault, dispatch_func)
+ fdecorate.register = register
+ fdecorate.fdefault = fdefault
+ return fdecorate
--- /dev/null
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Target data structure."""
+import warnings
+import tvm._ffi
+
+from tvm.runtime import Object
+from . import _ffi_api
+
+
+@tvm._ffi.register_object
+class Target(Object):
+ """Target device information, use through TVM API.
+
+ Note
+ ----
+ Do not use class constructor, you can create target using the following functions
+
+ - :py:func:`~tvm.target.create` create target from string
+ - :py:func:`~tvm.target.arm_cpu` create arm_cpu target
+ - :py:func:`~tvm.target.cuda` create CUDA target
+ - :py:func:`~tvm.target.rocm` create ROCM target
+ - :py:func:`~tvm.target.mali` create Mali target
+ - :py:func:`~tvm.target.intel_graphics` create Intel Graphics target
+ """
+ def __new__(cls):
+ # Always override new to enable class
+ obj = Object.__new__(cls)
+ obj._keys = None
+ obj._options = None
+ obj._libs = None
+ return obj
+
+ @property
+ def keys(self):
+ if not self._keys:
+ self._keys = [k.value for k in self.keys_array]
+ return self._keys
+
+ @property
+ def options(self):
+ if not self._options:
+ self._options = [o.value for o in self.options_array]
+ return self._options
+
+ @property
+ def libs(self):
+ if not self._libs:
+ self._libs = [l.value for l in self.libs_array]
+ return self._libs
+
+ @property
+ def model(self):
+ for opt in self.options_array:
+ if opt.value.startswith('-model='):
+ return opt.value[7:]
+ return 'unknown'
+
+ @property
+ def mcpu(self):
+ """Returns the mcpu from the target if it exists."""
+ mcpu = ''
+ if self.options is not None:
+ for opt in self.options:
+ if 'mcpu' in opt:
+ mcpu = opt.split('=')[1]
+ return mcpu
+
+ def __enter__(self):
+ _ffi_api.EnterTargetScope(self)
+ return self
+
+ def __exit__(self, ptype, value, trace):
+ _ffi_api.ExitTargetScope(self)
+
+ @staticmethod
+ def current(allow_none=True):
+ """Returns the current target.
+
+ Parameters
+ ----------
+ allow_none : bool
+            Whether to allow the current target to be none
+
+ Raises
+ ------
+ ValueError if current target is not set.
+ """
+ return _ffi_api.GetCurrentTarget(allow_none)
+
+
+def _merge_opts(opts, new_opts):
+ """Helper function to merge options"""
+ if isinstance(new_opts, str):
+ new_opts = new_opts.split()
+ if new_opts:
+ opt_set = set(opts)
+ new_opts = [opt for opt in new_opts if opt not in opt_set]
+ return opts + new_opts
+ return opts
+
+
+def cuda(model='unknown', options=None):
+ """Returns a cuda target.
+
+ Parameters
+ ----------
+ model: str
+ The model of cuda device (e.g. 1080ti)
+ options : str or list of str
+ Additional options
+ """
+ opts = _merge_opts(['-model=%s' % model], options)
+ return _ffi_api.TargetCreate("cuda", *opts)
+
+
+def rocm(model='unknown', options=None):
+ """Returns a ROCM target.
+
+ Parameters
+ ----------
+ model: str
+ The model of this device
+ options : str or list of str
+ Additional options
+ """
+ opts = _merge_opts(["-model=%s" % model], options)
+ return _ffi_api.TargetCreate("rocm", *opts)
+
+
+def mali(model='unknown', options=None):
+ """Returns a ARM Mali GPU target.
+
+ Parameters
+ ----------
+ model: str
+ The model of this device
+ options : str or list of str
+ Additional options
+ """
+ opts = ["-device=mali", '-model=%s' % model]
+ opts = _merge_opts(opts, options)
+ return _ffi_api.TargetCreate("opencl", *opts)
+
+
+def intel_graphics(model='unknown', options=None):
+ """Returns an Intel Graphics target.
+
+ Parameters
+ ----------
+ model: str
+ The model of this device
+ options : str or list of str
+ Additional options
+ """
+ opts = ["-device=intel_graphics", '-model=%s' % model]
+ opts = _merge_opts(opts, options)
+ return _ffi_api.TargetCreate("opencl", *opts)
+
+
+def opengl(model='unknown', options=None):
+ """Returns a OpenGL target.
+
+ Parameters
+ ----------
+ options : str or list of str
+ Additional options
+ """
+ opts = _merge_opts(["-model=%s" % model], options)
+ return _ffi_api.TargetCreate("opengl", *opts)
+
+
+def arm_cpu(model='unknown', options=None):
+ """Returns a ARM CPU target.
+ This function will also download pre-tuned op parameters when there is none.
+
+ Parameters
+ ----------
+ model: str
+ SoC name or phone name of the arm board.
+ options : str or list of str
+ Additional options
+ """
+ trans_table = {
+ "pixel2": ["-model=snapdragon835", "-target=arm64-linux-android -mattr=+neon"],
+ "mate10": ["-model=kirin970", "-target=arm64-linux-android -mattr=+neon"],
+ "mate10pro": ["-model=kirin970", "-target=arm64-linux-android -mattr=+neon"],
+ "p20": ["-model=kirin970", "-target=arm64-linux-android -mattr=+neon"],
+ "p20pro": ["-model=kirin970", "-target=arm64-linux-android -mattr=+neon"],
+ "rasp3b": ["-model=bcm2837", "-target=armv7l-linux-gnueabihf -mattr=+neon"],
+ "rasp4b": ["-model=bcm2711", "-target=arm-linux-gnueabihf -mattr=+neon"],
+ "rk3399": ["-model=rk3399", "-target=aarch64-linux-gnu -mattr=+neon"],
+ "pynq": ["-model=pynq", "-target=armv7a-linux-eabi -mattr=+neon"],
+ "ultra96": ["-model=ultra96", "-target=aarch64-linux-gnu -mattr=+neon"],
+ }
+ pre_defined_opt = trans_table.get(model, ["-model=%s" % model])
+
+ opts = ["-device=arm_cpu"] + pre_defined_opt
+ opts = _merge_opts(opts, options)
+ return _ffi_api.TargetCreate("llvm", *opts)
+
+
+def rasp(options=None):
+ """Return a Raspberry 3b target.
+
+ Parameters
+ ----------
+ options : str or list of str
+ Additional options
+ """
+ warnings.warn('tvm.target.rasp() is going to be deprecated. '
+ 'Please use tvm.target.arm_cpu("rasp3b")')
+ return arm_cpu('rasp3b', options)
+
+
+def vta(model='unknown', options=None):
+    """Returns a VTA target."""
+    opts = ["-device=vta", '-keys=cpu', '-model=%s' % model]
+ opts = _merge_opts(opts, options)
+ ret = _ffi_api.TargetCreate("ext_dev", *opts)
+ return ret
+
+
+def bifrost(model='unknown', options=None):
+ """Return an ARM Mali GPU target (Bifrost architecture).
+
+    Parameters
+    ----------
+    model: str
+        The model of this device
+    options : str or list of str
+ Additional options
+ """
+ opts = ["-device=bifrost", '-model=%s' % model]
+ opts = _merge_opts(opts, options)
+ return _ffi_api.TargetCreate("opencl", *opts)
+
+
+def create(target_str):
+ """Get a target given target string.
+
+ Parameters
+ ----------
+ target_str : str
+ The target string.
+
+ Returns
+ -------
+ target : Target
+ The target object
+
+ Note
+ ----
+ See the note on :py:mod:`~tvm.target` on target string format.
+ """
+ if isinstance(target_str, Target):
+ return target_str
+ if not isinstance(target_str, str):
+ raise ValueError("target_str has to be string type")
+
+ return _ffi_api.TargetFromString(target_str)
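A short sketch of the split-out Target object in use (the model string is illustrative):

    tgt = tvm.target.create("cuda -model=v100")
    assert tgt.model == "v100"
    with tgt:
        assert "gpu" in tvm.target.Target.current().keys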
namespace runtime {
std::string GetCustomTypeName(uint8_t type_code) {
- auto f = tvm::runtime::Registry::Get("_datatype_get_type_name");
- CHECK(f) << "Function _datatype_get_type_name not found";
+ auto f = tvm::runtime::Registry::Get("runtime._datatype_get_type_name");
+ CHECK(f) << "Function runtime._datatype_get_type_name not found";
return (*f)(type_code).operator std::string();
}
uint8_t GetCustomTypeCode(const std::string& type_name) {
- auto f = tvm::runtime::Registry::Get("_datatype_get_type_code");
- CHECK(f) << "Function _datatype_get_type_code not found";
+ auto f = tvm::runtime::Registry::Get("runtime._datatype_get_type_code");
+ CHECK(f) << "Function runtime._datatype_get_type_code not found";
return (*f)(type_name).operator int();
}
bool GetCustomTypeRegistered(uint8_t type_code) {
- auto f = tvm::runtime::Registry::Get("_datatype_get_type_registered");
- CHECK(f) << "Function _datatype_get_type_registered not found";
+ auto f = tvm::runtime::Registry::Get("runtime._datatype_get_type_registered");
+ CHECK(f) << "Function runtime._datatype_get_type_registered not found";
return (*f)(type_code).operator bool();
}
});
// set device api
-TVM_REGISTER_GLOBAL("_GetDeviceAttr")
+TVM_REGISTER_GLOBAL("runtime.GetDeviceAttr")
.set_body([](TVMArgs args, TVMRetValue *ret) {
TVMContext ctx;
ctx.device_type = static_cast<DLDeviceType>(args[0].operator int());
return (*codegen_f)(blob_byte_array, system_lib, target_triple);
}
-TVM_REGISTER_GLOBAL("codegen._Build")
+TVM_REGISTER_GLOBAL("target.Build")
.set_body([](TVMArgs args, TVMRetValue *ret) {
if (args[0].IsObjectRef<tir::LoweredFunc>()) {
*ret = Build({args[0]}, args[1]);
using runtime::TVMArgs;
using runtime::TVMRetValue;
-TVM_REGISTER_GLOBAL("_datatype_register")
+TVM_REGISTER_GLOBAL("runtime._datatype_register")
.set_body([](TVMArgs args, TVMRetValue* ret) {
datatype::Registry::Global()->Register(args[0], static_cast<uint8_t>(args[1].operator int()));
});
-TVM_REGISTER_GLOBAL("_datatype_get_type_code")
+TVM_REGISTER_GLOBAL("runtime._datatype_get_type_code")
.set_body([](TVMArgs args, TVMRetValue* ret) {
*ret = datatype::Registry::Global()->GetTypeCode(args[0]);
});
-TVM_REGISTER_GLOBAL("_datatype_get_type_name")
+TVM_REGISTER_GLOBAL("runtime._datatype_get_type_name")
.set_body([](TVMArgs args, TVMRetValue* ret) {
*ret = Registry::Global()->GetTypeName(args[0].operator int());
});
-TVM_REGISTER_GLOBAL("_datatype_get_type_registered")
+TVM_REGISTER_GLOBAL("runtime._datatype_get_type_registered")
.set_body([](TVMArgs args, TVMRetValue* ret) {
*ret = Registry::Global()->GetTypeRegistered(args[0].operator int());
});
} else {
ss << runtime::TypeCode2Str(src_type_code);
}
-
return runtime::Registry::Get(ss.str());
}
func.CallPacked(args, ret);
}
-TVM_REGISTER_GLOBAL("_GenericFuncCreate")
+TVM_REGISTER_GLOBAL("target.GenericFuncCreate")
.set_body([](TVMArgs args, TVMRetValue* ret) {
*ret = GenericFunc(make_object<GenericFuncNode>());
});
-TVM_REGISTER_GLOBAL("_GenericFuncGetGlobal")
+TVM_REGISTER_GLOBAL("target.GenericFuncGetGlobal")
.set_body([](TVMArgs args, TVMRetValue* ret) {
std::string func_name = args[0];
*ret = GenericFunc::Get(func_name);
});
-TVM_REGISTER_GLOBAL("_GenericFuncSetDefault")
+TVM_REGISTER_GLOBAL("target.GenericFuncSetDefault")
.set_body([](TVMArgs args, TVMRetValue* ret) {
GenericFunc generic_func = args[0];
// Intentionally copy and not de-allocate it, to avoid free pyobject during shutdown
.set_default(*func, allow_override);
});
-TVM_REGISTER_GLOBAL("_GenericFuncRegisterFunc")
+TVM_REGISTER_GLOBAL("target.GenericFuncRegisterFunc")
.set_body([](TVMArgs args, TVMRetValue* ret) {
GenericFunc generic_func = args[0];
// Intentionally copy and not de-allocate it, to avoid free pyobject during shutdown
.register_func(tags_vector, *func, allow_override);
});
-TVM_REGISTER_GLOBAL("_GenericFuncCallFunc")
+TVM_REGISTER_GLOBAL("target.GenericFuncCallFunc")
.set_body([](TVMArgs args, TVMRetValue* ret) {
GenericFunc generic_func = args[0];
TVMArgs func_args(&args.values[1], &args.type_codes[1], args.num_args - 1);
return llvm::Function::lookupIntrinsicID(name);
}
-TVM_REGISTER_GLOBAL("codegen.llvm_lookup_intrinsic_id")
-.set_body([](TVMArgs args, TVMRetValue* rv) {
- *rv = static_cast<int64_t>(LookupLLVMIntrinsic(args[0]));
- });
-
TVM_REGISTER_GLOBAL("codegen.build_llvm")
.set_body([](TVMArgs args, TVMRetValue* rv) {
auto n = make_object<LLVMModuleNode>();
*rv = runtime::Module(n);
});
-TVM_REGISTER_GLOBAL("codegen.llvm_version_major")
+TVM_REGISTER_GLOBAL("target.llvm_lookup_intrinsic_id")
+.set_body([](TVMArgs args, TVMRetValue* rv) {
+ *rv = static_cast<int64_t>(LookupLLVMIntrinsic(args[0]));
+ });
+
+TVM_REGISTER_GLOBAL("target.llvm_version_major")
.set_body([](TVMArgs args, TVMRetValue* rv) {
- std::ostringstream os;
int major = TVM_LLVM_VERSION / 10;
*rv = major;
});
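
Both LLVM helpers are now reachable as `tvm.target.codegen.*` on the Python side; a hedged sketch of the renamed call sites (the intrinsic name is the one used in a later hunk):

import tvm.target.codegen

major = tvm.target.codegen.llvm_version_major()      # was tvm.codegen.llvm_version_major()
llvm_id = tvm.target.codegen.llvm_lookup_intrinsic_id(
    'llvm.x86.avx512.vpdpbusd.512')                  # was tvm.codegen.llvm_lookup_intrinsic_id(...)
# llvm_lookup_intrinsic_id returns 0 when the intrinsic is unknown to the linked LLVM.
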
return Target(t);
}
-TVM_REGISTER_GLOBAL("_TargetCreate")
+TVM_REGISTER_GLOBAL("target.TargetCreate")
.set_body([](TVMArgs args, TVMRetValue* ret) {
std::string target_name = args[0];
std::vector<std::string> options;
*ret = CreateTarget(target_name, options);
});
-TVM_REGISTER_GLOBAL("_TargetFromString")
+TVM_REGISTER_GLOBAL("target.TargetFromString")
.set_body([](TVMArgs args, TVMRetValue* ret) {
std::string target_str = args[0];
*ret = Target::Create(target_str);
return Target();
}
-TVM_REGISTER_GLOBAL("_GetCurrentTarget")
+TVM_REGISTER_GLOBAL("target.GetCurrentTarget")
.set_body([](TVMArgs args, TVMRetValue* ret) {
bool allow_not_defined = args[0];
*ret = Target::Current(allow_not_defined);
}
};
-TVM_REGISTER_GLOBAL("_EnterTargetScope")
+TVM_REGISTER_GLOBAL("target.EnterTargetScope")
.set_body_typed(Target::Internal::EnterScope);
-TVM_REGISTER_GLOBAL("_ExitTargetScope")
+TVM_REGISTER_GLOBAL("target.ExitTargetScope")
.set_body_typed(Target::Internal::ExitScope);
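
A hedged sketch of how the Python-side `with target:` support can sit on top of the two globals registered above (the class and its placement are assumptions; only the global names `target.EnterTargetScope`/`target.ExitTargetScope` come from this change):

from tvm.target import _ffi_api  # assumed to expose the "target." globals

class _TargetScope:
    """Illustrative context manager pairing the two scope globals."""
    def __init__(self, target):
        self._target = target
    def __enter__(self):
        _ffi_api.EnterTargetScope(self._target)  # push onto the target stack
        return self._target
    def __exit__(self, ptype, value, trace):
        _ffi_api.ExitTargetScope(self._target)   # pop, restoring the previous target
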
namespace target {
return expr;
}
-#define DEFINE_MUTATE__(OP, NodeName) \
- inline PrimExpr VisitExpr_(const NodeName* op) final { \
- auto type_code = op->dtype.code(); \
+#define DEFINE_MUTATE__(OP, NodeName) \
+ inline PrimExpr VisitExpr_(const NodeName* op) final { \
+ auto type_code = op->dtype.code(); \
bool toBeLowered = datatype::Registry::Global()->GetTypeRegistered(type_code); \
- PrimExpr expr = StmtExprMutator::VisitExpr_(op); \
- op = expr.as<NodeName>(); \
- if (toBeLowered) { \
- auto lower = datatype::Get##OP##LowerFunc(target_, type_code); \
- CHECK(lower) << #OP " lowering function for target " << target_ << " type " \
- << static_cast<unsigned>(type_code) << " not found"; \
- return (*lower)(expr); \
- } \
- return expr; \
+ PrimExpr expr = StmtExprMutator::VisitExpr_(op); \
+ op = expr.as<NodeName>(); \
+ if (toBeLowered) { \
+ auto lower = datatype::Get##OP##LowerFunc(target_, type_code); \
+ CHECK(lower) << #OP " lowering function for target " << target_ << " type " \
+ << static_cast<unsigned>(type_code) << " not found"; \
+ return (*lower)(expr); \
+ } \
+ return expr; \
}
DEFINE_MUTATE__(Add, AddNode);
return copy[i] - C[i];
}, "elemwise_sub");
- const runtime::PackedFunc* enter_target_scope_func = runtime::Registry::Get("_EnterTargetScope");
- (*enter_target_scope_func)(target_cuda);
+ With<Target> cuda_scope(target_cuda);
auto s1 = topi::cuda::schedule_injective(target_cuda, {elemwise_add});
- (*enter_target_scope_func)(target_llvm);
+
+ With<Target> llvm_scope(target_llvm);
auto s2 = create_schedule({elemwise_sub->op});
auto config = BuildConfig::Create();
if not tvm.runtime.enabled(target):
print("Target %s is not enabled" % target)
return
- f = tvm.codegen.build_module(fapi, target)
+ f = tvm.target.codegen.build_module(fapi, target)
# verify
ctx = tvm.cpu(0)
a = tvm.nd.array(np.random.uniform(size=(nn,)).astype(A.dtype), ctx)
# compile conv2d for x86 (skylake, cascadelake) and test assembly contains *pmadd* instructions
targets = ["llvm -mcpu=skylake-avx512", "llvm -mcpu=cascadelake"]
- llvm_version = tvm.codegen.llvm_version_major()
+ llvm_version = tvm.target.codegen.llvm_version_major()
for target in targets:
if llvm_version >= 8:
dtypes = ('uint8', 'int8', 'int32')
parameters = {"weight": tvm.nd.array(wdata.astype(weight_dtype))}
targets = ["llvm -mcpu=skylake-avx512", "llvm -mcpu=cascadelake"]
- llvm_version = tvm.codegen.llvm_version_major()
+ llvm_version = tvm.target.codegen.llvm_version_major()
for target in targets:
if llvm_version >= 8:
with relay.build_config(opt_level=3):
@autotvm.template
def bad_matmul(N, L, M, dtype):
- if 'bad_device' in tvm.target.current_target().keys:
+ if 'bad_device' in tvm.target.Target.current().keys:
A = tvm.placeholder((N, L), name='A', dtype=dtype)
B = tvm.placeholder((L, M), name='B', dtype=dtype)
f1 = tvm.lower(s, [A,B,C], name="fadd_pipeline")
fsplits = [x for x in tvm.ir_pass.SplitHostDevice(f1)]
fsplits[0] = tvm.ir_pass.LowerTVMBuiltin(fsplits[0])
- mhost = tvm.codegen.build_module(fsplits[0], "c")
+ mhost = tvm.target.codegen.build_module(fsplits[0], "c")
temp = util.tempdir()
path_dso = temp.relpath("temp.so")
mhost.export_library(path_dso)
return
if not tvm.runtime.enabled(host):
return
- mhost = tvm.codegen.build_module(fsplits[0], host)
- mdev = tvm.codegen.build_module(fsplits[1:], device)
+ mhost = tvm.target.codegen.build_module(fsplits[0], host)
+ mdev = tvm.target.codegen.build_module(fsplits[1:], device)
mhost.import_module(mdev)
code = mdev.get_source()
f = mhost.entry_func
fmt = "hsaco"
else:
fmt = device
- mhost = tvm.codegen.build_module(fsplits[0], host)
- mdev = tvm.codegen.build_module(fsplits[1:], device)
+ mhost = tvm.target.codegen.build_module(fsplits[0], host)
+ mdev = tvm.target.codegen.build_module(fsplits[1:], device)
temp = util.tempdir()
mpath = temp.relpath("test.%s" % fmt)
mdev.save(mpath)
def check_llvm_object():
if not tvm.runtime.enabled("llvm"):
return
- if tvm.codegen.llvm_version_major() < 5:
+ if tvm.target.codegen.llvm_version_major() < 5:
return
- if tvm.codegen.llvm_version_major() > 6:
+ if tvm.target.codegen.llvm_version_major() > 6:
return
# build two functions
f2 = tvm.lower(s, [A, B, C], name="fadd1")
def check_llvm_ir():
if not tvm.runtime.enabled("llvm"):
return
- if tvm.codegen.llvm_version_major() < 5:
+ if tvm.target.codegen.llvm_version_major() < 5:
return
- if tvm.codegen.llvm_version_major() > 6:
+ if tvm.target.codegen.llvm_version_major() > 6:
return
# build two functions
f2 = tvm.lower(s, [A, B, C], name="fadd1")
stmt = ib.get()
fapi = tvm.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
- f = tvm.codegen.build_module(fapi, "llvm")
+ f = tvm.target.codegen.build_module(fapi, "llvm")
a = tvm.nd.array(np.zeros(10, dtype=dtype))
f(a)
f(a)
stmt = ib.get()
fapi = tvm.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
- f = tvm.codegen.build_module(fapi, "llvm")
+ f = tvm.target.codegen.build_module(fapi, "llvm")
a = tvm.nd.array(np.zeros(10, dtype=dtype))
f(a)
for target in ["llvm", "stackvm"]:
if not tvm.runtime.enabled(target):
continue
- f = tvm.codegen.build_module(fapi, target)
+ f = tvm.target.codegen.build_module(fapi, target)
s = f.get_source()
check(f)
def test_fp16_to_fp32():
- if tvm.codegen.llvm_version_major() < 6:
+ if tvm.target.codegen.llvm_version_major() < 6:
print("Skipping due to LLVM version being {} < 6".format(
- tvm.codegen.llvm_version_major()))
+ tvm.target.codegen.llvm_version_major()))
return
def fp16_to_fp32(target, width, match=None, not_match=None):
# In this case, we have built the test functions used below right into TVM.
# CDLL("libmybfloat16.so", RTLD_GLOBAL)
- tvm.datatype.register("bfloat", 129)
+ tvm.target.datatype.register("bfloat", 129)
- tvm.datatype.register_op(
- tvm.datatype.create_lower_func("FloatToBFloat16_wrapper"), "Cast",
+ tvm.target.datatype.register_op(
+ tvm.target.datatype.create_lower_func("FloatToBFloat16_wrapper"), "Cast",
"llvm", "bfloat", "float")
- tvm.datatype.register_op(
- tvm.datatype.create_lower_func("BFloat16ToFloat_wrapper"), "Cast",
+ tvm.target.datatype.register_op(
+ tvm.target.datatype.create_lower_func("BFloat16ToFloat_wrapper"), "Cast",
"llvm", "float", "bfloat")
- tvm.datatype.register_op(
- tvm.datatype.create_lower_func("BFloat16Add_wrapper"), "Add", "llvm",
+ tvm.target.datatype.register_op(
+ tvm.target.datatype.create_lower_func("BFloat16Add_wrapper"), "Add", "llvm",
"bfloat")
- tvm.datatype.register_op(
- tvm.datatype.create_lower_func("FloatToBFloat16_wrapper"), "FloatImm",
+ tvm.target.datatype.register_op(
+ tvm.target.datatype.create_lower_func("FloatToBFloat16_wrapper"), "FloatImm",
"llvm", "bfloat")
def lower_datatypes_and_build(schedule, args):
with tvm.target.create("metal"):
assert mygeneric(1) == 3
- assert tvm.target.current_target() is None
+ assert tvm.target.Target.current() is None
def test_target_string_parse():
stmt = ib.get()
fapi = tvm.ir_pass.MakeAPI(stmt, "arange", [Ab], 0, True)
fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
- f = tvm.codegen.build_module(fapi, "stackvm")
+ f = tvm.target.codegen.build_module(fapi, "stackvm")
a = tvm.nd.array(np.zeros(10, dtype=dtype))
aview = MyTensorView(a)
f(aview)
i + 1))
fapi = tvm.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
- m = tvm.codegen.build_module(fapi, "llvm")
+ m = tvm.target.codegen.build_module(fapi, "llvm")
for name in names:
m.save(name)
idxd = tvm.indexdiv
if groups == 1:
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
dispatch_ctx = autotvm.DispatchContext.current
cfg = dispatch_ctx.query(target, workload)
else:
raise RuntimeError("Unsupported template_key '%s'" % cfg.template_key)
else:
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
dispatch_ctx = autotvm.DispatchContext.current
cfg = dispatch_ctx.query(target, workload)
if cfg.is_fallback: # if is fallback, clear query cache and return None
- autotvm.task.clear_fallback_cache(tvm.target.current_target(), workload)
+ autotvm.task.clear_fallback_cache(tvm.target.Target.current(), workload)
if layout == 'NHWC' and kernel_layout == 'HWOI':
new_attrs['data_layout'] = 'NCHW'
new_attrs['kernel_layout'] = 'OIHW'
# this part makes the tuning records correct
s[kernel_vec].pragma(s[kernel_vec].op.axis[0], 'debug_skip_region')
else:
- max_threads = tvm.target.current_target(allow_none=False).max_num_threads
+ max_threads = tvm.target.Target.current(allow_none=False).max_num_threads
co, ci, kh, kw, vc = s[kernel_vec].op.axis
fused = s[kernel_vec].fuse(co, ci, kh, kw, vc)
fused, vec = s[kernel_vec].split(fused, VC)
"""Fuse all the axis and bind to GPU threads"""
axis = axis or s[tensor].op.axis
fused = s[tensor].fuse(*axis)
- max_threads = tvm.target.current_target(allow_none=False).max_num_threads
+ max_threads = tvm.target.Target.current(allow_none=False).max_num_threads
bx, tx = s[tensor].split(fused, num_thread or max_threads)
s[tensor].bind(bx, tvm.thread_axis("blockIdx.x"))
s[tensor].bind(tx, tvm.thread_axis("threadIdx.x"))
output : tvm.Tensor
3-D with shape [batch, M, N]
"""
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if target.target_name == "cuda" and "cublas" in target.libs:
return cublas.batch_matmul(x, y, False, True)
return batch_matmul_default(x, y)
s: Schedule
The computation schedule for the op.
"""
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if target.target_name == "cuda" and "cublas" in target.libs:
return generic.schedule_extern(outs)
cfg.define_split("tile_rc", cfg.axis(rc), num_outputs=3)
cfg.define_knob("auto_unroll_max_step", [64, 512, 1500])
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if target.target_name in ['nvptx', 'rocm']:
cfg.define_knob("unroll_explicit", [1])
else:
cfg.define_split("tile_rc", cfg.axis(rc), num_outputs=3)
cfg.define_knob("auto_unroll_max_step", [64, 512, 1500])
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if target.target_name in ['nvptx', 'rocm']:
cfg.define_knob("unroll_explicit", [1])
else:
cfg.define_split("tile_rc", cfg.axis(rc), num_outputs=3)
cfg.define_knob("auto_unroll_max_step", [64, 512, 1500])
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if target.target_name in ['nvptx', 'rocm']:
cfg.define_knob("unroll_explicit", [1])
else:
output : tvm.Tensor
4-D with shape [batch, out_channel, out_height, out_width]
"""
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if "cudnn" in target.libs:
if layout == 'NCHW':
s: Schedule
The computation schedule for conv2d.
"""
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if 'cudnn' in target.libs:
return generic.schedule_extern(outs)
s: Schedule
The computation schedule for conv2d.
"""
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if 'cudnn' in target.libs:
return generic.schedule_extern(outs)
cfg.define_split("tile_rx", rx, num_outputs=2)
cfg.define_knob("auto_unroll_max_step", [0, 512, 1500])
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if target.target_name in ['nvptx', 'rocm']:
cfg.define_knob("unroll_explicit", [1])
else:
cfg.define_split("tile_rc", cfg.axis(rc), num_outputs=3)
cfg.define_knob("auto_unroll_max_step", [64, 512, 1500])
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if target.target_name in ['nvptx', 'rocm']:
cfg.define_knob("unroll_explicit", [1])
else:
cfg.define_split("tile_x", x, num_outputs=4)
cfg.define_split("tile_rc", rc, num_outputs=2)
cfg.define_knob("auto_unroll_max_step", [0, 128, 1500])
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if target.target_name in ['nvptx', 'rocm']:
cfg.define_knob("unroll_explicit", [1])
else:
Unlike other TOPI functions, this function operates on both graph level and operator level,
so we have to pass 'F' to make it support our two versions of graph IR, Relay.
"""
- if 'cudnn' in tvm.target.current_target().libs or 'miopen' in tvm.target.current_target().libs:
+ if 'cudnn' in tvm.target.Target.current().libs or 'miopen' in tvm.target.Target.current().libs:
return None
copy_inputs = list(inputs)
CO, _, KH, KW = get_const_tuple(kernel.shape)
dispatch_ctx = autotvm.DispatchContext.current
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if groups == 1:
# query config of this workload
output : tvm.Tensor
5-D with shape [batch, out_channel, out_depth, out_height, out_width]
"""
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if "cudnn" in target.libs:
if layout == 'NCDHW':
s: Schedule
The computation schedule for conv2d.
"""
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if 'cudnn' in target.libs:
return generic.schedule_extern(outs)
s: Schedule
The computation schedule for conv2d.
"""
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if 'cudnn' in target.libs:
return generic.schedule_extern(outs)
cfg.define_split("tile_rx", rx, num_outputs=2)
cfg.define_knob("auto_unroll_max_step", [0, 512, 1500])
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if target.target_name in ['nvptx', 'rocm']:
cfg.define_knob("unroll_explicit", [1])
else:
cfg.define_split("tile_rx", rx, num_outputs=2)
cfg.define_knob("auto_unroll_max_step", [0, 512, 1500])
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if target.target_name in ['nvptx', 'rocm']:
cfg.define_knob("unroll_explicit", [1])
else:
out_dtype = data.dtype
batch, in_dim = data.shape
out_dim, _ = weight.shape
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if "cublas" in target.libs:
matmul = cublas.matmul(data, weight, False, True, out_dtype)
if bias is not None:
The computation schedule for dense.
"""
# pylint: disable=unused-argument
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
if target.target_name == "cuda" and "cublas" in target.libs:
batch, in_dim = get_const_tuple(data.shape)
out_dim, _ = get_const_tuple(weight.shape)
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if "cublas" in target.libs:
matmul = cublas.matmul(data, weight, False, True, out_dtype)
if bias is not None:
def schedule_dense_int8(cfg, outs):
"""Dense schedule for int8 on CUDA"""
s = tvm.create_schedule([x.op for x in outs])
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
if "cublas" in target.libs:
cfg.define_split("tile_x", x, num_outputs=4)
cfg.define_knob("auto_unroll_max_step", [0, 256, 1500])
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if target.target_name in ['nvptx', 'rocm']:
cfg.define_knob("unroll_explicit", [1])
else:
# num_thread here could be 728, which is larger than cuda.max_num_threads
num_thread = tvm.ir_pass.Simplify(temp.shape[3]).value
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if target and (target.target_name not in ["cuda", "nvptx"]):
num_thread = target.max_num_threads
xoc, xic = s[Output].split(c, factor=num_thread)
cfg.define_split("tile_rx", rx, num_outputs=2)
cfg.define_knob("auto_unroll_max_step", [0, 512, 1500])
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if target.target_name in ['nvptx', 'rocm']:
cfg.define_knob("unroll_explicit", [1])
else:
The updated schedule.
"""
fused = sch[out].fuse(*sch[out].op.axis)
- num_thread = tvm.target.current_target(allow_none=False).max_num_threads
+ num_thread = tvm.target.Target.current(allow_none=False).max_num_threads
max_block = 256
try:
id_index = tvm.make.node("IntImm", dtype="int32", value=id_index)
score_index = tvm.make.node("IntImm", dtype="int32", value=score_index)
- max_threads = int(tvm.target.current_target(allow_none=False).max_num_threads)
+ max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
nthread_tx = max_threads
nthread_bx = batch_size * num_anchors // max_threads + 1
tx = tvm.thread_axis("threadIdx.x")
idx_in = ib.buffer_ptr(idx_in)
idx = ib.buffer_ptr(idx)
partial = ib.buffer_ptr(partial)
- max_threads = int(tvm.target.current_target(allow_none=False).max_num_threads)
+ max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
elem_per_thread = num_anchors // max_threads + 1
nthread_tx = max_threads
nthread_bx = batch_size
ib = tvm.ir_builder.create()
partial_in = ib.buffer_ptr(partial_in)
partial = ib.buffer_ptr(partial)
- max_threads = int(tvm.target.current_target(allow_none=False).max_num_threads)
+ max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
elem_per_thread = num_anchors // max_threads + 1
nthread_tx = max_threads
nthread_bx = batch_size
idx_in = ib.buffer_ptr(idx_in)
idx = ib.buffer_ptr(idx)
partial = ib.buffer_ptr(partial)
- max_threads = int(tvm.target.current_target(allow_none=False).max_num_threads)
+ max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
elem_per_thread = num_anchors // max_threads + 1
nthread_tx = max_threads
nthread_bx = batch_size * num_anchors // max_threads + 1
valid_count = ib.buffer_ptr(valid_count)
out = ib.buffer_ptr(out)
- max_threads = int(tvm.target.current_target(allow_none=False).max_num_threads)
+ max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
nthread_tx = max_threads
nthread_bx = batch_size * num_anchors * elem_length // max_threads + 1
tx = tvm.thread_axis("threadIdx.x")
"""
batch_size = data.shape[0]
num_anchors = data.shape[1]
- max_threads = int(tvm.target.current_target(allow_none=False).max_num_threads)
+ max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
elem_per_thread = num_anchors // max_threads + 1
new_range = num_anchors // elem_per_thread + 1
temp_flag_buf = api.decl_buffer(
num_valid_boxes = ib.allocate("int32", (1,), name="num_valid_boxes", scope="local")
max_threads = int(
- tvm.target.current_target(allow_none=False).max_num_threads)
+ tvm.target.Target.current(allow_none=False).max_num_threads)
nthread_tx = max_threads
nthread_bx = num_anchors // max_threads + 1
tx = tvm.thread_axis("threadIdx.x")
idx = ib.buffer_ptr(idx)
max_threads = int(math.sqrt(
- tvm.target.current_target(allow_none=False).max_num_threads))
+ tvm.target.Target.current(allow_none=False).max_num_threads))
nthread_tx = max_threads
nthread_bx = num_anchors // max_threads + 1
tx = tvm.thread_axis("threadIdx.x")
out = ib.buffer_ptr(out)
max_threads = int(math.sqrt(
- tvm.target.current_target(allow_none=False).max_num_threads))
+ tvm.target.Target.current(allow_none=False).max_num_threads))
nthread_tx = max_threads
nthread_bx = num_anchors // max_threads + 1
tx = tvm.thread_axis("threadIdx.x")
sch: Schedule
The computation schedule for the op.
"""
- target = tvm.target.current_target(allow_none=False)
+ target = tvm.target.Target.current(allow_none=False)
cpp_target = cpp.TEST_create_target(target.target_name)
return cpp.cuda.schedule_lrn(cpp_target, outs)
def _schedule(PaddedInput, Pool):
if isinstance(PaddedInput.op, tvm.tensor.ComputeOp):
s[PaddedInput].compute_inline()
- num_thread = tvm.target.current_target(allow_none=False).max_num_threads
+ num_thread = tvm.target.Target.current(allow_none=False).max_num_threads
if Pool.op in s.outputs:
Out = Pool
OL = s.cache_write(Pool, "local")
else:
out = outs[0].op.output(0)
fused = s[out].fuse(*s[out].op.axis)
- num_thread = tvm.target.current_target(allow_none=False).max_num_threads
+ num_thread = tvm.target.Target.current(allow_none=False).max_num_threads
bx, tx = s[out].split(fused, factor=num_thread)
s[out].bind(bx, tvm.thread_axis("blockIdx.x"))
s[out].bind(tx, tvm.thread_axis("threadIdx.x"))
"""
batch, num_anchors, height, width = get_const_tuple(cls_prob_buf.shape)
num_anchors //= 2
- max_threads = int(tvm.target.current_target(allow_none=False).max_num_threads)
+ max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
nthread_tx = max_threads
nthread_bx = (batch * height * width) // max_threads + 1
tx = tvm.thread_axis("threadIdx.x")
The result IR statement.
"""
batch, num_bbox = get_const_tuple(data_buf.shape)
- max_threads = int(tvm.target.current_target(allow_none=False).max_num_threads)
+ max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
ib = tvm.ir_builder.create()
p_data = ib.buffer_ptr(data_buf)
index_out = ib.buffer_ptr(out_index_buf)
return i / u
batch, num_bbox = get_const_tuple(out_buf.shape)
- max_threads = int(math.sqrt(tvm.target.current_target(allow_none=False).max_num_threads))
+ max_threads = int(math.sqrt(tvm.target.Target.current(allow_none=False).max_num_threads))
tx = tvm.thread_axis("threadIdx.x")
bx = tvm.thread_axis("blockIdx.x")
ib = tvm.ir_builder.create()
if len(sch[data_out].op.axis) > 0:
all_reduce = False
num_thread = 32
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if target and target.target_name == "opencl":
# without it, CL_INVALID_WORK_GROUP_SIZE occurs when running test_topi_reduce.py;
# the root cause is unknown
thread_y = tvm.thread_axis((0, num_thread), "threadIdx.y")
else:
all_reduce = True
- num_thread = tvm.target.current_target(allow_none=False).max_num_threads
+ num_thread = tvm.target.Target.current(allow_none=False).max_num_threads
thread_x = tvm.thread_axis((0, num_thread), "threadIdx.x")
# Fuse and refactor the reduce axis
axis_mul_before *= value
elif i > axis:
axis_mul_after *= value
- max_threads = int(tvm.target.current_target(allow_none=False).max_num_threads)
+ max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
ib = tvm.ir_builder.create()
data = ib.buffer_ptr(data)
values_out = ib.buffer_ptr(values_out)
axis_mul_before *= value
elif i > axis:
axis_mul_after *= value
- max_threads = int(tvm.target.current_target(allow_none=False).max_num_threads)
+ max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
ib = tvm.ir_builder.create()
data = ib.buffer_ptr(data)
valid_count = ib.buffer_ptr(valid_count)
The result IR statement.
"""
max_threads = int(math.sqrt(
- tvm.target.current_target(allow_none=False).max_num_threads))
+ tvm.target.Target.current(allow_none=False).max_num_threads))
tx = tvm.thread_axis("threadIdx.x")
ty = tvm.thread_axis("threadIdx.y")
bx = tvm.thread_axis("blockIdx.x")
threshold = tvm.make.node("FloatImm", dtype="float32", value=threshold)
- max_threads = int(tvm.target.current_target(allow_none=False).max_num_threads)
+ max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
nthread_tx = max_threads
nthread_bx = (batch_size * num_anchors) // max_threads + 1
tx = tvm.thread_axis("threadIdx.x")
score = ib.buffer_ptr(temp_score)
out_loc = ib.buffer_ptr(out)
- max_threads = int(tvm.target.current_target(allow_none=False).max_num_threads)
+ max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
nthread_tx = max_threads
nthread_bx = (batch_size * num_anchors) // max_threads + 1
tx = tvm.thread_axis("threadIdx.x")
s: Schedule
The computation schedule for reorg.
"""
- target = tvm.target.current_target(allow_none=False)
+ target = tvm.target.Target.current(allow_none=False)
cpp_target = cpp.TEST_create_target(target.target_name)
return cpp.cuda.schedule_injective(cpp_target, outs)
sch: Schedule
The computation schedule for the op.
"""
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
return cpp.generic.schedule_extern(target, outs)
sch: Schedule
The computation schedule for the op.
"""
- target = tvm.target.current_target(allow_none=False)
+ target = tvm.target.Target.current(allow_none=False)
if target.target_name != "llvm":
raise RuntimeError("schedule_injective not registered for '%s'" % target)
outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
def _default_schedule(outs, auto_inline):
"""Default schedule for llvm."""
- target = tvm.target.current_target(allow_none=False)
+ target = tvm.target.Target.current(allow_none=False)
outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
if target.target_name not in ("llvm", "c"):
raise RuntimeError("schedule not registered for '%s'" % target)
sch: Schedule
The computation schedule for the op.
"""
- target = tvm.target.current_target(allow_none=False)
+ target = tvm.target.Target.current(allow_none=False)
cpp_target = cpp.TEST_create_target(target.target_name)
return cpp.generic.default_schedule(cpp_target, outs, False)
@tvm.target.generic_func
def schedule_batch_matmul(outs):
- target = tvm.target.current_target(allow_none=False)
+ target = tvm.target.Target.current(allow_none=False)
cpp_target = cpp.TEST_create_target(target.target_name)
return cpp.generic.default_schedule(cpp_target, outs, False)
def _default_schedule(outs, auto_inline):
"""Default schedule for llvm."""
- target = tvm.target.current_target(allow_none=False)
+ target = tvm.target.Target.current(allow_none=False)
outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
if target.target_name != "llvm":
raise RuntimeError("schedule not registered for '%s'" % target)
s: Schedule
The computation schedule for the op.
"""
- target = tvm.target.current_target(allow_none=False)
+ target = tvm.target.Target.current(allow_none=False)
cpp_target = cpp.TEST_create_target(target.target_name)
return cpp.generic.default_schedule(cpp_target, outs, False)
return None
dispatch_ctx = autotvm.task.DispatchContext.current
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
# query schedule and fallback if necessary
workload = autotvm.task.args_to_workload(
cfg.define_split("tile_x", x, num_outputs=4)
cfg.define_knob("auto_unroll_max_step", [0, 256, 1500])
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if target.target_name in ['nvptx', 'rocm']:
cfg.define_knob("unroll_explicit", [1])
else:
# num_thread here could be 728, which is larger than cuda.max_num_threads
num_thread = tvm.ir_pass.Simplify(temp.shape[3]).value
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if target and (target.target_name not in ["cuda", "nvptx"]):
num_thread = target.max_num_threads
xoc, xic = s[Output].split(c, factor=num_thread)
# this part makes the tuning records correct
s[kernel_vec].pragma(s[kernel_vec].op.axis[0], 'debug_skip_region')
else:
- max_threads = tvm.target.current_target(allow_none=False).max_num_threads
+ max_threads = tvm.target.Target.current(allow_none=False).max_num_threads
co, ci, kh, kw, vc = s[kernel_vec].op.axis
fused = s[kernel_vec].fuse(co, ci, kh, kw, vc)
fused, vec = s[kernel_vec].split(fused, VC)
n, ic_chunk, ih, iw, ic_bn = get_const_tuple(data.shape)
in_channel = ic_chunk * ic_bn
- target = tvm.target.current_target(allow_none=False)
+ target = tvm.target.Target.current(allow_none=False)
oc_chunk, ic_chunk_group, kernel_height, kernel_width, _, oc_bn = \
get_const_tuple(kernel.shape)
num_filter = oc_chunk * oc_bn
4-D with shape [batch, out_channel, out_height, out_width]
"""
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if "miopen" in target.libs:
assert layout == 'NCHW', "Only NCHW layout is supported."
CO, CI, KH, KW = get_const_tuple(kernel.shape)
s: Schedule
The computation schedule for conv2d.
"""
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if target and "miopen" in target.libs:
return generic.schedule_extern(outs)
out_dtype = data.dtype
batch, in_dim = data.shape
out_dim, _ = weight.shape
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if "rocblas" in target.libs:
assert out_dtype == data.dtype, "Mixed precision not supported."
matmul = rocblas.matmul(data, weight, False, True)
s: Schedule
The computation schedule for dense.
"""
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if target.target_name == "rocm" and "rocblas" in target.libs:
return generic.schedule_extern(outs)
return topi.cuda.schedule_dense(cfg, outs)
@generic.schedule_lrn.register(["rocm", "gpu"])
def schedule_lrn(outs):
- target = tvm.target.current_target(allow_none=False)
+ target = tvm.target.Target.current(allow_none=False)
cpp_target = cpp.TEST_create_target(target.target_name)
return cpp.rocm.schedule_lrn(cpp_target, outs)
output : tvm.Tensor
3-D with shape [batch, M, N]
"""
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if "cblas" in target.libs:
return cblas.batch_matmul(x, y, False, True)
sch: Schedule
The computation schedule for the op.
"""
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if "cblas" in target.libs:
return generic.schedule_extern(outs)
kh, kw, oc, _ = kshape
elif pat.match(layout) is not None:
n, ic_chunk, h, w, ic_bn = dshape
- target = tvm.target.current_target(allow_none=False)
+ target = tvm.target.Target.current(allow_none=False)
oc_chunk, k_ic_chunk, kh, kw, k_ic_bn, oc_bn = kshape
assert ic_chunk == k_ic_chunk
assert ic_bn == k_ic_bn
data = data_pad.op.input_tensors[0]
args = [s, cfg, data_vec, conv_out, outs[0]]
- target = tvm.target.current_target(allow_none=False)
+ target = tvm.target.Target.current(allow_none=False)
_, _, kh, kw, _, _, = get_const_tuple(kernel.shape)
if kh == 1 and kw == 1:
conv2d_avx_1x1._schedule_conv_NCHWc(*args)
# Set workload. Config update.
dispatch_ctx = autotvm.task.DispatchContext.current
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if is_depthwise:
workload = autotvm.task.args_to_workload(
is_dtype_support = data_dtype == 'uint8' and kernel_dtype == 'int8'
# 2) Check LLVM support
- llvm_version = tvm.codegen.llvm_version_major()
+ llvm_version = tvm.target.codegen.llvm_version_major()
is_llvm_support = llvm_version >= 8
# 3) Check target
- mcpu = tvm.target.current_target().mcpu
+ mcpu = tvm.target.Target.current().mcpu
is_target_support = False
if mcpu in ('skylake-avx512', 'cascadelake'):
is_target_support = True
kh, kw, oc, _ = kshape
elif pat.match(layout) is not None:
n, ic_chunk, h, w, ic_bn = dshape
- target = tvm.target.current_target(allow_none=False)
+ target = tvm.target.Target.current(allow_none=False)
oc_chunk, k_ic, kh, kw, k_ic_f, oc_bn, k_ic_s = kshape
ic = ic_chunk * ic_bn
assert ic == k_ic * k_ic_f * k_ic_s
data = data_pad.op.input_tensors[0]
args = [s, cfg, data_vec, conv_out, outs[0]]
- target = tvm.target.current_target(allow_none=False)
+ target = tvm.target.Target.current(allow_none=False)
# int8 conv kernel is 7-dim
_, _, kh, kw, _, _, _ = get_const_tuple(kernel.shape)
if kh == 1 and kw == 1:
@autotvm.register_topi_compute(nn.dense, "cpu", "direct")
def _declaration_dense(cfg, data, weight, bias=None, out_dtype=None):
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if "cblas" in target.libs:
C = cblas.matmul(data, weight, False, True)
if bias is not None:
@autotvm.register_topi_schedule(generic.schedule_dense, "cpu", "direct")
def _schedule_dense(cfg, outs):
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if "cblas" in target.libs:
return generic.schedule_extern(outs)
@autotvm.register_topi_schedule(generic.schedule_dense, "cpu", "direct_pack")
def _schedule_dense_pack(cfg, outs):
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if "cblas" in target.libs:
return generic.schedule_extern(outs)
@autotvm.register_topi_schedule(generic.schedule_dense, "cpu", "direct_nopack")
def _schedule_dense_nopack(cfg, outs):
- target = tvm.target.current_target()
+ target = tvm.target.Target.current()
if "cblas" in target.libs:
return generic.schedule_extern(outs)
"""Core kernel of dot product of 4 Int8 operations"""
#pylint: disable=invalid-name
import tvm
+import tvm.target.codegen
def dot_16x1x16_uint8_int8_int32():
"""Dispatch the most optimized intrin depending on the target"""
- mcpu = tvm.target.current_target().mcpu
+ mcpu = tvm.target.Target.current().mcpu
assert mcpu in ("skylake-avx512", "cascadelake"), \
"An old Intel machine that does not have fast Int8 support."
vec_b = ins[1].vload([0, 0], "int8x64")
vnni_inst_name = 'llvm.x86.avx512.vpdpbusd.512'
- llvm_id = tvm.codegen.llvm_lookup_intrinsic_id(vnni_inst_name)
+ llvm_id = tvm.target.codegen.llvm_lookup_intrinsic_id(vnni_inst_name)
if llvm_id != 0: # VNNI is available for current LLVM version
vec_bi32 = tvm.call_pure_intrin('int32x16', 'reinterpret', vec_b)
import tvm
def get_fp32_len():
- mcpu = tvm.target.current_target().mcpu
+ mcpu = tvm.target.Target.current().mcpu
fp32_vec_len = 8
if mcpu in ('skylake-avx512', 'cascadelake'):
fp32_vec_len = 16
groups,
out_dtype)]
# If it's not packed, run on ARM CPU
- with tvm.target.arm_cpu(tvm.target.current_target().model):
+ with tvm.target.arm_cpu(tvm.target.Target.current().model):
return _nn.compute_conv2d(attrs, inputs, output_type, target)
# If VTA is not the target, default to _nn def
return topi.generic.schedule_conv2d_nchw(outs)
return topi.generic.schedule_group_conv2d_nchw(outs)
# If it's not packed, run on ARM CPU
- with tvm.target.arm_cpu(tvm.target.current_target().model):
- return _nn.schedule_conv2d(attrs, outs, tvm.target.current_target())
+ with tvm.target.arm_cpu(tvm.target.Target.current().model):
+ return _nn.schedule_conv2d(attrs, outs, tvm.target.Target.current())
# If VTA is not the target, default to _nn def
return _nn.schedule_conv2d(attrs, outs, target)
return [topi.nn.conv2d_transpose_nchw(
inputs[0], inputs[1], strides, padding, out_dtype)]
# If it's not packed, run on ARM CPU
- with tvm.target.arm_cpu(tvm.target.current_target().model):
+ with tvm.target.arm_cpu(tvm.target.Target.current().model):
return _nn.compute_conv2d_transpose(attrs, inputs, output_type, target)
# If VTA is not the target, default to _nn def
if is_packed_layout(layout):
return topi.nn.schedule_conv2d_transpose_nchw(outputs)
# If it's not packed, run on ARM CPU
- with tvm.target.arm_cpu(tvm.target.current_target().model):
- return _nn.schedule_conv2d_transpose(attrs, outputs, tvm.target.current_target())
+ with tvm.target.arm_cpu(tvm.target.Target.current().model):
+ return _nn.schedule_conv2d_transpose(attrs, outputs, tvm.target.Target.current())
# If VTA is not the target, default to _nn def
- return _nn.schedule_conv2d_transpose(attrs, outputs, tvm.target.current_target())
+ return _nn.schedule_conv2d_transpose(attrs, outputs, tvm.target.Target.current())
@reg.register_compute("nn.dense", level=15)
target = tvm.target.create(target)
return [topi.nn.dense(inputs[0], inputs[1], None, out_dtype)]
# If it's not packed, run on ARM CPU
- with tvm.target.arm_cpu(tvm.target.current_target().model):
+ with tvm.target.arm_cpu(tvm.target.Target.current().model):
return _nn.compute_dense(attrs, inputs, out_type, target)
# If VTA is not the target, default to _nn def
assert target.device_name == "vta"
return topi.generic.schedule_dense(outs)
# If it's not packed, run on ARM CPU
- with tvm.target.arm_cpu(tvm.target.current_target().model):
- return _nn.schedule_dense(attrs, outs, tvm.target.current_target())
+ with tvm.target.arm_cpu(tvm.target.Target.current().model):
+ return _nn.schedule_dense(attrs, outs, tvm.target.Target.current())
# If VTA is not the target, default to _nn def
return _nn.schedule_dense(attrs, outs, target)
res = my_clip(res, 0, (1 << env.OUT_WIDTH - 1) - 1)
res = topi.cast(res, env.out_dtype)
- if tvm.target.current_target().device_name == 'vta':
+ if tvm.target.Target.current().device_name == 'vta':
s = topi.generic.schedule_conv2d_nchw([res])
else:
s = tvm.create_schedule([res.op])
res = my_clip(res, 0, (1 << env.OUT_WIDTH - 1) - 1)
res = topi.cast(res, env.out_dtype)
- if tvm.target.current_target().device_name == 'vta':
+ if tvm.target.Target.current().device_name == 'vta':
s = topi.generic.schedule_conv2d_transpose_nchw([res])
else:
s = tvm.create_schedule([res.op])
res = my_clip(res, 0, 127)
res = topi.cast(res, "int8")
- if tvm.target.current_target().device_name == 'vta':
+ if tvm.target.Target.current().device_name == 'vta':
s = topi.generic.schedule_dense([res])
else:
s = tvm.create_schedule([res.op])
res = my_clip(res, 0, (1 << env.OUT_WIDTH - 1) - 1)
res = topi.cast(res, env.out_dtype)
- if tvm.target.current_target().device_name == 'vta':
+ if tvm.target.Target.current().device_name == 'vta':
s = topi.generic.schedule_group_conv2d_nchw([res])
else:
s = tvm.create_schedule([res.op])
res = my_clip(res, 0, 127)
res = topi.cast(res, "int8")
- if tvm.target.current_target().device_name == 'vta':
+ if tvm.target.Target.current().device_name == 'vta':
s = topi.generic.schedule_conv2d_nchw([res])
else:
s = tvm.create_schedule([res.op])
res = my_clip(res, 0, 127)
res = topi.cast(res, "int8")
- if tvm.target.current_target().device_name == 'vta':
+ if tvm.target.Target.current().device_name == 'vta':
s = topi.generic.schedule_dense([res])
else:
s = tvm.create_schedule([res.op])
res = my_clip(res, 0, 127)
res = topi.cast(res, "int8")
- if tvm.target.current_target().device_name == 'vta':
+ if tvm.target.Target.current().device_name == 'vta':
s = topi.generic.schedule_conv2d_nchw([res])
else:
s = tvm.create_schedule([res.op])