major = codegen.llvm_version_major()
lld_list += ["ld.lld-%d.0" % major]
lld_list += ["ld.lld-%d" % major]
- lld_list += ["lld"]
+ lld_list += ["ld.lld"]
valid_list = [util.which(x) for x in lld_list]
valid_list = [x for x in valid_list if x]
if not valid_list and required:
# Global declarations of environment.
tgt_host="llvm"
-# Change it to respective GPU if gpu is enabled Ex: cuda, opencl
+# Change it to the respective GPU target if a GPU is enabled, e.g. cuda, opencl, rocm
tgt="cuda"
######################################################################
# compute grid. These are GPU specific constructs that allow us
# to generate code that runs on GPU.
#
-if tgt == "cuda" or tgt.startswith('opencl'):
+if tgt == "cuda" or tgt == "rocm" or tgt.startswith('opencl'):
s[C].bind(bx, tvm.thread_axis("blockIdx.x"))
s[C].bind(tx, tvm.thread_axis("threadIdx.x"))
#
# The following code fetches the device module and prints the content code.
#
-if tgt == "cuda" or tgt.startswith('opencl'):
+if tgt == "cuda" or tgt == "rocm" or tgt.startswith('opencl'):
dev_module = fadd.imported_modules[0]
print("-----GPU code-----")
print(dev_module.get_source())
fadd.save(temp.relpath("myadd.o"))
if tgt == "cuda":
fadd.imported_modules[0].save(temp.relpath("myadd.ptx"))
+if tgt == "rocm":
+ fadd.imported_modules[0].save(temp.relpath("myadd.hsaco"))
if tgt.startswith('opencl'):
fadd.imported_modules[0].save(temp.relpath("myadd.cl"))
cc.create_shared(temp.relpath("myadd.so"), [temp.relpath("myadd.o")])
fadd1_dev = tvm.module.load(temp.relpath("myadd.ptx"))
fadd1.import_module(fadd1_dev)
+if tgt == "rocm":
+ fadd1_dev = tvm.module.load(temp.relpath("myadd.hsaco"))
+ fadd1.import_module(fadd1_dev)
+
if tgt.startswith('opencl'):
fadd1_dev = tvm.module.load(temp.relpath("myadd.cl"))
fadd1.import_module(fadd1_dev)