From 9816efc2df63cf6a14a6de46dc2adfafde58acc1 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Wed, 26 Feb 2020 21:10:47 -0800 Subject: [PATCH] [REFACTOR][PY][API-CHANGE] Remove legacy python files. (#4943) * [REFACTOR][PY][API-CHANGE] Remove legacy python files. Remove legacy python files. Use the te namespace for most of the tensor expression primitives. - tvm.create_schedule -> tvm.te.create_schedule - tvm.placeholder -> tvm.te.placeholder - tvm.compute -> tvm.te.compute * Remove top-level exposures. --- apps/android_rpc/tests/android_rpc_test.py | 21 +- apps/benchmark/arm_cpu_imagenet_bench.py | 1 + apps/benchmark/gpu_imagenet_bench.py | 1 + apps/benchmark/mobile_gpu_imagenet_bench.py | 1 + apps/bundle_deploy/build_model.py | 1 + apps/dso_plugin_module/test_plugin_module.py | 1 + apps/extension/python/tvm_ext/__init__.py | 1 + apps/extension/tests/test_ext.py | 23 +- apps/howto_deploy/prepare_test_libs.py | 9 +- apps/howto_deploy/python_deploy.py | 1 + apps/ios_rpc/tests/ios_rpc_test.py | 15 +- apps/sgx/enclave/src/build_model.py | 1 + apps/sgx/run_model.py | 1 + docs/api/python/te.rst | 1 + docs/api/python/tir.rst | 2 +- docs/conf.py | 1 + golang/sample/deploy.py | 11 +- jvm/core/src/test/scripts/test_add_cpu.py | 11 +- jvm/core/src/test/scripts/test_add_gpu.py | 15 +- jvm/core/src/test/scripts/test_graph_runtime.py | 7 +- python/tvm/__init__.py | 13 +- python/tvm/api.py | 38 -- python/tvm/arith/analyzer.py | 2 +- python/tvm/autotvm/feature.py | 25 +- python/tvm/autotvm/graph_tuner/base_graph_tuner.py | 5 +- python/tvm/autotvm/measure/measure_methods.py | 12 +- python/tvm/autotvm/task/code_hash.py | 4 +- python/tvm/autotvm/task/space.py | 34 +- python/tvm/autotvm/task/task.py | 29 +- python/tvm/autotvm/task/topi_integration.py | 6 +- python/tvm/autotvm/util.py | 2 +- python/tvm/contrib/binutil.py | 11 +- python/tvm/contrib/cblas.py | 6 +- python/tvm/contrib/cublas.py | 7 +- python/tvm/contrib/cublaslt.py | 4 +- python/tvm/contrib/cudnn.py | 13 +- 
python/tvm/contrib/debugger/debug_result.py | 1 + python/tvm/contrib/miopen.py | 9 +- python/tvm/contrib/mps.py | 7 +- python/tvm/contrib/nnpack.py | 14 +- python/tvm/contrib/nvcc.py | 5 +- python/tvm/contrib/peak.py | 27 +- python/tvm/contrib/random.py | 8 +- python/tvm/contrib/rocblas.py | 5 +- python/tvm/contrib/rocm.py | 10 +- python/tvm/contrib/sdaccel.py | 5 +- python/tvm/contrib/sparse.py | 13 +- python/tvm/contrib/tedd.py | 12 +- python/tvm/driver/build_module.py | 22 +- python/tvm/hybrid/calls.py | 25 +- python/tvm/hybrid/parser.py | 36 +- python/tvm/hybrid/util.py | 7 +- python/tvm/intrin.py | 19 - python/tvm/make.py | 52 -- python/tvm/relay/__init__.py | 2 +- python/tvm/relay/backend/_backend.py | 4 +- python/tvm/relay/backend/compile_engine.py | 17 +- python/tvm/relay/backend/graph_runtime_codegen.py | 2 +- python/tvm/relay/build_module.py | 2 +- python/tvm/relay/debug.py | 8 +- python/tvm/relay/frontend/coreml.py | 1 - python/tvm/relay/frontend/darknet.py | 1 - python/tvm/relay/frontend/mxnet.py | 2 - python/tvm/relay/frontend/pytorch.py | 2 +- python/tvm/relay/frontend/tensorflow.py | 5 +- python/tvm/relay/frontend/tflite.py | 1 - python/tvm/relay/op/_reduce.py | 2 +- python/tvm/relay/op/_tensor.py | 4 +- python/tvm/relay/op/_transform.py | 7 +- python/tvm/relay/op/algorithm.py | 2 +- python/tvm/relay/op/nn/_nn.py | 3 +- python/tvm/relay/op/op.py | 15 +- python/tvm/relay/param_dict.py | 6 +- python/tvm/relay/quantize/quantize.py | 6 +- python/tvm/relay/testing/__init__.py | 1 + python/tvm/relay/testing/config.py | 1 + python/tvm/relay/transform.py | 1 + python/tvm/runtime/vm.py | 1 + python/tvm/target/build_config.py | 2 +- python/tvm/target/generic_func.py | 2 + python/tvm/te/__init__.py | 3 + python/tvm/te/operation.py | 22 +- python/tvm/te/tag.py | 20 +- python/tvm/tir/__init__.py | 6 +- python/tvm/tir/buffer.py | 14 +- python/tvm/tir/expr.py | 10 +- python/tvm/tir/generic.py | 2 +- python/tvm/tir/ir_builder.py | 18 +- python/tvm/tir/op.py | 34 +- 
python/tvm/tir/stmt.py | 4 +- rust/frontend/examples/resnet/src/build_resnet.py | 1 + rust/frontend/tests/basics/src/tvm_add.py | 15 +- rust/runtime/tests/build_model.py | 1 + rust/runtime/tests/test_nn/src/build_test_graph.py | 1 + .../tests/test_tvm_basic/src/build_test_lib.py | 11 +- .../tests/test_tvm_dso/src/build_test_lib.py | 11 +- tests/python/contrib/test_binutil.py | 1 + tests/python/contrib/test_cblas.py | 23 +- tests/python/contrib/test_cublas.py | 19 +- tests/python/contrib/test_cudnn.py | 13 +- tests/python/contrib/test_dlpack.py | 13 +- tests/python/contrib/test_edgetpu_runtime.py | 1 + tests/python/contrib/test_gemm_acc16.py | 17 +- tests/python/contrib/test_gemm_acc32_vnni.py | 13 +- tests/python/contrib/test_miopen.py | 5 +- tests/python/contrib/test_mps.py | 23 +- tests/python/contrib/test_mxnet_bridge.py | 9 +- tests/python/contrib/test_nnpack.py | 27 +- tests/python/contrib/test_random.py | 7 +- tests/python/contrib/test_rocblas.py | 7 +- tests/python/contrib/test_rpc_proxy.py | 1 + tests/python/contrib/test_rpc_tracker.py | 1 + tests/python/contrib/test_sort.py | 21 +- tests/python/contrib/test_sparse.py | 33 +- tests/python/contrib/test_tedd.py | 48 +- tests/python/contrib/test_tflite_runtime.py | 9 +- tests/python/frontend/caffe2/test_forward.py | 1 + tests/python/frontend/coreml/test_forward.py | 1 + tests/python/frontend/darknet/test_forward.py | 1 + tests/python/frontend/keras/test_forward.py | 1 + tests/python/frontend/mxnet/test_forward.py | 1 + tests/python/frontend/mxnet/test_graph.py | 1 + tests/python/frontend/mxnet/test_qnn_ops_utils.py | 1 + tests/python/frontend/onnx/test_forward.py | 1 + tests/python/frontend/pytorch/test_forward.py | 3 +- tests/python/frontend/tensorflow/test_forward.py | 3 +- tests/python/frontend/tflite/test_forward.py | 1 + tests/python/integration/test_dot.py | 31 +- tests/python/integration/test_ewise.py | 91 ++-- tests/python/integration/test_ewise_fpga.py | 27 +- tests/python/integration/test_gemm.py | 
23 +- tests/python/integration/test_reduce.py | 123 ++--- tests/python/integration/test_scan.py | 23 +- tests/python/integration/test_tuning.py | 41 +- tests/python/integration/test_winograd_nnpack.py | 7 +- .../quantization/test_quantization_accuracy.py | 1 + tests/python/relay/benchmarking/benchmark_vm.py | 3 +- tests/python/relay/test_adt.py | 1 + tests/python/relay/test_any.py | 1 + tests/python/relay/test_backend_compile_engine.py | 7 +- tests/python/relay/test_backend_graph_runtime.py | 1 + tests/python/relay/test_backend_interpreter.py | 1 + tests/python/relay/test_change_batch.py | 1 + tests/python/relay/test_cpp_build_module.py | 1 + tests/python/relay/test_error_reporting.py | 1 + tests/python/relay/test_expr_functor.py | 1 + tests/python/relay/test_external_codegen.py | 1 + tests/python/relay/test_external_runtime.py | 1 + tests/python/relay/test_feature.py | 1 + tests/python/relay/test_ir_bind.py | 1 + tests/python/relay/test_ir_module.py | 1 + tests/python/relay/test_ir_nodes.py | 41 +- tests/python/relay/test_ir_parser.py | 1 + tests/python/relay/test_ir_text_printer.py | 3 +- tests/python/relay/test_ir_well_formed.py | 1 + tests/python/relay/test_json_compact.py | 1 + tests/python/relay/test_memory_alloc.py | 1 + tests/python/relay/test_op_grad_level1.py | 1 + tests/python/relay/test_op_grad_level2.py | 5 +- tests/python/relay/test_op_grad_level3.py | 1 + tests/python/relay/test_op_level1.py | 37 +- tests/python/relay/test_op_level10.py | 5 +- tests/python/relay/test_op_level2.py | 73 +-- tests/python/relay/test_op_level3.py | 31 +- tests/python/relay/test_op_level4.py | 7 +- tests/python/relay/test_op_level5.py | 23 +- tests/python/relay/test_op_level6.py | 1 + tests/python/relay/test_op_qnn_add.py | 1 + tests/python/relay/test_op_qnn_concatenate.py | 1 + tests/python/relay/test_op_qnn_conv2d.py | 1 + tests/python/relay/test_op_qnn_dense.py | 1 + tests/python/relay/test_op_qnn_dequantize.py | 1 + tests/python/relay/test_op_qnn_mul.py | 1 + 
tests/python/relay/test_op_qnn_quantize.py | 1 + tests/python/relay/test_op_qnn_requantize.py | 1 + tests/python/relay/test_param_dict.py | 1 + tests/python/relay/test_pass_alpha_equal.py | 93 ++-- tests/python/relay/test_pass_alter_op_layout.py | 1 + tests/python/relay/test_pass_annotation.py | 1 + tests/python/relay/test_pass_auto_quantize.py | 1 + tests/python/relay/test_pass_canonicalize_cast.py | 1 + tests/python/relay/test_pass_check_kind.py | 57 +-- .../relay/test_pass_combine_parallel_conv2d.py | 1 + .../relay/test_pass_combine_parallel_dense.py | 1 + tests/python/relay/test_pass_convert_op_layout.py | 1 + .../relay/test_pass_dead_code_elimination.py | 3 +- .../relay/test_pass_eliminate_common_subexpr.py | 1 + tests/python/relay/test_pass_eta_expand.py | 1 + tests/python/relay/test_pass_fold_constant.py | 3 +- tests/python/relay/test_pass_fold_scale_axis.py | 1 + tests/python/relay/test_pass_fuse_ops.py | 1 + tests/python/relay/test_pass_gradient.py | 1 + tests/python/relay/test_pass_lambda_lift.py | 1 + tests/python/relay/test_pass_legalize.py | 1 + tests/python/relay/test_pass_mac_count.py | 11 +- tests/python/relay/test_pass_manager.py | 1 + tests/python/relay/test_pass_partial_eval.py | 1 + tests/python/relay/test_pass_partition_graph.py | 1 + tests/python/relay/test_pass_qnn_legalize.py | 1 + .../relay/test_pass_remove_unused_functions.py | 1 + tests/python/relay/test_pass_to_a_normal_form.py | 1 + tests/python/relay/test_pass_to_cps.py | 1 + .../python/relay/test_pass_to_graph_normal_form.py | 1 + tests/python/relay/test_pass_unmatched_cases.py | 1 + tests/python/relay/test_pass_vars.py | 1 + tests/python/relay/test_py_converter.py | 1 + tests/python/relay/test_type_functor.py | 3 +- tests/python/relay/test_type_infer.py | 1 + tests/python/relay/test_type_solver.py | 1 + tests/python/relay/test_typecall.py | 1 + tests/python/relay/test_vm.py | 1 + tests/python/relay/test_vm_serialization.py | 1 + .../unittest/test_arith_canonical_simplify.py | 133 
++--- .../python/unittest/test_arith_const_int_bound.py | 53 +- tests/python/unittest/test_arith_deduce_bound.py | 63 +-- .../unittest/test_arith_detect_clip_bound.py | 19 +- .../unittest/test_arith_detect_linear_equation.py | 21 +- tests/python/unittest/test_arith_domain_touched.py | 13 +- tests/python/unittest/test_arith_intset.py | 29 +- tests/python/unittest/test_arith_modular_set.py | 37 +- .../python/unittest/test_arith_rewrite_simplify.py | 557 +++++++++++---------- tests/python/unittest/test_arith_stmt_simplify.py | 57 +-- tests/python/unittest/test_autotvm_common.py | 21 +- tests/python/unittest/test_autotvm_feature.py | 41 +- .../unittest/test_autotvm_flop_calculator.py | 79 +-- tests/python/unittest/test_autotvm_measure.py | 1 + tests/python/unittest/test_autotvm_record.py | 1 + tests/python/unittest/test_autotvm_space.py | 11 +- .../python/unittest/test_autotvm_xgboost_model.py | 1 + tests/python/unittest/test_build_lower.py | 27 +- tests/python/unittest/test_codegen_arm.py | 33 +- tests/python/unittest/test_codegen_blob.py | 13 +- tests/python/unittest/test_codegen_bool.py | 17 +- tests/python/unittest/test_codegen_c_host.py | 45 +- tests/python/unittest/test_codegen_cross_llvm.py | 11 +- tests/python/unittest/test_codegen_cuda.py | 105 ++-- tests/python/unittest/test_codegen_device.py | 55 +- tests/python/unittest/test_codegen_extern.py | 47 +- tests/python/unittest/test_codegen_llvm.py | 213 ++++---- tests/python/unittest/test_codegen_opencl.py | 41 +- tests/python/unittest/test_codegen_rocm.py | 47 +- tests/python/unittest/test_codegen_static_init.py | 31 +- tests/python/unittest/test_codegen_vm_basic.py | 49 +- tests/python/unittest/test_codegen_vulkan.py | 39 +- tests/python/unittest/test_codegen_x86.py | 9 +- tests/python/unittest/test_container.py | 1 + .../unittest/test_custom_datatypes_mybfloat16.py | 17 +- tests/python/unittest/test_graph_tuner_core.py | 15 +- tests/python/unittest/test_graph_tuner_utils.py | 1 + 
tests/python/unittest/test_hybrid_script.py | 136 ++--- tests/python/unittest/test_ir_builder.py | 49 +- tests/python/unittest/test_lang_basic.py | 117 ++--- tests/python/unittest/test_lang_buffer.py | 129 ++--- tests/python/unittest/test_lang_constructor.py | 33 +- tests/python/unittest/test_lang_container.py | 23 +- tests/python/unittest/test_lang_data_layout.py | 21 +- tests/python/unittest/test_lang_group.py | 45 +- tests/python/unittest/test_lang_operator.py | 95 ++-- tests/python/unittest/test_lang_reflection.py | 21 +- tests/python/unittest/test_lang_schedule.py | 161 +++--- tests/python/unittest/test_lang_tag.py | 64 +-- tests/python/unittest/test_lang_target.py | 1 + tests/python/unittest/test_lang_tensor.py | 255 +++++----- .../unittest/test_lang_tensor_overload_op.py | 65 +-- tests/python/unittest/test_lang_verify_compute.py | 33 +- .../python/unittest/test_pass_attrs_hash_equal.py | 21 +- tests/python/unittest/test_pass_basic.py | 41 +- tests/python/unittest/test_pass_bound_checkers.py | 267 +++++----- .../unittest/test_pass_combine_context_call.py | 19 +- .../unittest/test_pass_decorate_device_scope.py | 21 +- tests/python/unittest/test_pass_equal.py | 39 +- tests/python/unittest/test_pass_hoist_if.py | 55 +- .../unittest/test_pass_inject_copy_intrin.py | 109 ++-- .../unittest/test_pass_inject_double_buffer.py | 15 +- tests/python/unittest/test_pass_inject_vthread.py | 39 +- tests/python/unittest/test_pass_inline.py | 25 +- tests/python/unittest/test_pass_ir_transform.py | 17 +- tests/python/unittest/test_pass_lift_attr_scope.py | 13 +- tests/python/unittest/test_pass_loop_partition.py | 311 ++++++------ tests/python/unittest/test_pass_lower_intrin.py | 43 +- .../python/unittest/test_pass_lower_warp_memory.py | 15 +- tests/python/unittest/test_pass_makeapi.py | 25 +- tests/python/unittest/test_pass_remove_no_op.py | 19 +- .../unittest/test_pass_rewrite_for_tensor_core.py | 71 +-- .../unittest/test_pass_rewrite_unsafe_select.py | 13 +- 
.../python/unittest/test_pass_split_host_device.py | 9 +- tests/python/unittest/test_pass_storage_flatten.py | 73 +-- tests/python/unittest/test_pass_storage_rewrite.py | 321 ++++++------ tests/python/unittest/test_pass_storage_sync.py | 59 +-- tests/python/unittest/test_pass_unroll.py | 41 +- tests/python/unittest/test_pass_vectorize.py | 53 +- tests/python/unittest/test_pass_verify_gpu_code.py | 75 +-- tests/python/unittest/test_pass_verify_memory.py | 61 +-- tests/python/unittest/test_pass_virtual_thread.py | 27 +- tests/python/unittest/test_runtime_error.py | 1 + tests/python/unittest/test_runtime_extension.py | 13 +- tests/python/unittest/test_runtime_graph.py | 7 +- tests/python/unittest/test_runtime_graph_debug.py | 7 +- .../python/unittest/test_runtime_heterogeneous.py | 35 +- tests/python/unittest/test_runtime_measure.py | 5 +- tests/python/unittest/test_runtime_micro.py | 25 +- .../python/unittest/test_runtime_module_export.py | 19 +- tests/python/unittest/test_runtime_module_load.py | 32 +- tests/python/unittest/test_runtime_ndarray.py | 7 +- tests/python/unittest/test_runtime_packed_func.py | 103 ++-- tests/python/unittest/test_runtime_rpc.py | 21 +- tests/python/unittest/test_runtime_vm_profiler.py | 1 + .../unittest/test_schedule_bound_inference.py | 315 ++++++------ tests/python/unittest/test_schedule_graph.py | 119 ++--- tests/python/unittest/test_schedule_lstm.py | 51 +- .../python/unittest/test_schedule_schedule_ops.py | 433 ++++++++-------- tests/python/unittest/test_schedule_tensor_core.py | 119 ++--- tests/python/unittest/test_schedule_tensorize.py | 225 ++++----- tests/python/unittest/test_testing.py | 1 + tests/python/unittest/test_tvm_intrin.py | 9 +- tests/web/prepare_test_libs.py | 9 +- tests/web/websock_rpc_test.py | 9 +- tests/webgl/test_local_gemm.py | 15 +- tests/webgl/test_local_multi_stage.py | 11 +- tests/webgl/test_local_save_load.py | 11 +- tests/webgl/test_local_topi_conv2d_nchw.py | 7 +- tests/webgl/test_local_topi_dense.py | 7 
+- tests/webgl/test_local_topi_pooling.py | 5 +- tests/webgl/test_local_topi_softmax.py | 9 +- tests/webgl/test_remote_save_load.py | 11 +- tests/webgl/test_static_webgl_library.py | 9 +- topi/python/topi/argwhere.py | 24 +- topi/python/topi/arm_cpu/bitserial_conv2d.py | 170 ++++--- topi/python/topi/arm_cpu/bitserial_dense.py | 39 +- topi/python/topi/arm_cpu/conv2d.py | 97 ++-- topi/python/topi/arm_cpu/conv2d_alter_op.py | 19 +- topi/python/topi/arm_cpu/conv2d_int8.py | 15 +- topi/python/topi/arm_cpu/conv2d_spatial_pack.py | 120 ++--- topi/python/topi/arm_cpu/conv2d_transpose.py | 51 +- topi/python/topi/arm_cpu/depthwise_conv2d.py | 87 ++-- topi/python/topi/arm_cpu/injective.py | 13 +- topi/python/topi/arm_cpu/tensor_intrin.py | 47 +- topi/python/topi/bifrost/conv2d.py | 105 ++-- topi/python/topi/bifrost/dense.py | 21 +- topi/python/topi/bifrost/depthwise_conv2d.py | 19 +- topi/python/topi/bifrost/gemm.py | 89 ++-- topi/python/topi/bifrost/transforms.py | 25 +- topi/python/topi/broadcast.py | 150 +++--- topi/python/topi/cuda/batch_matmul.py | 23 +- topi/python/topi/cuda/conv1d.py | 53 +- topi/python/topi/cuda/conv1d_transpose_ncw.py | 55 +- topi/python/topi/cuda/conv2d.py | 10 +- topi/python/topi/cuda/conv2d_alter_op.py | 23 +- topi/python/topi/cuda/conv2d_direct.py | 27 +- topi/python/topi/cuda/conv2d_hwcn.py | 31 +- topi/python/topi/cuda/conv2d_int8.py | 89 ++-- topi/python/topi/cuda/conv2d_transpose_nchw.py | 81 +-- topi/python/topi/cuda/conv2d_winograd.py | 107 ++-- topi/python/topi/cuda/conv3d.py | 28 +- topi/python/topi/cuda/conv3d_direct.py | 29 +- topi/python/topi/cuda/deformable_conv2d.py | 31 +- topi/python/topi/cuda/dense.py | 79 ++- topi/python/topi/cuda/depthwise_conv2d.py | 63 +-- topi/python/topi/cuda/group_conv2d_nchw.py | 127 ++--- topi/python/topi/cuda/injective.py | 15 +- topi/python/topi/cuda/nms.py | 245 ++++----- topi/python/topi/cuda/pooling.py | 37 +- topi/python/topi/cuda/rcnn/proposal.py | 139 ++--- topi/python/topi/cuda/reduction.py | 17 
+- topi/python/topi/cuda/softmax.py | 10 +- topi/python/topi/cuda/sort.py | 156 +++--- topi/python/topi/cuda/ssd/multibox.py | 136 +++-- topi/python/topi/cuda/tensor_intrin.py | 25 +- topi/python/topi/cuda/vision.py | 5 +- topi/python/topi/generic/__init__.py | 2 +- topi/python/topi/generic/conv2d.py | 11 +- topi/python/topi/generic/extern.py | 2 - topi/python/topi/generic/injective.py | 7 +- topi/python/topi/generic/nn.py | 12 +- topi/python/topi/generic/vision.py | 7 +- topi/python/topi/generic_op_impl.py | 20 +- topi/python/topi/hls/injective.py | 9 +- topi/python/topi/hls/nn.py | 59 +-- topi/python/topi/image/resize.py | 165 +++--- topi/python/topi/intel_graphics/conv2d.py | 113 ++--- topi/python/topi/intel_graphics/conv2d_alter_op.py | 9 +- .../python/topi/intel_graphics/depthwise_conv2d.py | 61 +-- topi/python/topi/mali/conv2d.py | 138 ++--- topi/python/topi/mali/dense.py | 21 +- topi/python/topi/mali/depthwise_conv2d.py | 31 +- topi/python/topi/math.py | 196 ++++---- topi/python/topi/nn/batch_matmul.py | 17 +- topi/python/topi/nn/bitserial_conv2d.py | 70 +-- topi/python/topi/nn/bitserial_dense.py | 25 +- topi/python/topi/nn/bitserial_util.py | 8 +- topi/python/topi/nn/bnn.py | 31 +- topi/python/topi/nn/conv1d.py | 31 +- topi/python/topi/nn/conv1d_transpose.py | 23 +- topi/python/topi/nn/conv2d.py | 241 ++++----- topi/python/topi/nn/conv2d_transpose.py | 24 +- topi/python/topi/nn/conv3d.py | 39 +- topi/python/topi/nn/deformable_conv2d.py | 43 +- topi/python/topi/nn/dense.py | 27 +- topi/python/topi/nn/depth_to_space.py | 19 +- topi/python/topi/nn/depthwise_conv2d.py | 95 ++-- topi/python/topi/nn/dilate.py | 21 +- topi/python/topi/nn/elemwise.py | 33 +- topi/python/topi/nn/fifo_buffer.py | 129 ++--- topi/python/topi/nn/flatten.py | 13 +- topi/python/topi/nn/local_response_norm.py | 4 +- topi/python/topi/nn/mapping.py | 25 +- topi/python/topi/nn/pad.py | 33 +- topi/python/topi/nn/pooling.py | 26 +- topi/python/topi/nn/softmax.py | 51 +- 
topi/python/topi/nn/space_to_depth.py | 19 +- topi/python/topi/nn/sparse.py | 47 +- topi/python/topi/nn/upsampling.py | 31 +- topi/python/topi/nn/util.py | 4 +- topi/python/topi/nn/winograd_util.py | 2 +- topi/python/topi/opengl/conv2d_nchw.py | 9 +- topi/python/topi/opengl/dense.py | 9 +- topi/python/topi/opengl/injective.py | 8 +- topi/python/topi/opengl/pooling.py | 16 +- topi/python/topi/opengl/softmax.py | 6 +- topi/python/topi/reduction.py | 32 +- topi/python/topi/rocm/conv2d.py | 8 +- topi/python/topi/rocm/dense.py | 35 +- topi/python/topi/sort.py | 89 ++-- topi/python/topi/sparse/csrmm.py | 35 +- topi/python/topi/sparse/csrmv.py | 35 +- topi/python/topi/sparse/dense.py | 79 +-- topi/python/topi/tensor.py | 10 +- .../python/topi/testing/conv2d_transpose_python.py | 4 +- topi/python/topi/testing/conv3d_ncdhw_python.py | 2 +- topi/python/topi/testing/conv3d_ndhwc_python.py | 2 +- .../python/topi/testing/depthwise_conv2d_python.py | 16 +- topi/python/topi/testing/pool3d_python.py | 4 +- topi/python/topi/testing/pool_grad_python.py | 2 +- topi/python/topi/testing/roi_align_python.py | 4 +- topi/python/topi/transform.py | 159 +++--- topi/python/topi/util.py | 85 ++-- topi/python/topi/vision/nms.py | 95 ++-- topi/python/topi/vision/rcnn/proposal.py | 115 ++--- topi/python/topi/vision/rcnn/roi_align.py | 41 +- topi/python/topi/vision/rcnn/roi_pool.py | 51 +- topi/python/topi/vision/reorg.py | 4 +- topi/python/topi/vision/ssd/multibox.py | 53 +- topi/python/topi/x86/batch_matmul.py | 23 +- topi/python/topi/x86/binarize_pack.py | 7 +- topi/python/topi/x86/binary_dense.py | 9 +- topi/python/topi/x86/bitserial_conv2d.py | 89 ++-- topi/python/topi/x86/bitserial_dense.py | 41 +- topi/python/topi/x86/conv1d.py | 19 +- topi/python/topi/x86/conv2d.py | 33 +- topi/python/topi/x86/conv2d_alter_op.py | 33 +- topi/python/topi/x86/conv2d_avx_1x1.py | 35 +- topi/python/topi/x86/conv2d_avx_common.py | 5 +- topi/python/topi/x86/conv2d_int8.py | 25 +- 
topi/python/topi/x86/conv2d_transpose.py | 4 +- topi/python/topi/x86/conv3d.py | 79 +-- topi/python/topi/x86/dense.py | 79 +-- topi/python/topi/x86/depthwise_conv2d.py | 33 +- topi/python/topi/x86/injective.py | 15 +- topi/python/topi/x86/nn.py | 7 +- topi/python/topi/x86/pooling.py | 16 +- topi/python/topi/x86/reduction.py | 10 +- topi/python/topi/x86/roi_align.py | 56 +-- topi/python/topi/x86/sparse.py | 4 +- topi/python/topi/x86/tensor_intrin.py | 187 +++---- topi/python/topi/x86/util.py | 2 +- topi/recipe/broadcast/test_broadcast_map.py | 7 +- topi/recipe/conv/depthwise_conv2d_test.py | 21 +- topi/recipe/conv/test_conv2d_hwcn_map.py | 7 +- topi/recipe/conv/test_conv_int8_arm.py | 7 +- topi/recipe/conv/test_conv_int8_intel.py | 7 +- topi/recipe/gemm/android_gemm_square.py | 27 +- topi/recipe/gemm/cuda_gemm_square.py | 31 +- topi/recipe/gemm/gemm_int8.py | 23 +- topi/recipe/reduce/test_reduce_map.py | 5 +- topi/recipe/rnn/lstm.py | 53 +- topi/recipe/rnn/matexp.py | 31 +- topi/tests/python/common.py | 1 + topi/tests/python/test_fifo_buffer.py | 17 +- topi/tests/python/test_topi_basic.py | 9 +- topi/tests/python/test_topi_batch_matmul.py | 5 +- topi/tests/python/test_topi_bitserial_conv2d.py | 9 +- .../python/test_topi_bitserial_conv2d_rasp.py | 5 +- topi/tests/python/test_topi_bitserial_dense.py | 5 +- topi/tests/python/test_topi_bnn.py | 9 +- topi/tests/python/test_topi_broadcast.py | 35 +- topi/tests/python/test_topi_clip.py | 5 +- topi/tests/python/test_topi_conv1d.py | 5 +- .../tests/python/test_topi_conv1d_transpose_ncw.py | 5 +- topi/tests/python/test_topi_conv2d_NCHWc.py | 7 +- topi/tests/python/test_topi_conv2d_hwcn.py | 7 +- topi/tests/python/test_topi_conv2d_int8.py | 7 +- topi/tests/python/test_topi_conv2d_nchw.py | 7 +- topi/tests/python/test_topi_conv2d_nhwc.py | 5 +- .../python/test_topi_conv2d_nhwc_pack_int8.py | 5 +- .../python/test_topi_conv2d_transpose_nchw.py | 5 +- topi/tests/python/test_topi_conv2d_winograd.py | 7 +- 
topi/tests/python/test_topi_conv3d_ncdhw.py | 7 +- topi/tests/python/test_topi_conv3d_ndhwc.py | 5 +- topi/tests/python/test_topi_deformable_conv2d.py | 9 +- topi/tests/python/test_topi_dense.py | 13 +- topi/tests/python/test_topi_depth_to_space.py | 3 +- topi/tests/python/test_topi_depthwise_conv2d.py | 21 +- .../test_topi_depthwise_conv2d_back_input.py | 5 +- .../test_topi_depthwise_conv2d_back_weight.py | 5 +- topi/tests/python/test_topi_dilate.py | 5 +- topi/tests/python/test_topi_group_conv2d.py | 13 +- .../python/test_topi_group_conv2d_NCHWc_int8.py | 5 +- topi/tests/python/test_topi_image.py | 15 +- topi/tests/python/test_topi_lrn.py | 3 +- topi/tests/python/test_topi_math.py | 25 +- topi/tests/python/test_topi_matmul.py | 5 +- topi/tests/python/test_topi_pooling.py | 15 +- topi/tests/python/test_topi_reduce.py | 3 +- topi/tests/python/test_topi_relu.py | 13 +- topi/tests/python/test_topi_reorg.py | 3 +- topi/tests/python/test_topi_softmax.py | 11 +- topi/tests/python/test_topi_sort.py | 5 +- topi/tests/python/test_topi_space_to_depth.py | 3 +- topi/tests/python/test_topi_sparse.py | 71 +-- topi/tests/python/test_topi_tensor.py | 17 +- topi/tests/python/test_topi_transform.py | 85 ++-- topi/tests/python/test_topi_upsampling.py | 11 +- topi/tests/python/test_topi_vision.py | 31 +- tutorials/autotvm/tune_conv2d_cuda.py | 31 +- tutorials/autotvm/tune_relay_arm.py | 1 + tutorials/autotvm/tune_relay_cuda.py | 1 + tutorials/autotvm/tune_relay_mobile_gpu.py | 1 + tutorials/autotvm/tune_relay_x86.py | 1 + tutorials/autotvm/tune_simple_template.py | 31 +- tutorials/cross_compilation_and_rpc.py | 15 +- tutorials/dev/low_level_custom_pass.py | 29 +- tutorials/dev/relay_pass_infra.py | 1 + tutorials/frontend/build_gcn.py | 1 + tutorials/frontend/deploy_model_on_android.py | 1 + tutorials/frontend/deploy_model_on_rasp.py | 1 + tutorials/frontend/deploy_quantized.py | 1 + tutorials/frontend/deploy_ssd_gluoncv.py | 3 +- tutorials/frontend/from_caffe2.py | 1 + 
tutorials/frontend/from_coreml.py | 1 + tutorials/frontend/from_darknet.py | 1 + tutorials/frontend/from_keras.py | 1 + tutorials/frontend/from_mxnet.py | 1 + tutorials/frontend/from_onnx.py | 1 + tutorials/frontend/from_tensorflow.py | 1 + tutorials/frontend/from_tflite.py | 1 + tutorials/frontend/using_external_lib.py | 1 + tutorials/language/extern_op.py | 29 +- tutorials/language/intrin_math.py | 61 +-- tutorials/language/reduction.py | 67 +-- tutorials/language/scan.py | 63 +-- tutorials/language/schedule_primitives.py | 85 ++-- tutorials/language/tedd.py | 13 +- tutorials/language/tensorize.py | 71 +-- tutorials/language/tuple_inputs.py | 49 +- tutorials/optimize/opt_conv_cuda.py | 39 +- tutorials/optimize/opt_conv_tensorcore.py | 95 ++-- tutorials/optimize/opt_gemm.py | 31 +- tutorials/optimize/opt_matmul_auto_tensorcore.py | 43 +- tutorials/relay_quick_start.py | 1 + tutorials/tensor_expr_get_started.py | 17 +- tutorials/topi/intro_topi.py | 27 +- vta/apps/gemm/python/tsim.py | 1 + vta/apps/gemm/tests/python/chisel_accel.py | 23 +- vta/apps/tsim_example/python/tsim.py | 1 + vta/apps/tsim_example/tests/python/chisel_accel.py | 1 + .../tsim_example/tests/python/verilog_accel.py | 1 + vta/python/vta/build_module.py | 14 +- vta/python/vta/environment.py | 11 +- vta/python/vta/intrin.py | 47 +- vta/python/vta/ir_pass.py | 171 +++---- vta/python/vta/pkg_config.py | 24 +- vta/python/vta/top/bitpack.py | 7 +- vta/python/vta/top/op.py | 15 +- vta/python/vta/top/vta_conv2d.py | 23 +- vta/python/vta/top/vta_conv2d_transpose.py | 23 +- vta/python/vta/top/vta_dense.py | 15 +- vta/python/vta/top/vta_group_conv2d.py | 23 +- vta/scripts/tune_conv2d.py | 19 +- vta/scripts/tune_conv2d_transpose.py | 17 +- vta/scripts/tune_dense.py | 17 +- vta/scripts/tune_group_conv2d.py | 19 +- vta/scripts/tune_resnet.py | 15 +- .../python/integration/test_benchmark_gemm.py | 31 +- .../integration/test_benchmark_topi_conv2d.py | 17 +- .../test_benchmark_topi_conv2d_transpose.py | 15 +- 
.../integration/test_benchmark_topi_dense.py | 15 +- .../test_benchmark_topi_group_conv2d.py | 17 +- vta/tests/python/pynq/test_program_rpc.py | 1 + vta/tests/python/unittest/test_vta_insn.py | 101 ++-- vta/tutorials/autotvm/tune_relay_vta.py | 13 +- vta/tutorials/frontend/deploy_classification.py | 1 + vta/tutorials/matrix_multiply.py | 25 +- vta/tutorials/optimize/convolution_opt.py | 35 +- vta/tutorials/optimize/matrix_multiply_opt.py | 31 +- vta/tutorials/vta_get_started.py | 15 +- 595 files changed, 9038 insertions(+), 8687 deletions(-) delete mode 100644 python/tvm/api.py delete mode 100644 python/tvm/intrin.py delete mode 100644 python/tvm/make.py diff --git a/apps/android_rpc/tests/android_rpc_test.py b/apps/android_rpc/tests/android_rpc_test.py index 122d07f..32af005 100644 --- a/apps/android_rpc/tests/android_rpc_test.py +++ b/apps/android_rpc/tests/android_rpc_test.py @@ -22,6 +22,7 @@ Use "android" as the key if you wish to avoid modifying this script. """ import tvm +from tvm import te import os from tvm import rpc from tvm.contrib import util, ndk @@ -44,9 +45,9 @@ test_vulkan = False def test_rpc_module(): # graph - n = tvm.convert(1024) - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + n = tvm.runtime.convert(1024) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') a_np = np.random.uniform(size=1024).astype(A.dtype) temp = util.tempdir() @@ -56,7 +57,7 @@ def test_rpc_module(): session_timeout=60) # Compile the Graph for CPU target - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=64) s[B].parallel(xi) s[B].pragma(xo, "parallel_launch_point") @@ -79,10 +80,10 @@ def test_rpc_module(): # Compile the Graph for OpenCL target if test_opencl: - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=64) - s[B].bind(xi, tvm.thread_axis("threadIdx.x")) - 
s[B].bind(xo, tvm.thread_axis("blockIdx.x")) + s[B].bind(xi, te.thread_axis("threadIdx.x")) + s[B].bind(xo, te.thread_axis("blockIdx.x")) # Build the dynamic lib. # If we don't want to do metal and only use cpu, just set target to be target f = tvm.build(s, [A, B], "opencl", target_host=target, name="myadd") @@ -102,10 +103,10 @@ def test_rpc_module(): # Compile the Graph for Vulkan target if test_vulkan: - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=64) - s[B].bind(xi, tvm.thread_axis("threadIdx.x")) - s[B].bind(xo, tvm.thread_axis("blockIdx.x")) + s[B].bind(xi, te.thread_axis("threadIdx.x")) + s[B].bind(xo, te.thread_axis("blockIdx.x")) # Build the dynamic lib. # If we don't want to do metal and only use cpu, just set target to be target f = tvm.build(s, [A, B], "vulkan", target_host=target, name="myadd") diff --git a/apps/benchmark/arm_cpu_imagenet_bench.py b/apps/benchmark/arm_cpu_imagenet_bench.py index 5403e96..53b6168 100644 --- a/apps/benchmark/arm_cpu_imagenet_bench.py +++ b/apps/benchmark/arm_cpu_imagenet_bench.py @@ -22,6 +22,7 @@ import argparse import numpy as np import tvm +from tvm import te from tvm.contrib.util import tempdir import tvm.contrib.graph_runtime as runtime from tvm import relay diff --git a/apps/benchmark/gpu_imagenet_bench.py b/apps/benchmark/gpu_imagenet_bench.py index fd96be6..dfb0445 100644 --- a/apps/benchmark/gpu_imagenet_bench.py +++ b/apps/benchmark/gpu_imagenet_bench.py @@ -23,6 +23,7 @@ import threading import numpy as np import tvm +from tvm import te import tvm.contrib.graph_runtime as runtime from tvm import relay diff --git a/apps/benchmark/mobile_gpu_imagenet_bench.py b/apps/benchmark/mobile_gpu_imagenet_bench.py index d5d60a2..4f93a0d 100644 --- a/apps/benchmark/mobile_gpu_imagenet_bench.py +++ b/apps/benchmark/mobile_gpu_imagenet_bench.py @@ -22,6 +22,7 @@ import argparse import numpy as np import tvm +from tvm import te from tvm.contrib.util import tempdir 
import tvm.contrib.graph_runtime as runtime from tvm import relay diff --git a/apps/bundle_deploy/build_model.py b/apps/bundle_deploy/build_model.py index de9e735..37e3024 100644 --- a/apps/bundle_deploy/build_model.py +++ b/apps/bundle_deploy/build_model.py @@ -20,6 +20,7 @@ import argparse import os from tvm import relay import tvm +from tvm import te import logging diff --git a/apps/dso_plugin_module/test_plugin_module.py b/apps/dso_plugin_module/test_plugin_module.py index 6304ef9..0704dd0 100644 --- a/apps/dso_plugin_module/test_plugin_module.py +++ b/apps/dso_plugin_module/test_plugin_module.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import os def test_plugin_module(): diff --git a/apps/extension/python/tvm_ext/__init__.py b/apps/extension/python/tvm_ext/__init__.py index 31b149e..377db7c 100644 --- a/apps/extension/python/tvm_ext/__init__.py +++ b/apps/extension/python/tvm_ext/__init__.py @@ -21,6 +21,7 @@ import os import ctypes # Import TVM first to get library symbols import tvm +from tvm import te def load_lib(): """Load library, the functions will be registered into TVM""" diff --git a/apps/extension/tests/test_ext.py b/apps/extension/tests/test_ext.py index 257ecd6..f7e17d2 100644 --- a/apps/extension/tests/test_ext.py +++ b/apps/extension/tests/test_ext.py @@ -16,6 +16,8 @@ # under the License. 
import tvm_ext import tvm +import tvm._ffi.registry +from tvm import te import numpy as np def test_bind_add(): @@ -26,9 +28,9 @@ def test_bind_add(): def test_ext_dev(): n = 10 - A = tvm.placeholder((n,), name='A') - B = tvm.compute((n,), lambda *i: A(*i) + 1.0, name='B') - s = tvm.create_schedule(B.op) + A = te.placeholder((n,), name='A') + B = te.compute((n,), lambda *i: A(*i) + 1.0, name='B') + s = te.create_schedule(B.op) def check_llvm(): if not tvm.runtime.enabled("llvm"): return @@ -43,8 +45,8 @@ def test_ext_dev(): def test_sym_add(): - a = tvm.var('a') - b = tvm.var('b') + a = te.var('a') + b = te.var('b') c = tvm_ext.sym_add(a, b) assert c.a == a and c.b == b @@ -59,19 +61,20 @@ def test_ext_vec(): assert(isinstance(v2, tvm_ext.IntVec)) assert v2[2] == 3 - tvm.convert(ivec_cb)(ivec) + tvm.runtime.convert(ivec_cb)(ivec) def test_extract_ext(): - fdict = tvm.extract_ext_funcs(tvm_ext._LIB.TVMExtDeclare) + fdict = tvm._ffi.registry.extract_ext_funcs( + tvm_ext._LIB.TVMExtDeclare) assert fdict["mul"](3, 4) == 12 def test_extern_call(): n = 10 - A = tvm.placeholder((n,), name='A') - B = tvm.compute((n,), lambda *i: tvm.call_extern("float32", "TVMTestAddOne", A(*i)), name='B') - s = tvm.create_schedule(B.op) + A = te.placeholder((n,), name='A') + B = te.compute((n,), lambda *i: tvm.tir.call_extern("float32", "TVMTestAddOne", A(*i)), name='B') + s = te.create_schedule(B.op) def check_llvm(): if not tvm.runtime.enabled("llvm"): diff --git a/apps/howto_deploy/prepare_test_libs.py b/apps/howto_deploy/prepare_test_libs.py index b620bc7..88d9f8e 100644 --- a/apps/howto_deploy/prepare_test_libs.py +++ b/apps/howto_deploy/prepare_test_libs.py @@ -16,13 +16,14 @@ # under the License. 
"""Script to prepare test_addone.so""" import tvm +from tvm import te import os def prepare_test_libs(base_path): - n = tvm.var("n") - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') - s = tvm.create_schedule(B.op) + n = te.var("n") + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + s = te.create_schedule(B.op) # Compile library as dynamic library fadd_dylib = tvm.build(s, [A, B], "llvm", name="addone") dylib_path = os.path.join(base_path, "test_addone_dll.so") diff --git a/apps/howto_deploy/python_deploy.py b/apps/howto_deploy/python_deploy.py index 07a27fe..2a44325 100644 --- a/apps/howto_deploy/python_deploy.py +++ b/apps/howto_deploy/python_deploy.py @@ -19,6 +19,7 @@ # file python_deploy.py import tvm +from tvm import te import numpy as np def verify(mod, fname): diff --git a/apps/ios_rpc/tests/ios_rpc_test.py b/apps/ios_rpc/tests/ios_rpc_test.py index ac3718f..973c252 100644 --- a/apps/ios_rpc/tests/ios_rpc_test.py +++ b/apps/ios_rpc/tests/ios_rpc_test.py @@ -21,6 +21,7 @@ And configure the proxy host field as commented. """ import tvm +from tvm import te import os import re import sys @@ -54,14 +55,14 @@ def compile_metal(src): def test_rpc_module(): # graph - n = tvm.convert(1024) - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + n = tvm.runtime.convert(1024) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') temp = util.tempdir() - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=64) - s[B].bind(xi, tvm.thread_axis("threadIdx.x")) - s[B].bind(xo, tvm.thread_axis("blockIdx.x")) + s[B].bind(xi, te.thread_axis("threadIdx.x")) + s[B].bind(xo, te.thread_axis("blockIdx.x")) # Build the dynamic lib. 
# If we don't want to do metal and only use cpu, just set target to be target f = tvm.build(s, [A, B], "metal", target_host=target, name="myadd") @@ -70,7 +71,7 @@ def test_rpc_module(): arch=arch, sdk=sdk) xcode.codesign(path_dso1) - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=64) s[B].parallel(xi) s[B].pragma(xo, "parallel_launch_point") diff --git a/apps/sgx/enclave/src/build_model.py b/apps/sgx/enclave/src/build_model.py index dff5716..f8906d1 100644 --- a/apps/sgx/enclave/src/build_model.py +++ b/apps/sgx/enclave/src/build_model.py @@ -23,6 +23,7 @@ from os import path as osp from tvm import relay from tvm.relay import testing import tvm +from tvm import te def main(): diff --git a/apps/sgx/run_model.py b/apps/sgx/run_model.py index fb39e34..c7af963 100644 --- a/apps/sgx/run_model.py +++ b/apps/sgx/run_model.py @@ -17,6 +17,7 @@ import os.path as osp import numpy as np import tvm +from tvm import te CWD = osp.abspath(osp.dirname(__file__)) diff --git a/docs/api/python/te.rst b/docs/api/python/te.rst index dc3d3da..1f70c4d 100644 --- a/docs/api/python/te.rst +++ b/docs/api/python/te.rst @@ -23,6 +23,7 @@ tvm.te :members: :imported-members: :exclude-members: + any, all, min_value, max_value, trace, exp, erf, tanh, sigmoid, log, cos, sin, atan, sqrt, rsqrt, floor, ceil, trunc, abs, round, nearbyint, isnan, power, popcount, fmod, if_then_else, div, indexdiv, indexmod, truncdiv, truncmod, floordiv, floormod, diff --git a/docs/api/python/tir.rst b/docs/api/python/tir.rst index d1017cd..ea1ac66 100644 --- a/docs/api/python/tir.rst +++ b/docs/api/python/tir.rst @@ -20,5 +20,5 @@ tvm.tir .. 
automodule:: tvm.tir :members: :imported-members: - :exclude-members: PrimExpr + :exclude-members: PrimExpr, const :autosummary: diff --git a/docs/conf.py b/docs/conf.py index 05f4cfc..d882f75 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -61,6 +61,7 @@ source_parsers = { os.environ['TVM_BUILD_DOC'] = '1' # Version information. import tvm +from tvm import te version = tvm.__version__ release = tvm.__version__ diff --git a/golang/sample/deploy.py b/golang/sample/deploy.py index 3b22136..d523b9c 100644 --- a/golang/sample/deploy.py +++ b/golang/sample/deploy.py @@ -21,6 +21,7 @@ Get Started with TVM Go from __future__ import absolute_import, print_function import tvm +from tvm import te import numpy as np # Global declarations of environment. @@ -31,15 +32,15 @@ tgt="llvm" ###################################################################### # Describe the Computation # ------------------------ -n = tvm.var("n") -A = tvm.placeholder((n,), name='A') -B = tvm.placeholder((n,), name='B') -C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C") +n = te.var("n") +A = te.placeholder((n,), name='A') +B = te.placeholder((n,), name='B') +C = te.compute(A.shape, lambda i: A[i] + B[i], name="C") ###################################################################### # Schedule the Computation # ------------------------ -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) ###################################################################### # Compilation diff --git a/jvm/core/src/test/scripts/test_add_cpu.py b/jvm/core/src/test/scripts/test_add_cpu.py index dd7e4a8..bda66f8 100644 --- a/jvm/core/src/test/scripts/test_add_cpu.py +++ b/jvm/core/src/test/scripts/test_add_cpu.py @@ -17,14 +17,15 @@ import os import tvm +from tvm import te from tvm.contrib import cc, util def test_add(target_dir): - n = tvm.var("n") - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C") - s = 
tvm.create_schedule(C.op) + n = te.var("n") + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda i: A[i] + B[i], name="C") + s = te.create_schedule(C.op) fadd = tvm.build(s, [A, B, C], "llvm", target_host="llvm", name="myadd") fadd.save(os.path.join(target_dir, "add_cpu.o")) diff --git a/jvm/core/src/test/scripts/test_add_gpu.py b/jvm/core/src/test/scripts/test_add_gpu.py index e3f4fbf..d520054 100644 --- a/jvm/core/src/test/scripts/test_add_gpu.py +++ b/jvm/core/src/test/scripts/test_add_gpu.py @@ -17,22 +17,23 @@ import os import tvm +from tvm import te from tvm.contrib import cc, util def test_add(target_dir): if not tvm.runtime.enabled("cuda"): print("skip %s because cuda is not enabled..." % __file__) return - n = tvm.var("n") - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C") + n = te.var("n") + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda i: A[i] + B[i], name="C") - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) bx, tx = s[C].split(C.op.axis[0], factor=64) - s[C].bind(bx, tvm.thread_axis("blockIdx.x")) - s[C].bind(tx, tvm.thread_axis("threadIdx.x")) + s[C].bind(bx, te.thread_axis("blockIdx.x")) + s[C].bind(tx, te.thread_axis("threadIdx.x")) fadd_cuda = tvm.build(s, [A, B, C], "cuda", target_host="llvm", name="myadd") fadd_cuda.save(os.path.join(target_dir, "add_gpu.o")) diff --git a/jvm/core/src/test/scripts/test_graph_runtime.py b/jvm/core/src/test/scripts/test_graph_runtime.py index 4d82973..63a76d1 100644 --- a/jvm/core/src/test/scripts/test_graph_runtime.py +++ b/jvm/core/src/test/scripts/test_graph_runtime.py @@ -17,14 +17,15 @@ import os import tvm +from tvm import te import json from tvm.contrib import graph_runtime def dump_graph_lib(target_dir): dim = 4 - A = tvm.placeholder((dim,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 
1.0, name='B') - sched = tvm.create_schedule(B.op) + A = te.placeholder((dim,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + sched = te.create_schedule(B.op) node0 = {"op": "null", "name": "x", "inputs": []} node1 = {"op": "tvm_op", "name": "add", diff --git a/python/tvm/__init__.py b/python/tvm/__init__.py index 65cb672..0c4ca13 100644 --- a/python/tvm/__init__.py +++ b/python/tvm/__init__.py @@ -24,7 +24,7 @@ import traceback # tvm._ffi from ._ffi.base import TVMError, __version__ from ._ffi.runtime_ctypes import TypeCode, DataType -from ._ffi.registry import register_object, register_func, register_extension +from ._ffi import register_object, register_func, register_extension, get_global_func # top-level alias # tvm.runtime @@ -47,10 +47,9 @@ from . import tir # tvm.target from . import target -from .target import build_config # tvm.te -from .te import decl_tensor_intrin, create_schedule, tag_scope +from . import te # tvm.testing from . import testing @@ -64,14 +63,6 @@ from . import hybrid # others from . import arith -# backward compact for topi, to be removed later -from .api import * -from .tir import expr, stmt, ir_builder, ir_pass, generic -from .te import tensor, schedule -from .tir.op import * -from . import intrin -from . import make - # Contrib initializers from .contrib import rocm as _rocm, nvcc as _nvcc, sdaccel as _sdaccel diff --git a/python/tvm/api.py b/python/tvm/api.py deleted file mode 100644 index 9afaf03..0000000 --- a/python/tvm/api.py +++ /dev/null @@ -1,38 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Functions defined in TVM.""" -# pylint: disable=invalid-name,unused-import,redefined-builtin -import tvm._ffi -import tvm.ir -import tvm.tir - -from tvm.runtime import convert, const, DataType -from tvm.ir import container as _container, Range -from tvm.tir import decl_buffer, layout, bijective_layout -from tvm.tir import min_value, max_value, indexdiv, indexmod, all, any -from tvm.te import placeholder, compute, scan, extern, var, size_var, thread_axis, reduce_axis - - -from ._ffi.base import string_types, TVMError -from ._ffi.registry import register_func, get_global_func, extract_ext_funcs - -from . import make as _make - -int8 = "int8" -int32 = "int32" -float32 = "float32" -handle = "handle" diff --git a/python/tvm/arith/analyzer.py b/python/tvm/arith/analyzer.py index 382a7e0..5a420ad 100644 --- a/python/tvm/arith/analyzer.py +++ b/python/tvm/arith/analyzer.py @@ -212,7 +212,7 @@ class Analyzer: -------- .. 
code-block:: python - x = tvm.var("x") + x = te.var("x") analyzer = tvm.arith.Analyzer() with analzyer.constraint_scope(x % 3 == 0): # constraint in effect diff --git a/python/tvm/autotvm/feature.py b/python/tvm/autotvm/feature.py index 4ff1139..c576ffd 100644 --- a/python/tvm/autotvm/feature.py +++ b/python/tvm/autotvm/feature.py @@ -28,8 +28,11 @@ There are two types of feature import struct import numpy as np +import tvm._ffi -from tvm import schedule, ir_pass, get_global_func, target as _target +from tvm import target as _target +from tvm.tir import ir_pass +from tvm.te import schedule from tvm.driver import build_module def ana_lower(sch, args, @@ -49,10 +52,12 @@ def ana_lower(sch, args, return stmt try: - _get_buffer_curve_sample_flatten = get_global_func( + _get_buffer_curve_sample_flatten = tvm._ffi.get_global_func( "autotvm.feature.GetCurveSampleFeatureFlatten") - _get_itervar_feature = get_global_func("autotvm.feature.GetItervarFeature") - _get_itervar_feature_flatten = get_global_func("autotvm.feature.GetItervarFeatureFlatten") + _get_itervar_feature = tvm._ffi.get_global_func( + "autotvm.feature.GetItervarFeature") + _get_itervar_feature_flatten = tvm._ffi.get_global_func( + "autotvm.feature.GetItervarFeatureFlatten") except ValueError as e: def raise_error(*args, **kwargs): # pylint: disable=unused-argument raise RuntimeError("Cannot load autotvm c++ API") @@ -64,8 +69,8 @@ def get_itervar_feature(sch, args, take_log=False): Parameters ---------- - sch: tvm.schedule.Schedule - args: Array of tvm.tensor.Tensor + sch: tvm.te.schedule.Schedule + args: Array of te.tensor.Tensor the buffer args for lower take_log: bool whether take log of numerical statics @@ -112,8 +117,8 @@ def get_itervar_feature_flatten(sch, args, take_log=True): Parameters ---------- - sch: tvm.schedule.Schedule - args: Array of tvm.tensor.Tensor + sch: tvm.te.schedule.Schedule + args: Array of te.tensor.Tensor the buffer args for lower take_log: bool whether take log of numerical 
statics @@ -185,8 +190,8 @@ def get_buffer_curve_sample_flatten(sch, args, sample_n=30): Parameters ---------- - sch: tvm.schedule.Schedule - args: Array of tvm.tensor.Tensor + sch: tvm.te.schedule.Schedule + args: Array of te.tensor.Tensor the buffer args for lower sample_n: int number of sample points along one dimension diff --git a/python/tvm/autotvm/graph_tuner/base_graph_tuner.py b/python/tvm/autotvm/graph_tuner/base_graph_tuner.py index 3e85e93..c6b79fa 100644 --- a/python/tvm/autotvm/graph_tuner/base_graph_tuner.py +++ b/python/tvm/autotvm/graph_tuner/base_graph_tuner.py @@ -23,6 +23,7 @@ import numpy as np import topi import tvm +from tvm import te from tvm import autotvm, relay from tvm.autotvm.task import get_config from tvm.autotvm.record import encode, load_from_file @@ -301,8 +302,8 @@ class BaseGraphTuner(object): _, out_layout = o_input_info[0] else: _, out_layout = o_output_info[0] - data_placeholder = tvm.placeholder(in_shape, name="data", - dtype=self._dtype) + data_placeholder = te.placeholder(in_shape, name="data", + dtype=self._dtype) args = [data_placeholder, in_layout, out_layout] callback(i_idx, o_idx, m, n, args) diff --git a/python/tvm/autotvm/measure/measure_methods.py b/python/tvm/autotvm/measure/measure_methods.py index 44e6de9..698ddbc 100644 --- a/python/tvm/autotvm/measure/measure_methods.py +++ b/python/tvm/autotvm/measure/measure_methods.py @@ -33,9 +33,13 @@ import tempfile import numpy as np -from ... 
import ir_pass, build, build_config, nd, TVMError, register_func, \ - rpc as _rpc, target as _target -from ...contrib import nvcc, ndk, tar +import tvm._ffi +from tvm import nd, rpc as _rpc, target as _target +from tvm.tir import ir_pass +from tvm.error import TVMError +from tvm.target import build_config +from tvm.driver import build +from tvm.contrib import nvcc, ndk, tar from ..util import get_const_tuple from ..env import AutotvmGlobalScope @@ -581,7 +585,7 @@ def check_remote(target, device_key, host=None, port=None, priority=100, timeout return not t.is_alive() -@register_func +@tvm._ffi.register_func def tvm_callback_cuda_compile(code): """use nvcc to generate ptx code for better optimization""" curr_cuda_target_arch = AutotvmGlobalScope.current.cuda_target_arch diff --git a/python/tvm/autotvm/task/code_hash.py b/python/tvm/autotvm/task/code_hash.py index 9410f52..3076970 100644 --- a/python/tvm/autotvm/task/code_hash.py +++ b/python/tvm/autotvm/task/code_hash.py @@ -22,7 +22,7 @@ code hashing is used to check the consistence of schedule code and the parameter import inspect import zlib -from tvm import schedule +from tvm.te import schedule def attach_code_hash(s): """Decorator for attaching a code hash to a schedule @@ -30,7 +30,7 @@ def attach_code_hash(s): Parameters ---------- s: Schedule - tvm.schedule.Schedule to attach the hash to + tvm.te.schedule.Schedule to attach the hash to """ def decorator(func): def wrapper(*args, **kwargs): diff --git a/python/tvm/autotvm/task/space.py b/python/tvm/autotvm/task/space.py index 47c2270..fbf474f 100644 --- a/python/tvm/autotvm/task/space.py +++ b/python/tvm/autotvm/task/space.py @@ -32,7 +32,7 @@ import math from collections import namedtuple, OrderedDict import numpy as np -from tvm import schedule, thread_axis +from tvm.te import schedule, thread_axis from tvm.autotvm.util import get_const_int Axis = namedtuple('Axis', ['space', 'index']) @@ -57,7 +57,7 @@ class TransformSpace(object): .. 
note:: We can regard our schedule code as a transformation graph of axes. - Starting from raw axes in the definition of tvm.compute, we can transform these axes + Starting from raw axes in the definition of te.compute, we can transform these axes by some operators. The operator includes 'split', 'reorder' and 'annotate'. Each operator has some tunable parameters (e.g. the split factor). Then the tuning process is just to find good parameters of these op. @@ -106,7 +106,7 @@ class VirtualAxis(TransformSpace): Parameters ---------- - var: int or tvm.schedule.IterVar + var: int or tvm.te.schedule.IterVar If is int, return a virtual axis whose length is the provided argument. If is IterVar, return a virtual axis whose length is extracted from the IterVar's extent domain. @@ -266,11 +266,11 @@ class SplitEntity(object): Parameters ---------- - sch: tvm.schedule.Schedule + sch: tvm.te.schedule.Schedule The tvm schedule - op: tvm.tensor.Operation + op: tvm.te.Operation The stage to be applied - axis: tvm.schedule.IterVar + axis: tvm.te.schedule.IterVar axis to split Returns @@ -390,11 +390,11 @@ class ReorderEntity(object): Parameters ---------- - sch: tvm.schedule.Schedule + sch: tvm.te.schedule.Schedule The tvm schedule - op: tvm.tensor.Operation + op: tvm.te.Operation The stage to be applied - axis: tvm.schedule.IterVar + axis: tvm.te.schedule.IterVar axis to split Returns @@ -513,11 +513,11 @@ class AnnotateEntity(object): Parameters ---------- - sch: tvm.schedule.Schedule + sch: tvm.te.schedule.Schedule The tvm schedule - op: tvm.tensor.Operation + op: tvm.te.Operation The stage to be applied - axes: Array of tvm.schedule.IterVar + axes: Array of tvm.te.schedule.IterVar axis to split axis_lens: Array of int, optional the length of axes @@ -532,7 +532,7 @@ class AnnotateEntity(object): Returns ------- - axes : list of tvm.schedule.IterVar + axes : list of tvm.te.schedule.IterVar The transformed axes """ if source is not None: # special case : attach 
cache_read/cache_write @@ -624,7 +624,7 @@ class ConfigSpace(object): Parameters ---------- - var: int or tvm.schedule.IterVar + var: int or tvm.te.schedule.IterVar If is int, return an axis whose length is the provided argument. If is IterVar, return an axis whose length is extracted from the IterVar's extent domain. @@ -640,7 +640,7 @@ class ConfigSpace(object): ---------- name: str name to index the entity of this space - axis: tvm.schedule.IterVar + axis: tvm.te.schedule.IterVar axis to split policy: str name of policy. @@ -681,7 +681,7 @@ class ConfigSpace(object): ---------- name: str name to index the entity of this space - axes: Array of tvm.schedule.IterVar + axes: Array of tvm.te.schedule.IterVar axes to reorder policy: str name of policy @@ -702,7 +702,7 @@ class ConfigSpace(object): ---------- name: str name to index the entity of this space - axes: Array of tvm.schedule.IterVar + axes: Array of tvm.te.schedule.IterVar axes to annotate policy: str name of policy diff --git a/python/tvm/autotvm/task/task.py b/python/tvm/autotvm/task/task.py index ca1ae0e..c75105b 100644 --- a/python/tvm/autotvm/task/task.py +++ b/python/tvm/autotvm/task/task.py @@ -21,10 +21,13 @@ Task can be constructed from tuple of func, args, and kwargs. func is a state-less function, or a string that registers the standard task. """ - import numpy as np -from ... import tensor, expr, container, placeholder, target as _target +from tvm import target as _target +from tvm.ir import container +from tvm.tir import expr +from tvm.te import tensor, placeholder + from ..util import get_const_int, get_const_tuple from .dispatcher import DispatchContext, ApplyConfig @@ -81,7 +84,7 @@ def deserialize_args(args): def args_to_workload(args, task_name=None): """Convert argument list to hashable workload tuple. 
This function will convert list to tuple, tvm node to python value and - flatten tvm.tensor.Tensor to a tuple + flatten te.tensor.Tensor to a tuple Parameters ---------- @@ -138,9 +141,9 @@ class Task(object): Returns ------- - sch: tvm.schedule.Schedule + sch: tvm.te.schedule.Schedule The tvm schedule - arg_bufs: Array of tvm.tensor.Tensor + arg_bufs: Array of te.tensor.Tensor The input/output buffers """ config.flop = 0 @@ -303,12 +306,12 @@ def register_customized_task(name, func=None): @autotvm.register_customized_task("matmul") def matmul(N, L, M, dtype): - A = tvm.placeholder((N, L), name='A', dtype=dtype) - B = tvm.placeholder((L, M), name='B', dtype=dtype) + A = te.placeholder((N, L), name='A', dtype=dtype) + B = te.placeholder((L, M), name='B', dtype=dtype) - k = tvm.reduce_axis((0, L), name='k') - C = tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k), name='C') - s = tvm.create_schedule(C.op) + k = te.reduce_axis((0, L), name='k') + C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name='C') + s = te.create_schedule(C.op) # schedule y, x = s[C].op.axis @@ -400,7 +403,7 @@ def compute_flop(sch): Parameters ---------- - sch: tvm.schedule.Schedule + sch: tvm.te.schedule.Schedule schedule Returns @@ -475,8 +478,8 @@ def compute_flop(sch): elif isinstance(op, tensor.PlaceholderOp): pass else: - raise FlopCalculationError("Only support tvm.compute currently. " - "Other ops like tvm.scan/tvm.extern is not supported") + raise FlopCalculationError("Only support te.compute currently. " + "Other ops like tvm.te.scan/te.extern is not supported") return ret try: diff --git a/python/tvm/autotvm/task/topi_integration.py b/python/tvm/autotvm/task/topi_integration.py index 45385fb..e1c0913 100644 --- a/python/tvm/autotvm/task/topi_integration.py +++ b/python/tvm/autotvm/task/topi_integration.py @@ -21,15 +21,15 @@ Decorators for registering tunable templates to TOPI. 
These decorators can make your simple implementation be able to use different configurations for different workloads. Here we directly use all arguments to the TOPI call as "workload", so make sure all the arguments -(except tvm.Tensor) in you calls are hashable. For tvm.Tensor, we will serialize it to a hashable -tuple. +(except tvm.te.Tensor) in you calls are hashable. For tvm.te.Tensor, +we will serialize it to a hashable tuple. See tvm/topi/python/topi/arm_cpu/depthwise_conv2d.py for example usage. """ import tvm.te._ffi_api from tvm import target as _target +from tvm.te import tensor -from ... import tensor from .task import args_to_workload, DispatchContext, \ register_task_compute, register_task_schedule, serialize_args diff --git a/python/tvm/autotvm/util.py b/python/tvm/autotvm/util.py index 54001d3..01d50e8 100644 --- a/python/tvm/autotvm/util.py +++ b/python/tvm/autotvm/util.py @@ -24,7 +24,7 @@ from random import randrange import numpy as np -from .. import expr, ir_pass +from tvm.tir import expr, ir_pass logger = logging.getLogger('autotvm') diff --git a/python/tvm/contrib/binutil.py b/python/tvm/contrib/binutil.py index 1f322ac..521e088 100644 --- a/python/tvm/contrib/binutil.py +++ b/python/tvm/contrib/binutil.py @@ -18,8 +18,9 @@ """Utilities for binary file manipulation""" import os import subprocess +import tvm._ffi from . import util -from ..api import register_func + RELOCATION_LD_SCRIPT_TEMPLATE = """ /* linker symbol for use in UTVMInit */ @@ -95,7 +96,7 @@ def run_cmd(cmd): return output -@register_func("tvm_callback_get_section_size") +@tvm._ffi.register_func("tvm_callback_get_section_size") def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix): """Finds size of the section in the binary. 
Assumes `size` shell command exists (typically works only on Linux machines) @@ -162,7 +163,7 @@ def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix): return section_size -@register_func("tvm_callback_relocate_binary") +@tvm._ffi.register_func("tvm_callback_relocate_binary") def tvm_callback_relocate_binary( binary_path, word_size, @@ -233,7 +234,7 @@ def tvm_callback_relocate_binary( return rel_bin -@register_func("tvm_callback_read_binary_section") +@tvm._ffi.register_func("tvm_callback_read_binary_section") def tvm_callback_read_binary_section(binary, section, toolchain_prefix): """Returns the contents of the specified section in the binary byte array @@ -273,7 +274,7 @@ def tvm_callback_read_binary_section(binary, section, toolchain_prefix): return section_bin -@register_func("tvm_callback_get_symbol_map") +@tvm._ffi.register_func("tvm_callback_get_symbol_map") def tvm_callback_get_symbol_map(binary, toolchain_prefix): """Obtains a map of symbols to addresses in the passed binary diff --git a/python/tvm/contrib/cblas.py b/python/tvm/contrib/cblas.py index 2337f84..e1a4a8a 100644 --- a/python/tvm/contrib/cblas.py +++ b/python/tvm/contrib/cblas.py @@ -16,7 +16,7 @@ # under the License. """External function interface to BLAS libraries.""" import tvm -from .. 
import api as _api +from tvm import te def matmul(lhs, rhs, transa=False, transb=False, **kwargs): @@ -41,7 +41,7 @@ def matmul(lhs, rhs, transa=False, transb=False, **kwargs): """ n = lhs.shape[1] if transa else lhs.shape[0] m = rhs.shape[0] if transb else rhs.shape[1] - return _api.extern( + return te.extern( (n, m), [lhs, rhs], lambda ins, outs: tvm.tir.call_packed( @@ -75,7 +75,7 @@ def batch_matmul(lhs, rhs, transa=False, transb=False, iterative=False, **kwargs b = lhs.shape[0] n = lhs.shape[2] if transa else lhs.shape[1] m = rhs.shape[1] if transb else rhs.shape[2] - return _api.extern( + return te.extern( (b, n, m), [lhs, rhs], lambda ins, outs: tvm.tir.call_packed( diff --git a/python/tvm/contrib/cublas.py b/python/tvm/contrib/cublas.py index 75290a8..7b42bec 100644 --- a/python/tvm/contrib/cublas.py +++ b/python/tvm/contrib/cublas.py @@ -16,7 +16,8 @@ # under the License. """External function interface to cuBLAS libraries.""" import tvm -from .. import api as _api +from tvm import te + def matmul(lhs, rhs, transa=False, transb=False, dtype=None): """Create an extern op that compute matrix mult of A and rhs with cuBLAS @@ -40,7 +41,7 @@ def matmul(lhs, rhs, transa=False, transb=False, dtype=None): n = lhs.shape[1] if transa else lhs.shape[0] m = rhs.shape[0] if transb else rhs.shape[1] dtype = dtype if dtype is not None else lhs.dtype - return _api.extern( + return te.extern( (n, m), [lhs, rhs], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.cublas.matmul", @@ -69,7 +70,7 @@ def batch_matmul(lhs, rhs, transa=False, transb=False, dtype=None): n = lhs.shape[2] if transa else lhs.shape[1] m = rhs.shape[1] if transb else rhs.shape[2] dtype = dtype if dtype is not None else lhs.dtype - return _api.extern( + return te.extern( (b, n, m), [lhs, rhs], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.cublas.batch_matmul", diff --git a/python/tvm/contrib/cublaslt.py b/python/tvm/contrib/cublaslt.py index 1000ede..3b36f47 100644 --- 
a/python/tvm/contrib/cublaslt.py +++ b/python/tvm/contrib/cublaslt.py @@ -16,7 +16,7 @@ # under the License. """External function interface to cuBLASlt libraries.""" import tvm -from .. import api as _api +from tvm import te def matmul(lhs, rhs, transa=False, transb=False, n=0, m=0, dtype=None): @@ -43,7 +43,7 @@ def matmul(lhs, rhs, transa=False, transb=False, n=0, m=0, dtype=None): if m == 0: m = rhs.shape[0] if transb else rhs.shape[1] dtype = dtype if dtype is not None else lhs.dtype - return _api.extern( + return te.extern( (n, m), [lhs, rhs], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.cublaslt.matmul", diff --git a/python/tvm/contrib/cudnn.py b/python/tvm/contrib/cudnn.py index 20b42d7..e627245 100644 --- a/python/tvm/contrib/cudnn.py +++ b/python/tvm/contrib/cudnn.py @@ -19,8 +19,9 @@ import ctypes import numpy as np import tvm -from .. import api as _api -from .. import get_global_func as _get_global_func + +import tvm._ffi +from tvm import te # algos can be read from cudnn.h _FWD_ALGOS = [ @@ -217,7 +218,7 @@ def conv_output_shape(tensor_format, _prepare_global_func_params(dims - 2, pad, stride, dilation, x_shape, w_shape) oshape = np.zeros((dims), dtype=np.int32) - func = _get_global_func("tvm.contrib.cudnn.conv.output_shape") + func = tvm._ffi.get_global_func("tvm.contrib.cudnn.conv.output_shape") func(tensor_format, dims - 2, _get_np_int32_array_handle(pad), @@ -276,7 +277,7 @@ def conv_find_algo(tensor_format, pad, stride, dilation, xshape, wshape = \ _prepare_global_func_params(dims - 2, pad, stride, dilation, x_shape, w_shape) yshape = np.array(y_shape, dtype=np.int32) - func = _get_global_func("tvm.contrib.cudnn.conv.find_algo") + func = tvm._ffi.get_global_func("tvm.contrib.cudnn.conv.find_algo") return func(tensor_format, dims - 2, _get_np_int32_array_handle(pad), @@ -363,7 +364,7 @@ def conv_forward(x, conv_dtype) if dims == 4: - return _api.extern( + return te.extern( oshape, [x, w], lambda ins, outs: tvm.tir.call_packed( 
"tvm.contrib.cudnn.conv2d.forward", @@ -381,7 +382,7 @@ def conv_forward(x, outs[0], conv_dtype), name="y") - return _api.extern( + return te.extern( oshape, [x, w], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.cudnn.conv3d.forward", diff --git a/python/tvm/contrib/debugger/debug_result.py b/python/tvm/contrib/debugger/debug_result.py index 26c16e3..18920c6 100644 --- a/python/tvm/contrib/debugger/debug_result.py +++ b/python/tvm/contrib/debugger/debug_result.py @@ -21,6 +21,7 @@ import os import numpy as np import tvm + GRAPH_DUMP_FILE_NAME = '_tvmdbg_graph_dump.json' CHROME_TRACE_FILE_NAME = "_tvmdbg_execution_trace.json" diff --git a/python/tvm/contrib/miopen.py b/python/tvm/contrib/miopen.py index 7f024f7..04e35de 100644 --- a/python/tvm/contrib/miopen.py +++ b/python/tvm/contrib/miopen.py @@ -19,8 +19,9 @@ import ctypes import numpy as np import tvm -from .. import api as _api -from .. import get_global_func as _get_global_func +import tvm._ffi + +from tvm import te def _get_np_int32_array_handle(arr): @@ -91,7 +92,7 @@ def conv2d_forward(x, oshape = np.zeros((len(x.shape)), dtype=np.int32) xshape = x.shape wshape = w.shape - setup_func = _get_global_func("tvm.contrib.miopen.conv2d.setup") + setup_func = tvm._ffi.get_global_func("tvm.contrib.miopen.conv2d.setup") algo = setup_func(conv_mode, data_type, pad_h, @@ -111,7 +112,7 @@ def conv2d_forward(x, group_count, _get_np_int32_array_handle(oshape)) - return _api.extern( + return te.extern( list(oshape), [x, w], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.miopen.conv2d.forward", diff --git a/python/tvm/contrib/mps.py b/python/tvm/contrib/mps.py index 5d84e89..8f310b0 100644 --- a/python/tvm/contrib/mps.py +++ b/python/tvm/contrib/mps.py @@ -16,7 +16,8 @@ # under the License. """External function interface to MPS libraries.""" import tvm -from .. 
import api as _api +from tvm import te + # pylint: disable=C0103,W0612 @@ -47,7 +48,7 @@ def matmul(lhs, rhs, transa=False, transb=False): m = b if transb: n = c - return _api.extern( + return te.extern( (m, n), [lhs, rhs], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.mps.matmul", ins[0], ins[1], outs[0], transa, transb), @@ -79,7 +80,7 @@ def conv2d(data, weight, pad='SAME', stride=1): ho = hi // stride wo = wi // stride - return _api.extern( + return te.extern( (n, ho, wo, co), [data, weight], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.mps.conv2d", ins[0], ins[1], outs[0], padding, stride), diff --git a/python/tvm/contrib/nnpack.py b/python/tvm/contrib/nnpack.py index a55a344..1ce1dcc 100644 --- a/python/tvm/contrib/nnpack.py +++ b/python/tvm/contrib/nnpack.py @@ -16,8 +16,8 @@ # under the License. """External function interface to NNPACK libraries.""" import tvm +from tvm import te import tvm._ffi -from .. import api as _api def is_available(): @@ -43,7 +43,7 @@ def fully_connected_inference(lhs, rhs, nthreads=1): lhs 1D array out[output_channels] of FP32 elements. 
""" m = rhs.shape[0] - return _api.extern( + return te.extern( (m, ), [lhs, rhs], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.nnpack.fully_connected_inference", @@ -100,13 +100,13 @@ def convolution_inference( assert isinstance(stride, list) and len(stride) == 2 batch, _, input_height, input_width = data.shape output_channels, _, kernel_height, kernel_width = kernel.shape - idxdiv = _api.indexdiv + idxdiv = te.indexdiv output_height = idxdiv( input_height + padding[0] + padding[1] - kernel_height, stride[0]) + 1 output_width = idxdiv( input_width + padding[0] + padding[1] - kernel_width, stride[1]) + 1 - return _api.extern( + return te.extern( (batch, output_channels, output_height, output_width), [data, kernel, bias] if bias is not None else [data, kernel], lambda ins, outs: tvm.tir.call_packed( @@ -155,11 +155,11 @@ def convolution_inference_without_weight_transform( batch, _, input_height, input_width = data.shape output_channels, _, _, _ = transformed_kernel.shape kernel_height, kernel_width = (3, 3) - idxdiv = _api.indexdiv + idxdiv = te.indexdiv output_height = idxdiv(input_height + padding[0] + padding[1] - kernel_height, stride[0]) + 1 output_width = idxdiv(input_width + padding[0] + padding[1] - kernel_width, stride[1]) + 1 - return _api.extern( + return te.extern( (batch, output_channels, output_height, output_width), [data, transformed_kernel, bias] if bias is not None else [data, transformed_kernel], lambda ins, outs: tvm.tir.call_packed( @@ -194,7 +194,7 @@ def convolution_inference_weight_transform( transform_tile_size = 8 if not isinstance(dtype, str): dtype = dtype.dtype - return _api.extern( + return te.extern( (output_channels, input_channels, transform_tile_size, transform_tile_size), [kernel], lambda ins, outs: tvm.tir.call_packed( diff --git a/python/tvm/contrib/nvcc.py b/python/tvm/contrib/nvcc.py index 8712f73..fc82320 100644 --- a/python/tvm/contrib/nvcc.py +++ b/python/tvm/contrib/nvcc.py @@ -21,10 +21,11 @@ from __future__ import 
absolute_import as _abs import subprocess import os import warnings + +import tvm._ffi from tvm.runtime import ndarray as nd from . import util -from ..api import register_func from .._ffi.base import py_str def compile_cuda(code, @@ -152,7 +153,7 @@ def get_cuda_version(cuda_path): raise RuntimeError("Cannot read cuda version file") -@register_func("tvm_callback_libdevice_path") +@tvm._ffi.register_func("tvm_callback_libdevice_path") def find_libdevice_path(arch): """Utility function to find libdevice diff --git a/python/tvm/contrib/peak.py b/python/tvm/contrib/peak.py index bc93afb..2906410 100644 --- a/python/tvm/contrib/peak.py +++ b/python/tvm/contrib/peak.py @@ -19,6 +19,7 @@ import logging import tvm +from tvm import te from . import util from .. import rpc @@ -79,17 +80,17 @@ def measure_bandwidth_sum(total_item, item_per_thread, stride, base_type = str(base_type) + str(bits) dtype = base_type if lanes == 1 else base_type + "x" + str(lanes) - k = tvm.reduce_axis((0, m), name="k") + k = te.reduce_axis((0, m), name="k") - x = tvm.placeholder((n,), dtype=dtype, name="x") - op = tvm.comm_reducer(lambda x, y: x*y, lambda t: tvm.const(1, dtype=t), name="sum") - y = tvm.compute((n // m,), - lambda i: op(x[i // stride * stride * m + i % stride + k * stride], axis=k)) - s = tvm.create_schedule(y.op) + x = te.placeholder((n,), dtype=dtype, name="x") + op = te.comm_reducer(lambda x, y: x*y, lambda t: tvm.tir.const(1, dtype=t), name="sum") + y = te.compute((n // m,), + lambda i: op(x[i // stride * stride * m + i % stride + k * stride], axis=k)) + s = te.create_schedule(y.op) yo, yi = s[y].split(y.op.axis[0], target.max_num_threads) - s[y].bind(yo, tvm.thread_axis("blockIdx.x")) - s[y].bind(yi, tvm.thread_axis("threadIdx.x")) + s[y].bind(yo, te.thread_axis("blockIdx.x")) + s[y].bind(yi, te.thread_axis("threadIdx.x")) s[y].unroll(k) try: @@ -207,10 +208,10 @@ def measure_compute_mad(total_item, item_per_thread, base_type, bits, lanes, def extern(ins, outs): # pylint: 
disable=unused-argument """construct measurement function by building IR directly""" - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() - bx = tvm.thread_axis("blockIdx.x") - tx = tvm.thread_axis("threadIdx.x") + bx = te.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") ib.scope_attr(bx, "thread_extent", n // max_threads) ib.scope_attr(tx, "thread_extent", max_threads) @@ -235,8 +236,8 @@ def measure_compute_mad(total_item, item_per_thread, base_type, bits, lanes, ib.emit(outs[0].vstore(idx, b[0])) return ib.get() - y = tvm.extern((n,), [], extern, name="y", dtype=dtype) - s = tvm.create_schedule(y.op) + y = te.extern((n,), [], extern, name="y", dtype=dtype) + s = te.create_schedule(y.op) try: func = tvm.build(s, [y], target, target_host=target_host) diff --git a/python/tvm/contrib/random.py b/python/tvm/contrib/random.py index bcc9b17..727b68b 100644 --- a/python/tvm/contrib/random.py +++ b/python/tvm/contrib/random.py @@ -16,8 +16,8 @@ # under the License. """External function interface to random library.""" import tvm +from tvm import te import tvm._ffi -from .. import api as _api def randint(low, high, size, dtype='int32'): @@ -38,7 +38,7 @@ def randint(low, high, size, dtype='int32'): A tensor with specified size and dtype """ assert 'int' in dtype, "the type of randint output must be int or uint" - return _api.extern(size, [], lambda ins, outs: tvm.tir.call_packed( + return te.extern(size, [], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.random.randint", int(low), int(high), outs[0]), dtype=dtype) @@ -66,7 +66,7 @@ def uniform(low, high, size): out : Tensor A tensor with specified size and dtype. 
""" - return _api.extern(size, [], lambda ins, outs: tvm.tir.call_packed( + return te.extern(size, [], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.random.uniform", float(low), float(high), outs[0]), dtype='float32') @@ -90,7 +90,7 @@ def normal(loc, scale, size): out : Tensor A tensor with specified size and dtype """ - return _api.extern(size, [], lambda ins, outs: tvm.tir.call_packed( + return te.extern(size, [], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.random.normal", float(loc), float(scale), outs[0]), dtype='float32') diff --git a/python/tvm/contrib/rocblas.py b/python/tvm/contrib/rocblas.py index e11be5a..86ffaea 100644 --- a/python/tvm/contrib/rocblas.py +++ b/python/tvm/contrib/rocblas.py @@ -16,7 +16,8 @@ # under the License. """External function interface to rocBLAS libraries.""" import tvm -from .. import api as _api +from tvm import te + def matmul(lhs, rhs, transa=False, transb=False): """Create an extern op that compute matrix mult of A and rhs with rocBLAS @@ -39,7 +40,7 @@ def matmul(lhs, rhs, transa=False, transb=False): """ n = lhs.shape[1] if transa else lhs.shape[0] m = rhs.shape[0] if transb else rhs.shape[1] - return _api.extern( + return te.extern( (n, m), [lhs, rhs], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.rocblas.matmul", diff --git a/python/tvm/contrib/rocm.py b/python/tvm/contrib/rocm.py index e5cebdd..7d4b4a2 100644 --- a/python/tvm/contrib/rocm.py +++ b/python/tvm/contrib/rocm.py @@ -18,11 +18,13 @@ import subprocess from os.path import join, exists +import tvm._ffi from tvm._ffi.base import py_str +import tvm.runtime import tvm.target from . import util -from ..api import register_func, convert + def find_lld(required=True): """Find ld.lld in system. 
@@ -85,7 +87,7 @@ def rocm_link(in_file, out_file, lld=None): raise RuntimeError(msg) -@register_func("tvm_callback_rocm_link") +@tvm._ffi.register_func("tvm_callback_rocm_link") def callback_rocm_link(obj_bin): """Links object file generated from LLVM to HSA Code Object @@ -108,7 +110,7 @@ def callback_rocm_link(obj_bin): cobj_bin = bytearray(open(tmp_cobj, "rb").read()) return cobj_bin -@register_func("tvm_callback_rocm_bitcode_path") +@tvm._ffi.register_func("tvm_callback_rocm_bitcode_path") def callback_rocm_bitcode_path(rocdl_dir="/opt/rocm/lib/"): """Utility function to find ROCm device library bitcodes @@ -138,4 +140,4 @@ def callback_rocm_bitcode_path(rocdl_dir="/opt/rocm/lib/"): "oclc_wavefrontsize64_on.amdgcn.bc" ] paths = [join(rocdl_dir, bitcode) for bitcode in bitcode_files] - return convert([path for path in paths if exists(path)]) + return tvm.runtime.convert([path for path in paths if exists(path)]) diff --git a/python/tvm/contrib/sdaccel.py b/python/tvm/contrib/sdaccel.py index 1234d54..3f9bf43 100644 --- a/python/tvm/contrib/sdaccel.py +++ b/python/tvm/contrib/sdaccel.py @@ -17,11 +17,12 @@ """Utility for Interacting with SDAccel Tools""" import subprocess import os + +import tvm._ffi from . import util -from ..api import register_func -@register_func("tvm_callback_sdaccel_compile") +@tvm._ffi.register_func("tvm_callback_sdaccel_compile") def compile_vhls(kernel_info, device_name): """Compile Vivado HLS code for SDAccel. diff --git a/python/tvm/contrib/sparse.py b/python/tvm/contrib/sparse.py index 966e180..77f84b1 100644 --- a/python/tvm/contrib/sparse.py +++ b/python/tvm/contrib/sparse.py @@ -18,10 +18,9 @@ # pylint: disable=invalid-name import numpy as _np from tvm.runtime import ndarray as _nd - -from .. import expr as _expr -from .. import api as _api -from .. 
import tensor as _tensor +from tvm import te +from tvm.tir import expr as _expr +from tvm.te import tensor as _tensor float32 = "float32" @@ -136,9 +135,9 @@ class CSRPlaceholderOp(SparsePlaceholderOp): """ SparsePlaceholderOp.__init__(self, shape, nonzeros, dtype, name) self.stype = 'csr' - self.data = _api.placeholder((nonzeros,), dtype=dtype, name=self.name+'_data') - self.indices = _api.placeholder((nonzeros,), dtype=itype, name=self.name+'_indices') - self.indptr = _api.placeholder((self.shape[0]+1,), dtype=itype, name=self.name+'_indptr') + self.data = te.placeholder((nonzeros,), dtype=dtype, name=self.name+'_data') + self.indices = te.placeholder((nonzeros,), dtype=itype, name=self.name+'_indices') + self.indptr = te.placeholder((self.shape[0]+1,), dtype=itype, name=self.name+'_indptr') assert isinstance(self.data, _tensor.Tensor) assert isinstance(self.indices, _tensor.Tensor) assert isinstance(self.indptr, _tensor.Tensor) diff --git a/python/tvm/contrib/tedd.py b/python/tvm/contrib/tedd.py index f15b7d4..68e15f2 100644 --- a/python/tvm/contrib/tedd.py +++ b/python/tvm/contrib/tedd.py @@ -282,7 +282,7 @@ def dump_json(sch, need_range): def encode_itervar_relation(obj_manager, rel): """Extract and encode IterVar Relationship visualization data to a dictionary""" rel_type = type(rel) - if rel_type is tvm.schedule.Split: + if rel_type is tvm.te.schedule.Split: node_type = 'Split_Relation' rel_dict = { "type": node_type, @@ -290,7 +290,7 @@ def dump_json(sch, need_range): "outer": obj_manager.get_dom_path(rel.outer), "inner": obj_manager.get_dom_path(rel.inner), } - elif rel_type is tvm.schedule.Fuse: + elif rel_type is tvm.te.schedule.Fuse: node_type = 'Fuse_Relation' rel_dict = { "type": node_type, @@ -298,7 +298,7 @@ def dump_json(sch, need_range): "outer": obj_manager.get_dom_path(rel.outer), "inner": obj_manager.get_dom_path(rel.inner), } - elif rel_type is tvm.schedule.Singleton: + elif rel_type is tvm.te.schedule.Singleton: node_type = 
'Singleton_Relation' rel_dict = { "type": node_type, @@ -377,12 +377,12 @@ def dump_json(sch, need_range): dict : dictionary A nested dictionary """ - assert isinstance(sch, tvm.schedule.Schedule - ), 'Input is not a tvm.schedule.Schedule object.' + assert isinstance(sch, tvm.te.schedule.Schedule + ), 'Input is not a tvm.te.schedule.Schedule object.' range_map = None if need_range: try: - range_map = tvm.schedule.InferBound(sch) + range_map = tvm.te.schedule.InferBound(sch) except tvm._ffi.base.TVMError as expt: warnings.warn( 'Ranges are not available, because InferBound fails with the following error:\n' diff --git a/python/tvm/driver/build_module.py b/python/tvm/driver/build_module.py index f529ee2..67eb224 100644 --- a/python/tvm/driver/build_module.py +++ b/python/tvm/driver/build_module.py @@ -89,7 +89,7 @@ def form_body(sch): """According to the given schedule, form the raw body Parameters ---------- - sch : tvm.schedule.Schedule + sch : tvm.te.schedule.Schedule The given scheduler to form the raw body Returns @@ -113,7 +113,7 @@ def lower(sch, Parameters ---------- - sch : tvm.schedule.Schedule + sch : tvm.te.schedule.Schedule The schedule to be built args : list of Buffer or Tensor or Var @@ -286,7 +286,7 @@ def build(inputs, Parameters ---------- - inputs : tvm.Schedule, LoweredFunc, or dict of target to LoweredFunc list + inputs : tvm.te.Schedule, LoweredFunc, or dict of target to LoweredFunc list The schedule to be built args : list of Buffer or Tensor or Var, optional @@ -325,10 +325,10 @@ def build(inputs, .. 
code-block:: python n = 2 - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') - s = tvm.create_schedule(C.op) + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') + s = tvm.te.create_schedule(C.op) f = tvm.lower(s, [A, B, C], name="test_add") m = tvm.build(f, target="llvm") @@ -337,10 +337,10 @@ def build(inputs, .. code-block:: python n = 2 - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') - s1 = tvm.create_schedule(C.op) + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') + s1 = tvm.te.create_schedule(C.op) with tvm.target.cuda() as cuda_tgt: s2 = topi.cuda.schedule_injective(cuda_tgt, [C]) f1 = tvm.lower(s1, [A, B, C], name="test_add1") diff --git a/python/tvm/hybrid/calls.py b/python/tvm/hybrid/calls.py index 0933628..5b5c34d 100644 --- a/python/tvm/hybrid/calls.py +++ b/python/tvm/hybrid/calls.py @@ -16,6 +16,9 @@ # under the License. """Intrinsics of TVM-Python Hybrid Script for Python compilation time semantic support.""" + +from tvm.runtime import const, convert +import tvm.te from tvm.ir.container import Array from tvm import target as _tgt from tvm.tir import expr as _expr @@ -23,8 +26,6 @@ from tvm.tir import ir_pass from tvm.tir import call_pure_intrin from tvm.tir.stmt import For -from .. 
import api as _api - from .util import _internal_assert # pylint: disable=redefined-builtin @@ -42,11 +43,11 @@ def _range(annotation, args): """Handling TVM loop types""" n = args.__len__() if n == 1: - low, ext = _api.const(0, dtype='int32'), args[0] + low, ext = const(0, dtype='int32'), args[0] else: _internal_assert(n == 2, "A loop intrinsic should only have 1 or 2 arguments!") low, ext = args[0], args[1] - if not ir_pass.Equal(low, _api.const(0, dtype='int32')): + if not ir_pass.Equal(low, const(0, dtype='int32')): ext = ext - low for_type = LOOP_INTRIN[annotation] iter_var = None @@ -62,16 +63,16 @@ def bind(func_id, args): _internal_assert(args.__len__() == 2, "A loop bind should only have 2 arguments!") _internal_assert(isinstance(args[0], str), \ "A loop bind's first argument should be a string!") - low, ext = _api.const(0, "int32"), args[1] - iter_var = _api.thread_axis((low, ext), args[0]) + low, ext = const(0, "int32"), args[1] + iter_var = tvm.te.thread_axis((low, ext), args[0]) for_type = None return iter_var, low, ext, for_type def _math_intrin(func_id, args): # pylint: disable=import-outside-toplevel - import tvm.tir.op - return getattr(tvm.tir.op, func_id)(*args) + from tvm.tir import op + return getattr(op, func_id)(*args) sqrt = log = exp = tanh = sigmoid = power = popcount = _math_intrin #pylint: disable=invalid-name @@ -88,7 +89,7 @@ def _allocate_tensor(func_id, args): """Handling TVM tensor allocation. 
You may refer hybrid.intrin.allocate for more details.""" n = args.__len__() - _internal_assert(isinstance(_api.convert(args[0]), Array), \ + _internal_assert(isinstance(convert(args[0]), Array), \ "allocate's first argument should be a tuple of shape!") shape = args[0] for i in shape: @@ -119,10 +120,10 @@ def len(func_id, args): _internal_assert(args.__len__() == 1, "Only 1 argument is expected!") _internal_assert(func_id == "len", "This function cannot be directly invoked!") try: - return _api.convert(args[0].__len__()) + return convert(args[0].__len__()) except: #pylint: disable=bare-except _internal_assert(args[0].shape.__len__() == 1, "Only one-dimension array can get len") - return _api.convert(args[0].shape[0]) + return convert(args[0].shape[0]) def _cast(func_id, args): @@ -159,4 +160,4 @@ def max_num_threads(func_id, args): else: _internal_assert(isinstance(args[0], _expr.IntImm), "In tvm bool should be uint") res = _tgt.Target.current(args[0].value).max_num_threads - return _api.convert(res) + return convert(res) diff --git a/python/tvm/hybrid/parser.py b/python/tvm/hybrid/parser.py index cf8584a..0f8f3dd 100644 --- a/python/tvm/hybrid/parser.py +++ b/python/tvm/hybrid/parser.py @@ -25,7 +25,9 @@ import numbers from enum import Enum from tvm.ir import Array, Range +import tvm.runtime import tvm.tir +import tvm.te import tvm.te._ffi_api from tvm.tir import expr as _expr @@ -40,8 +42,6 @@ from . import calls from . import util from .preprocessor import determine_variable_usage -from .. import api as _api - def concat_list_to_block(lst): """Concatenate a list of Python IR nodes to HalideIR Block""" @@ -125,7 +125,7 @@ class HybridParser(ast.NodeVisitor): """ Parameters ---------- - args: A list of tvm.placeholder or tvm.var + args: A list of tvm.te.placeholder or te.var Provided by the user, the argument list of the function to be lowered. 
usage: A dict of variables used in last in this function @@ -210,9 +210,9 @@ class HybridParser(ast.NodeVisitor): _domain = [Range.make_by_min_extent(0, i) for i in _buf.shape] _dtype = _buf.dtype - _true = _api.convert(True) + _true = tvm.runtime.convert(True) body = tvm.tir.Realize(_buf.op, 0, _dtype, _domain, _true, body) - body = tvm.tir.AttrStmt(_buf.op, 'realize_scope', _api.convert(_scope), body) + body = tvm.tir.AttrStmt(_buf.op, 'realize_scope', tvm.runtime.convert(_scope), body) for elem in to_pop: self.symbols.pop(elem) @@ -256,10 +256,10 @@ class HybridParser(ast.NodeVisitor): def visit_Name(self, node): name = node.id if sys.version_info[0] == 2 and name in ['True', 'False']: - return _api.convert(ast.literal_eval(name)) + return tvm.runtime.convert(ast.literal_eval(name)) if name in self.closure_vars: - return _api.convert(self.closure_vars[name]) + return tvm.runtime.convert(self.closure_vars[name]) ty, entry = self.symbols[name] _internal_assert(name in self.symbols, "Unknown symbol %s!" % name) @@ -271,9 +271,9 @@ class HybridParser(ast.NodeVisitor): return entry if isinstance(node.ctx, ast.Load) else None if ty is Symbol.BufferVar: if isinstance(node.ctx, ast.Load): - return tvm.tir.Call(entry.dtype, entry.name, [_api.const(0, 'int32')], \ + return tvm.tir.Call(entry.dtype, entry.name, [tvm.runtime.const(0, 'int32')], \ _expr.Call.Halide, entry.op, entry.value_index) - return entry, [_api.const(0, 'int32')] + return entry, [tvm.runtime.const(0, 'int32')] # Do I need any assertion here? 
return entry @@ -287,11 +287,11 @@ class HybridParser(ast.NodeVisitor): _internal_assert(isinstance(node.n, bool), "The data type should be one of (int, float, bool)") dtype = "bool" - return _api.const(node.n, dtype) + return tvm.runtime.const(node.n, dtype) def visit_NameConstant(self, node): - return _api.convert(node.value) + return tvm.runtime.convert(node.value) def visit_AugAssign(self, node): @@ -301,7 +301,7 @@ class HybridParser(ast.NodeVisitor): _internal_assert(len(buf) == 2, "LHS is supposed to be (buf, args)!") buf, args = buf else: - args = [_api.const(0, 'int32')] + args = [tvm.runtime.const(0, 'int32')] _internal_assert(isinstance(buf, Tensor), "LHS is supposed to be Tensor!") read = tvm.tir.Call(buf.dtype, buf.name, args, _expr.Call.Halide, buf.op, buf.value_index) @@ -341,7 +341,7 @@ class HybridParser(ast.NodeVisitor): "This value should not be defined before this point!") if isinstance(rhs, tuple): shape, dtype, scope = rhs - ph = _api.placeholder(shape, dtype=dtype, name=lhs) + ph = tvm.te.placeholder(shape, dtype=dtype, name=lhs) self.add_symbol(lhs, getattr(Symbol, scope.title() + "Buffer"), ph) if scope == 'output': self.outputs.append(lhs) @@ -353,7 +353,7 @@ class HybridParser(ast.NodeVisitor): "Single variable not supported in devices' side!\n" + \ "If you are using GPU, please allocate a 'local' spad " + \ "outside the bind body") - ph = _api.placeholder((1, ), dtype=rhs.dtype, name=lhs) + ph = tvm.te.placeholder((1, ), dtype=rhs.dtype, name=lhs) self.add_symbol(lhs, Symbol.BufferVar, ph) lhs = self.visit(lhs_) if lhs is not None: @@ -524,8 +524,8 @@ class HybridParser(ast.NodeVisitor): if iter_var is None: _internal_assert(for_type is not None, "The loop iterating function parse error!") - offset = iter_var = _api.var(_name) - if not _ir_pass.Equal(low, _api.const(0, 'int32')): + offset = iter_var = tvm.te.var(_name) + if not _ir_pass.Equal(low, tvm.runtime.const(0, 'int32')): offset = iter_var + low self.add_symbol(_name, 
Symbol.LoopVar, offset) _body = visit_list_to_block(self.visit, node.body) @@ -543,7 +543,7 @@ class HybridParser(ast.NodeVisitor): else: _internal_assert(not isinstance(for_type, tuple), \ "Micro expansion should be handled before!") - res = tvm.tir.For(iter_var, _api.const(0, 'int32'), ext, for_type, 0, _body) + res = tvm.tir.For(iter_var, tvm.runtime.const(0, 'int32'), ext, for_type, 0, _body) self.symbols.pop(_name) return res @@ -579,7 +579,7 @@ class HybridParser(ast.NodeVisitor): def visit_Assert(self, node): test = self.visit(node.test) - mesg = _api.convert(self.visit(node.msg)) + mesg = tvm.runtime.convert(self.visit(node.msg)) return tvm.tir.AssertStmt(test, mesg, util.make_nop()) diff --git a/python/tvm/hybrid/util.py b/python/tvm/hybrid/util.py index 2b67956..6c01989 100644 --- a/python/tvm/hybrid/util.py +++ b/python/tvm/hybrid/util.py @@ -22,6 +22,7 @@ import logging import sys import numpy +import tvm.runtime from tvm._ffi.base import numeric_types from tvm.ir.container import Array @@ -29,8 +30,6 @@ from tvm.tir import expr as _expr from tvm.tir import stmt as _stmt from tvm.te.tensor import Tensor -from .. import api as _api - #pylint: disable=invalid-name np_arg_types = tuple(list(numeric_types) + [numpy.ndarray]) @@ -47,7 +46,7 @@ def _internal_assert(cond, err): # Useful constants. In avoid of runtime dependences, we use function calls to return them. def make_nop(): """Returns a 'no operation' node in HalideIR.""" - return _stmt.Evaluate(_api.const(0, dtype='int32')) + return _stmt.Evaluate(tvm.runtime.const(0, dtype='int32')) def is_docstring(node): @@ -73,7 +72,7 @@ def _pruned_source(func): def replace_io(body, rmap): """Replacing tensors usage according to the dict given""" # pylint: disable=import-outside-toplevel - from .. 
import ir_pass + from tvm.tir import ir_pass def replace(op): if isinstance(op, _stmt.Provide) and op.func in rmap.keys(): diff --git a/python/tvm/intrin.py b/python/tvm/intrin.py deleted file mode 100644 index 93e8fcb..0000000 --- a/python/tvm/intrin.py +++ /dev/null @@ -1,19 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint:disable=unused-wildcard-import, wildcard-import, redefined-builtin -"""Backwared compatible layer for intrin.""" -from .tir.op import * diff --git a/python/tvm/make.py b/python/tvm/make.py deleted file mode 100644 index 089c393..0000000 --- a/python/tvm/make.py +++ /dev/null @@ -1,52 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=unused-import -"""namespace of IR node builder make function - -This namespace is used for developers. While you do not see any declarations. -The functions are automatically exported from C++ side via PackedFunc. - -Each api is a PackedFunc that can be called in a positional argument manner. -You can use make function to build the IR node. -""" -import tvm._ffi -import tvm.ir -from tvm.ir import make_node as node -from tvm.tir import Call - - -def make_by_min_extent(min_value, extent): - """Construct a Range by min and extent. - - This constructs a range in [min_value, min_value + extent) - - Parameters - ---------- - min_value : PrimExpr - The minimum value of the range. - - extent : PrimExpr - The extent of the range. - - Returns - ------- - rng : Range - The constructed range. - """ - return tvm.ir.Range.make_by_min_extent(min_value, extent) - -tvm._ffi._init_api("tvm.make") diff --git a/python/tvm/relay/__init__.py b/python/tvm/relay/__init__.py index 2ad210e..f4a7c75 100644 --- a/python/tvm/relay/__init__.py +++ b/python/tvm/relay/__init__.py @@ -18,7 +18,7 @@ """The Relay IR namespace containing the IR definition and compiler.""" import os from sys import setrecursionlimit -from ..api import register_func + from . import call_graph from . import base from . 
import ty diff --git a/python/tvm/relay/backend/_backend.py b/python/tvm/relay/backend/_backend.py index 9169ef4..df0347b 100644 --- a/python/tvm/relay/backend/_backend.py +++ b/python/tvm/relay/backend/_backend.py @@ -26,10 +26,10 @@ def lower(sch, inputs, func_name, source_func): Parameters ---------- - sch : tvm.Schedule + sch : tvm.te.Schedule The schedule. - inputs : List[tvm.Tensor] + inputs : List[tvm.te.Tensor] The inputs to the function. func_name : str diff --git a/python/tvm/relay/backend/compile_engine.py b/python/tvm/relay/backend/compile_engine.py index 6466dff..a51e4f7 100644 --- a/python/tvm/relay/backend/compile_engine.py +++ b/python/tvm/relay/backend/compile_engine.py @@ -21,6 +21,7 @@ from __future__ import absolute_import import logging import numpy as np import tvm +from tvm import te from ..base import register_relay_node, Object from ... import target as _target from ... import autotvm @@ -79,12 +80,12 @@ def get_shape(shape): """Convert the shape to correct dtype and vars.""" ret = [] for dim in shape: - if isinstance(dim, tvm.expr.IntImm): + if isinstance(dim, tvm.tir.IntImm): val = int(dim) assert val <= np.iinfo(np.int32).max - ret.append(tvm.expr.IntImm("int32", val)) - elif isinstance(dim, tvm.expr.Any): - ret.append(tvm.var("any_dim", "int32")) + ret.append(tvm.tir.IntImm("int32", val)) + elif isinstance(dim, tvm.tir.Any): + ret.append(te.var("any_dim", "int32")) else: ret.append(dim) return ret @@ -103,7 +104,7 @@ def get_valid_implementations(op, attrs, inputs, out_type, target): attrs : object The op attribute. - inputs : List[tvm.Tensor] + inputs : List[tvm.te.Tensor] Input tensors to the op. 
out_type : relay.Type @@ -129,7 +130,7 @@ def get_valid_implementations(op, attrs, inputs, out_type, target): flag = True for clause in spec.condition.clauses: clause = analyzer.canonical_simplify(clause) - if isinstance(clause, tvm.expr.IntImm) and clause.value: + if isinstance(clause, tvm.tir.IntImm) and clause.value: continue flag = False break @@ -162,7 +163,7 @@ def select_implementation(op, attrs, inputs, out_type, target, use_autotvm=True) attrs : object The op attribute. - inputs : List[tvm.Tensor] + inputs : List[tvm.te.Tensor] Input tensors to the op. out_type : relay.Type @@ -176,7 +177,7 @@ def select_implementation(op, attrs, inputs, out_type, target, use_autotvm=True) Returns ------- - ret : tuple(relay.op.OpImplementation, List[tvm.Tensor]) + ret : tuple(relay.op.OpImplementation, List[tvm.te.Tensor]) The best op implementation and the corresponding output tensors. """ all_impls = get_valid_implementations(op, attrs, inputs, out_type, target) diff --git a/python/tvm/relay/backend/graph_runtime_codegen.py b/python/tvm/relay/backend/graph_runtime_codegen.py index f58a9b0..762210d 100644 --- a/python/tvm/relay/backend/graph_runtime_codegen.py +++ b/python/tvm/relay/backend/graph_runtime_codegen.py @@ -36,7 +36,7 @@ contrib.graph_runtime or any other TVM runtime compatible systems. from tvm.runtime.ndarray import empty from tvm.relay import _build_module from tvm import target as _target -from tvm import expr as _expr +from tvm.tir import expr as _expr class GraphRuntimeCodegen(object): """The compiler from Relay to the TVM runtime system.""" diff --git a/python/tvm/relay/build_module.py b/python/tvm/relay/build_module.py index 6d9c850..22e0b91 100644 --- a/python/tvm/relay/build_module.py +++ b/python/tvm/relay/build_module.py @@ -23,7 +23,7 @@ import numpy as np from tvm.ir import IRModule -from tvm import expr as tvm_expr +from tvm.tir import expr as tvm_expr from .. 
import nd as _nd, target as _target, autotvm from ..contrib import graph_runtime as _graph_rt from . import _build_module diff --git a/python/tvm/relay/debug.py b/python/tvm/relay/debug.py index a2f3533..838eab5 100644 --- a/python/tvm/relay/debug.py +++ b/python/tvm/relay/debug.py @@ -16,22 +16,20 @@ # under the License. # pylint: disable=wildcard-import, redefined-builtin, invalid-name """The Relay IR namespace containing the IR definition and compiler.""" -from __future__ import absolute_import -from ..api import register_func - +import tvm._ffi # pylint: disable=unused-argument, import-outside-toplevel def _debugger_init(expr, stack): import pdb pdb.set_trace() -@register_func("relay.debug") +@tvm._ffi.register_func("relay.debug") def _debug(*args): import pdb pdb.set_trace() # pylint: disable=unused-argument -@register_func("relay.debug_interp") +@tvm._ffi.register_func("relay.debug_interp") def _debug_interp(*args): _, _, _, ist = args print("Relay Debugger") diff --git a/python/tvm/relay/frontend/coreml.py b/python/tvm/relay/frontend/coreml.py index 99a3930..0e5b64c 100644 --- a/python/tvm/relay/frontend/coreml.py +++ b/python/tvm/relay/frontend/coreml.py @@ -17,7 +17,6 @@ # pylint: disable=invalid-name, import-self, unused-argument, unused-variable # pylint: disable=inconsistent-return-statements, import-outside-toplevel """CoreML frontend.""" -from __future__ import absolute_import as _abs import math import numpy as np import tvm diff --git a/python/tvm/relay/frontend/darknet.py b/python/tvm/relay/frontend/darknet.py index 7623df2..0dae645 100644 --- a/python/tvm/relay/frontend/darknet.py +++ b/python/tvm/relay/frontend/darknet.py @@ -19,7 +19,6 @@ DarkNet symbol frontend for Relay. 
""" -from __future__ import absolute_import as _abs from enum import Enum import numpy as np import tvm diff --git a/python/tvm/relay/frontend/mxnet.py b/python/tvm/relay/frontend/mxnet.py index d74277b..2787cd6 100644 --- a/python/tvm/relay/frontend/mxnet.py +++ b/python/tvm/relay/frontend/mxnet.py @@ -16,8 +16,6 @@ # under the License. # pylint: disable=invalid-name, import-self, len-as-condition, no-else-return, too-many-lines """MXNet symbol frontend.""" -from __future__ import absolute_import as _abs - import json import numpy as np import tvm diff --git a/python/tvm/relay/frontend/pytorch.py b/python/tvm/relay/frontend/pytorch.py index af8715a..0b766a1 100644 --- a/python/tvm/relay/frontend/pytorch.py +++ b/python/tvm/relay/frontend/pytorch.py @@ -406,7 +406,7 @@ def _numtotensor(): val = inputs[0] dtype = type(val) - if isinstance(val, tvm.expr.IntImm): + if isinstance(val, tvm.tir.IntImm): val = val.__int__() dtype = int diff --git a/python/tvm/relay/frontend/tensorflow.py b/python/tvm/relay/frontend/tensorflow.py index 5532e3a..6f27d73 100644 --- a/python/tvm/relay/frontend/tensorflow.py +++ b/python/tvm/relay/frontend/tensorflow.py @@ -18,9 +18,6 @@ # pylint: disable=import-self, invalid-name, unused-argument, too-many-lines, len-as-condition, broad-except # pylint: disable=import-outside-toplevel """TF: Tensorflow frontend.""" -from __future__ import absolute_import as _abs -from __future__ import print_function - import warnings from collections import defaultdict @@ -1012,7 +1009,7 @@ def _gather(): 'Attribute batch_dims is not supported') new_input = inputs[0:2] return AttrCvt(op_name="take", - extras={'axis': tvm.const(axis, 'int32')}, + extras={'axis': tvm.tir.const(axis, 'int32')}, ignores=['Tindices', 'Tparams', 'validate_indices', 'Taxis', '_class', 'batch_dims'])(new_input, attr) return _impl diff --git a/python/tvm/relay/frontend/tflite.py b/python/tvm/relay/frontend/tflite.py index e132d4c..3a17083 100644 --- 
a/python/tvm/relay/frontend/tflite.py +++ b/python/tvm/relay/frontend/tflite.py @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. # pylint: disable=invalid-name, unused-argument, too-many-lines, import-outside-toplevel - """Tensorflow lite frontend.""" import math import numpy as np diff --git a/python/tvm/relay/op/_reduce.py b/python/tvm/relay/op/_reduce.py index 9d52ed3..ab8b7c2 100644 --- a/python/tvm/relay/op/_reduce.py +++ b/python/tvm/relay/op/_reduce.py @@ -17,9 +17,9 @@ """Backend compiler related feature registration""" from __future__ import absolute_import +from tvm.runtime import convert from topi.util import get_const_int, get_const_tuple from . import op as _reg -from ...api import convert from ...hybrid import script _reg.register_reduce_schedule("argmax") diff --git a/python/tvm/relay/op/_tensor.py b/python/tvm/relay/op/_tensor.py index 7c8ccb7..0fbbaef 100644 --- a/python/tvm/relay/op/_tensor.py +++ b/python/tvm/relay/op/_tensor.py @@ -16,14 +16,14 @@ # under the License. 
#pylint: disable=invalid-name, unused-argument, len-as-condition """Backend compiler related feature registration""" -from __future__ import absolute_import import topi + +from tvm.runtime import convert from topi.util import get_const_tuple from .op import register_compute, register_shape_func from .op import register_broadcast_schedule, register_injective_schedule from .op import register_pattern, OpPattern from ...hybrid import script -from ...api import convert register_broadcast_schedule("log") diff --git a/python/tvm/relay/op/_transform.py b/python/tvm/relay/op/_transform.py index 42c9434..4b35009 100644 --- a/python/tvm/relay/op/_transform.py +++ b/python/tvm/relay/op/_transform.py @@ -18,13 +18,14 @@ # pylint: disable=invalid-name,unused-argument, len-as-condition, too-many-nested-blocks, too-many-local-variables, too-many-arguments from __future__ import absolute_import import tvm +from tvm import te +from tvm.runtime import convert import topi from topi.util import get_const_int, get_const_tuple from . import op as _reg from . 
import strategy from .op import OpPattern from ...hybrid import script -from ...api import convert _reg.register_broadcast_schedule("broadcast_to") _reg.register_broadcast_schedule("broadcast_to_like") @@ -79,7 +80,7 @@ def compute_argwhere(attrs, inputs, output_type): output_shape.append(s) else: # see Any, replace it with a var - output_shape.append(tvm.var("any_dim", "int32")) + output_shape.append(te.var("any_dim", "int32")) new_output_type = tvm.relay.ty.TensorType(output_shape, "int32") return [topi.argwhere(new_output_type, inputs[0])] @@ -473,7 +474,7 @@ def squeeze_shape_func(attrs, inputs, _): if keep_axes: out = _squeeze_shape_func(inputs[0], convert(keep_axes)) else: - out = tvm.compute((), lambda *indices: 0) + out = te.compute((), lambda *indices: 0) return [out] @script diff --git a/python/tvm/relay/op/algorithm.py b/python/tvm/relay/op/algorithm.py index 6f87591..17fab80 100644 --- a/python/tvm/relay/op/algorithm.py +++ b/python/tvm/relay/op/algorithm.py @@ -28,7 +28,7 @@ def argsort(data, axis=-1, is_ascend=1, dtype="int32"): data : relay.Expr The input data tensor. - valid_count : tvm.Tensor + valid_count : tvm.te.Tensor The number of valid elements to be sorted. axis : int, optional diff --git a/python/tvm/relay/op/nn/_nn.py b/python/tvm/relay/op/nn/_nn.py index 97a5fa6..a4fde28 100644 --- a/python/tvm/relay/op/nn/_nn.py +++ b/python/tvm/relay/op/nn/_nn.py @@ -20,11 +20,12 @@ from __future__ import absolute_import import topi from topi.util import get_const_tuple + +from tvm.runtime import convert from .. import op as reg from .. 
import strategy from ..op import OpPattern from .._tensor import elemwise_shape_func -from ....api import convert from ....hybrid import script # relu diff --git a/python/tvm/relay/op/op.py b/python/tvm/relay/op/op.py index 4fd88f4..6be7d4d 100644 --- a/python/tvm/relay/op/op.py +++ b/python/tvm/relay/op/op.py @@ -21,7 +21,6 @@ from tvm.driver import lower, build from ..base import register_relay_node from ..expr import RelayExpr -from ...api import register_func from ...target import get_native_generic_func, GenericFunc from ...runtime import Object from . import _make @@ -155,7 +154,7 @@ class OpImplementation(Object): attrs : Attrs Op attributes. - inputs : list[tvm.tensor.Tensor] + inputs : list[te.tensor.Tensor] The input tensors. out_type : relay.Type @@ -163,7 +162,7 @@ class OpImplementation(Object): Returns ------- - outs : list[tvm.tensor.Tensor] + outs : list[te.tensor.Tensor] The output tensors. """ return _OpImplementationCompute(self, attrs, inputs, out_type) @@ -176,7 +175,7 @@ class OpImplementation(Object): attrs : Attrs Op attributes. - outs : list[tvm.tensor.Tensor] + outs : list[te.tensor.Tensor] The output tensors. target : tvm.target.Target @@ -184,7 +183,7 @@ class OpImplementation(Object): Returns ------- - schedule : tvm.Schedule + schedule : tvm.te.Schedule The schedule. 
""" return _OpImplementationSchedule(self, attrs, outs, target) @@ -454,11 +453,11 @@ def register_shape_func(op_name, data_dependant, shape_func=None, level=10): get(op_name).set_attr("TShapeDataDependant", data_dependant, level) return register(op_name, "FShapeFunc", shape_func, level) -@register_func("relay.op.compiler._lower") +@tvm._ffi.register_func("relay.op.compiler._lower") def _lower(name, schedule, inputs, outputs): return lower(schedule, list(inputs) + list(outputs), name=name) -@register_func("relay.op.compiler._build") +@tvm._ffi.register_func("relay.op.compiler._build") def _build(lowered_funcs): return build(lowered_funcs, target="llvm") @@ -473,7 +472,7 @@ def debug(expr, debug_func=None): if debug_func: name = "debugger_func{}".format(__DEBUG_COUNTER__) - register_func(name, debug_func) + tvm._ffi.register_func(name, debug_func) __DEBUG_COUNTER__ += 1 else: name = '' diff --git a/python/tvm/relay/param_dict.py b/python/tvm/relay/param_dict.py index 4c3f6d1..b7fee8c 100644 --- a/python/tvm/relay/param_dict.py +++ b/python/tvm/relay/param_dict.py @@ -17,9 +17,11 @@ # pylint: disable=invalid-name """Helper utility to save parameter dicts.""" import tvm +import tvm._ffi -_save_param_dict = tvm.get_global_func("tvm.relay._save_param_dict") -_load_param_dict = tvm.get_global_func("tvm.relay._load_param_dict") + +_save_param_dict = tvm._ffi.get_global_func("tvm.relay._save_param_dict") +_load_param_dict = tvm._ffi.get_global_func("tvm.relay._load_param_dict") def save_param_dict(params): """Save parameter dictionary to binary bytes. diff --git a/python/tvm/relay/quantize/quantize.py b/python/tvm/relay/quantize/quantize.py index be8a3a3..56a4645 100644 --- a/python/tvm/relay/quantize/quantize.py +++ b/python/tvm/relay/quantize/quantize.py @@ -16,12 +16,12 @@ # under the License. #pylint: disable=unused-argument, not-context-manager """Automatic quantization toolkit.""" -from __future__ import absolute_import +import tvm.ir + from . 
import _quantize from ._calibrate import calibrate from .. import expr as _expr from .. import transform as _transform -from ... import make as _make from ..base import Object, register_relay_node @@ -181,7 +181,7 @@ def qconfig(**kwargs): """ node_args = {k: v if k not in kwargs else kwargs[k] for k, v in QConfig._node_defaults.items()} - return _make.node("relay.quantize.QConfig", **node_args) + return tvm.ir.make_node("relay.quantize.QConfig", **node_args) class QuantizeContext(object): diff --git a/python/tvm/relay/testing/__init__.py b/python/tvm/relay/testing/__init__.py index bff01e8..54c9091 100644 --- a/python/tvm/relay/testing/__init__.py +++ b/python/tvm/relay/testing/__init__.py @@ -20,6 +20,7 @@ from __future__ import absolute_import as _abs import numpy as np import tvm +from tvm import te import tvm.relay as relay import tvm.relay.op as op from tvm.relay import transform diff --git a/python/tvm/relay/testing/config.py b/python/tvm/relay/testing/config.py index 68756e0..93a08db 100644 --- a/python/tvm/relay/testing/config.py +++ b/python/tvm/relay/testing/config.py @@ -20,6 +20,7 @@ from __future__ import absolute_import as _abs import os import tvm + def ctx_list(): """Get context list for testcases""" device_list = os.environ.get("RELAY_TEST_TARGETS", "") diff --git a/python/tvm/relay/transform.py b/python/tvm/relay/transform.py index 08b41b2..45535af 100644 --- a/python/tvm/relay/transform.py +++ b/python/tvm/relay/transform.py @@ -23,6 +23,7 @@ import inspect import functools import tvm +from tvm import te from tvm.runtime import ndarray as _nd from tvm.ir.transform import PassInfo, PassContext, Pass, ModulePass, Sequential, module_pass diff --git a/python/tvm/runtime/vm.py b/python/tvm/runtime/vm.py index 211bee3..2643ff1 100644 --- a/python/tvm/runtime/vm.py +++ b/python/tvm/runtime/vm.py @@ -106,6 +106,7 @@ class Executable(object): import numpy as np import tvm +from tvm import te from tvm import relay # define a simple network. 
x = relay.var('x', shape=(10, 10)) diff --git a/python/tvm/target/build_config.py b/python/tvm/target/build_config.py index 8782d24..c105175 100644 --- a/python/tvm/target/build_config.py +++ b/python/tvm/target/build_config.py @@ -35,7 +35,7 @@ class DumpIR(object): ----------- .. code-block:: python - with tvm.build_config(dump_pass_ir=True) + with tvm.target.build_config(dump_pass_ir=True) run() """ scope_level = 0 diff --git a/python/tvm/target/generic_func.py b/python/tvm/target/generic_func.py index 1936ff1..bfcd2dd 100644 --- a/python/tvm/target/generic_func.py +++ b/python/tvm/target/generic_func.py @@ -116,6 +116,7 @@ def override_native_generic_func(func_name): .. code-block:: python import tvm +from tvm import te # wrap function as target generic @tvm.target.override_native_generic_func("my_func") def my_func(a): @@ -210,6 +211,7 @@ def generic_func(fdefault): .. code-block:: python import tvm +from tvm import te # wrap function as target generic @tvm.target.generic_func def my_func(a): diff --git a/python/tvm/te/__init__.py b/python/tvm/te/__init__.py index 5970315..065cf4e 100644 --- a/python/tvm/te/__init__.py +++ b/python/tvm/te/__init__.py @@ -18,6 +18,7 @@ """Namespace for Tensor Expression Language """ # expose all operators in tvm tir.op +from tvm.tir import any, all, min_value, max_value, trace from tvm.tir import exp, erf, tanh, sigmoid, log, cos, sin, atan, sqrt, rsqrt, floor, ceil from tvm.tir import trunc, abs, round, nearbyint, isnan, power, popcount, fmod, if_then_else from tvm.tir import div, indexdiv, indexmod, truncdiv, truncmod, floordiv, floormod @@ -29,3 +30,5 @@ from .tensor_intrin import decl_tensor_intrin from .tag import tag_scope from .operation import placeholder, compute, scan, extern, var, size_var from .operation import thread_axis, reduce_axis + +from .tensor import PlaceholderOp, ComputeOp, TensorComputeOp, ScanOp, ExternOp, HybridOp diff --git a/python/tvm/te/operation.py b/python/tvm/te/operation.py index 
3c5b610..3ccab5b 100644 --- a/python/tvm/te/operation.py +++ b/python/tvm/te/operation.py @@ -167,13 +167,13 @@ def scan(init, update, state_placeholder, inputs=None, name="scan", tag="", attr .. code-block:: python # The following code is equivalent to numpy.cumsum - m = tvm.var("m") - n = tvm.var("n") - X = tvm.placeholder((m, n), name="X") - s_state = tvm.placeholder((m, n)) - s_init = tvm.compute((1, n), lambda _, i: X[0, i]) - s_update = tvm.compute((m, n), lambda t, i: s_state[t-1, i] + X[t, i]) - res = tvm.scan(s_init, s_update, s_state, X) + m = te.var("m") + n = te.var("n") + X = te.placeholder((m, n), name="X") + s_state = te.placeholder((m, n)) + s_init = te.compute((1, n), lambda _, i: X[0, i]) + s_update = te.compute((m, n), lambda t, i: s_state[t-1, i] + X[t, i]) + res = tvm.te.scan(s_init, s_update, s_state, X) """ if _tag.TagScope.get_current() is not None: if tag != "": @@ -264,10 +264,10 @@ def extern(shape, .. code-block:: python - A = tvm.placeholder((n, l), name="A") - B = tvm.placeholder((l, m), name="B") - C = tvm.extern((n, m), [A, B], - lambda ins, outs: tvm.call_packed( + A = te.placeholder((n, l), name="A") + B = te.placeholder((l, m), name="B") + C = te.extern((n, m), [A, B], + lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.cblas.matmul", ins[0], ins[1], outs[0], 0, 0), name="C") """ diff --git a/python/tvm/te/tag.py b/python/tvm/te/tag.py index 189076d..1022875 100644 --- a/python/tvm/te/tag.py +++ b/python/tvm/te/tag.py @@ -73,19 +73,19 @@ def tag_scope(tag): ------- .. 
code-block:: python - n = tvm.var('n') - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((n, l), name='A') - B = tvm.placeholder((m, l), name='B') - k = tvm.reduce_axis((0, l), name='k') + n = te.var('n') + m = te.var('m') + l = te.var('l') + A = te.placeholder((n, l), name='A') + B = te.placeholder((m, l), name='B') + k = te.reduce_axis((0, l), name='k') - with tvm.tag_scope(tag='matmul'): - C = tvm.compute((n, m), lambda i, j: tvm.sum(A[i, k] * B[j, k], axis=k)) + with tvm.te.tag_scope(tag='matmul'): + C = te.compute((n, m), lambda i, j: te.sum(A[i, k] * B[j, k], axis=k)) # or use tag_scope as decorator - @tvm.tag_scope(tag="conv") + @tvm.te.tag_scope(tag="conv") def compute_relu(data): - return tvm.compute(data.shape, lambda *i: tvm.select(data(*i) < 0, 0.0, data(*i))) + return te.compute(data.shape, lambda *i: tvm.select(data(*i) < 0, 0.0, data(*i))) """ return TagScope(tag) diff --git a/python/tvm/tir/__init__.py b/python/tvm/tir/__init__.py index ab78ca6..a5c81ac 100644 --- a/python/tvm/tir/__init__.py +++ b/python/tvm/tir/__init__.py @@ -17,20 +17,22 @@ # pylint: disable=unused-import, redefined-builtin """Namespace for Tensor-level IR""" from tvm.ir import PrimExpr +from tvm.runtime import const + from .buffer import Buffer, decl_buffer from .data_layout import Layout, BijectiveLayout, bijective_layout, layout from .expr import Var, SizeVar, Reduce, FloatImm, IntImm, StringImm, Cast from .expr import Add, Sub, Mul, Div, Mod, FloorDiv, FloorMod from .expr import Min, Max, EQ, NE, LT, LE, GT, GE, And, Or, Not from .expr import Select, Load, Ramp, Broadcast, Shuffle, Call, Let -from .expr import IterVar +from .expr import IterVar, Any from .stmt import Stmt, LetStmt, AssertStmt, ProducerConsumer, For from .stmt import Store, Provide, Allocate, AttrStmt, Free, Realize, SeqStmt from .stmt import IfThenElse, Evaluate, Prefetch, LoweredFunc, stmt_seq, stmt_list from .op import call_packed, call_pure_intrin, call_intrin, call_pure_extern, call_extern 
-from .op import call_llvm_intrin, all, any, min_value, max_value +from .op import call_llvm_intrin, all, any, min_value, max_value, trace from .op import exp, erf, tanh, sigmoid, log, cos, sin, atan, sqrt, rsqrt, floor, ceil from .op import trunc, abs, round, nearbyint, isnan, power, popcount, fmod, if_then_else from .op import div, indexdiv, indexmod, truncdiv, truncmod, floordiv, floormod diff --git a/python/tvm/tir/buffer.py b/python/tvm/tir/buffer.py index d0d01d7..0c7753e 100644 --- a/python/tvm/tir/buffer.py +++ b/python/tvm/tir/buffer.py @@ -201,15 +201,15 @@ def decl_buffer(shape, .. code-block:: python - m0, m1, m2 = tvm.var("m0"), tvm.var("m1"), tvm.var("m2") - n0, n1, n2 = tvm.var("n0"), tvm.var("n1"), tvm.var("n2") - o0, o1, o2 = tvm.var("o0"), tvm.var("o1"), tvm.var("o2") - A = tvm.placeholder((m0, m1, m2), name='A') - B = tvm.placeholder((n0, n1, n2), name='B') - C = tvm.compute((o0, o1, o2), lambda i, j, k: A[i, j, k] + B[i, j, k], name='C') + m0, m1, m2 = te.var("m0"), te.var("m1"), te.var("m2") + n0, n1, n2 = te.var("n0"), te.var("n1"), te.var("n2") + o0, o1, o2 = te.var("o0"), te.var("o1"), te.var("o2") + A = te.placeholder((m0, m1, m2), name='A') + B = te.placeholder((n0, n1, n2), name='B') + C = te.compute((o0, o1, o2), lambda i, j, k: A[i, j, k] + B[i, j, k], name='C') Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name="Ab", buffer_type="auto_broadcast") Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name="Bb", buffer_type="auto_broadcast") - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) fadd = tvm.build(s, [A, B, C], target='llvm', name='bcast_add', binds={A:Ab, B:Bb}) ctx = tvm.cpu(0) a = tvm.nd.array(np.random.uniform(size=(2, 4, 3)).astype(A.dtype), ctx) diff --git a/python/tvm/tir/expr.py b/python/tvm/tir/expr.py index acf5f51..bcf5967 100644 --- a/python/tvm/tir/expr.py +++ b/python/tvm/tir/expr.py @@ -25,7 +25,7 @@ For example, you can use addexp.a to get the left operand of an Add node. .. 
code-block:: python - x = tvm.var("n") + x = te.var("n") y = x + 2 assert(isinstance(y, tvm.tir.Add)) assert(y.a == x) @@ -169,7 +169,7 @@ class ExprOp(object): def __nonzero__(self): raise ValueError("Cannot use and / or / not operator to Expr, hint: " + - "use tvm.all / tvm.any instead") + "use tvm.tir.all / tvm.tir.any instead") def __bool__(self): return self.__nonzero__() @@ -346,8 +346,8 @@ class IterVar(Object, ExprOp): See Also -------- - tvm.thread_axis: Create thread axis IterVar. - tvm.reduce_axis: Create reduce axis IterVar. + te.thread_axis: Create thread axis IterVar. + te.reduce_axis: Create reduce axis IterVar. """ DataPar = 0 ThreadIndex = 1 @@ -812,7 +812,7 @@ class Select(PrimExprWithOp): Note ---- Select may compute both true_value and false_value. - Use :py:class:`tvm.if_then_else` instead if you want to + Use :py:class:`tvm.tir.if_then_else` instead if you want to get a conditional expression that only evaluates the correct branch. diff --git a/python/tvm/tir/generic.py b/python/tvm/tir/generic.py index 8a9cf8e..88be5b1 100644 --- a/python/tvm/tir/generic.py +++ b/python/tvm/tir/generic.py @@ -16,7 +16,7 @@ # under the License. """Generic opertors in TVM. We follow the numpy naming convention for this interface -(e.g., tvm.generic.multitply ~ numpy.multiply). +(e.g., tvm.tir.generic.multitply ~ numpy.multiply). The default implementation is used by tvm.ExprOp. """ # pylint: disable=unused-argument diff --git a/python/tvm/tir/ir_builder.py b/python/tvm/tir/ir_builder.py index b56e153..885b847 100644 --- a/python/tvm/tir/ir_builder.py +++ b/python/tvm/tir/ir_builder.py @@ -98,8 +98,8 @@ class IRBuilder(object): -------- .. code-block:: python - ib = tvm.ir_builder.create() - n = tvm.var("n") + ib = tvm.tir.ir_builder.create() + n = te.var("n") A = ib.allocate("float32", n, name="A") with ib.for_range(0, n, name="i") as i: with ib.if_scope((i % 2) == 0): @@ -158,8 +158,8 @@ class IRBuilder(object): -------- .. 
code-block:: python - ib = tvm.ir_builder.create() - i = tvm.var("i") + ib = tvm.tir.ir_builder.create() + i = te.var("i") x = ib.pointer("float32") ib.scope_attr(x, "storage_scope", "global") x[i] = x[i - 1] + 1 @@ -200,7 +200,7 @@ class IRBuilder(object): -------- .. code-block:: python - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() x = ib.pointer("float32") with ib.for_range(1, 10, name="i") as i: x[i] = x[i - 1] + 1 @@ -243,8 +243,8 @@ class IRBuilder(object): -------- .. code-block:: python - ib = tvm.ir_builder.create() - i = tvm.var("i") + ib = tvm.tir.ir_builder.create() + i = te.var("i") x = ib.pointer("float32") with ib.if_scope((i % 2) == 0): x[i] = x[i - 1] + 1 @@ -268,8 +268,8 @@ class IRBuilder(object): -------- .. code-block:: python - ib = tvm.ir_builder.create() - i = tvm.var("i") + ib = tvm.tir.ir_builder.create() + i = te.var("i") x = ib.pointer("float32") with ib.if_scope((i % 2) == 0): x[i] = x[i - 1] + 1 diff --git a/python/tvm/tir/op.py b/python/tvm/tir/op.py index 66e70c5..4a52787 100644 --- a/python/tvm/tir/op.py +++ b/python/tvm/tir/op.py @@ -64,7 +64,7 @@ def call_packed(*args): See Also -------- - tvm.extern : Create tensor with extern function call. + te.extern : Create tensor with extern function call. """ call_args = [_pack_buffer(x) if isinstance(x, Buffer) else x for x in args] return Call( @@ -194,7 +194,7 @@ def call_llvm_intrin(dtype, name, *args): from tvm.target import codegen llvm_id = codegen.llvm_lookup_intrinsic_id(name) assert llvm_id != 0, "%s is not an LLVM intrinsic" % name - return call_pure_intrin(dtype, 'llvm_intrin', tvm.const(llvm_id, 'uint32'), *args) + return call_pure_intrin(dtype, 'llvm_intrin', tvm.tir.const(llvm_id, 'uint32'), *args) def any(*args): @@ -274,7 +274,7 @@ def trace(args, trace_action="tvm.default_trace_action"): tvm.tir.call_packed : Creates packed function. 
""" if not isinstance(args, list): - raise Exception("tvm.trace consumes the args as list type") + raise Exception("tvm.tir.trace consumes the args as list type") call_args = [_pack_buffer(x) if isinstance(x, Buffer) else x for x in args] call_args.insert(0, trace_action) return tvm.tir.Call( @@ -556,9 +556,9 @@ def round(x): def nearbyint(x): """Round elements of the array to the nearest integer. This intrinsic uses llvm.nearbyint instead of llvm.round - which is faster but will results different from tvm.round. + which is faster but will results different from te.round. Notably nearbyint rounds according to the rounding mode, - whereas tvm.round (llvm.round) ignores that. + whereas te.round (llvm.round) ignores that. For differences between the two see: https://en.cppreference.com/w/cpp/numeric/math/round https://en.cppreference.com/w/cpp/numeric/math/nearbyint @@ -855,13 +855,13 @@ def comm_reducer(fcombine, fidentity, name="reduce"): ------- .. code-block:: python - n = tvm.var("n") - m = tvm.var("m") - mysum = tvm.comm_reducer(lambda x, y: x+y, - lambda t: tvm.const(0, dtype=t), name="mysum") - A = tvm.placeholder((n, m), name="A") - k = tvm.reduce_axis((0, m), name="k") - B = tvm.compute((n,), lambda i: mysum(A[i, k], axis=k), name="B") + n = te.var("n") + m = te.var("m") + mysum = te.comm_reducer(lambda x, y: x+y, + lambda t: tvm.tir.const(0, dtype=t), name="mysum") + A = te.placeholder((n, m), name="A") + k = te.reduce_axis((0, m), name="k") + B = te.compute((n,), lambda i: mysum(A[i, k], axis=k), name="B") """ def _reduce_directly(*args): num = len(args) @@ -943,14 +943,14 @@ def comm_reducer(fcombine, fidentity, name="reduce"): ------- .. 
code-block:: python - m = tvm.var("m") - n = tvm.var("n") - A = tvm.placeholder((m, n), name="A") - k = tvm.reduce_axis((0, n), name="k") + m = te.var("m") + n = te.var("n") + A = te.placeholder((m, n), name="A") + k = te.reduce_axis((0, n), name="k") # there are two way to use this {0} reducer: # mode 1, accept (expr, axis, where) to produce an Reduce Expr - B = tvm.compute((m,), lambda i: tvm.{0}(A[i, k], axis=k), name="B") + B = te.compute((m,), lambda i: tvm.{0}(A[i, k], axis=k), name="B") # mode 2, simply use it with multiple Exprs: {0}_res = tvm.{0}(m, n) diff --git a/python/tvm/tir/stmt.py b/python/tvm/tir/stmt.py index bc02b7d..65c72dd 100644 --- a/python/tvm/tir/stmt.py +++ b/python/tvm/tir/stmt.py @@ -23,8 +23,8 @@ Each statement node have subfields that can be visited from python side. .. code-block:: python - x = tvm.var("n") - a = tvm.var("array", tvm.handle) + x = te.var("n") + a = te.var("array", "handle") st = tvm.tir.stmt.Store(a, x + 1, 1) assert isinstance(st, tvm.tir.stmt.Store) assert(st.buffer_var == a) diff --git a/rust/frontend/examples/resnet/src/build_resnet.py b/rust/frontend/examples/resnet/src/build_resnet.py index e713818..49c67bf 100644 --- a/rust/frontend/examples/resnet/src/build_resnet.py +++ b/rust/frontend/examples/resnet/src/build_resnet.py @@ -25,6 +25,7 @@ import sys import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay import testing from tvm.contrib import graph_runtime, cc diff --git a/rust/frontend/tests/basics/src/tvm_add.py b/rust/frontend/tests/basics/src/tvm_add.py index 287084b..3911d40 100755 --- a/rust/frontend/tests/basics/src/tvm_add.py +++ b/rust/frontend/tests/basics/src/tvm_add.py @@ -20,20 +20,21 @@ import os.path as osp import sys import tvm +from tvm import te from tvm.contrib import cc def main(target, out_dir): - n = tvm.var('n') - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda i: A[i] + B[i], name='C') - s = 
tvm.create_schedule(C.op) + n = te.var('n') + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda i: A[i] + B[i], name='C') + s = te.create_schedule(C.op) if target == 'cuda': bx, tx = s[C].split(C.op.axis[0], factor=64) - s[C].bind(bx, tvm.thread_axis('blockIdx.x')) - s[C].bind(tx, tvm.thread_axis('threadIdx.x')) + s[C].bind(bx, te.thread_axis('blockIdx.x')) + s[C].bind(tx, te.thread_axis('threadIdx.x')) fadd = tvm.build(s, [A, B, C], target, target_host='llvm', name='myadd') diff --git a/rust/runtime/tests/build_model.py b/rust/runtime/tests/build_model.py index e3da95f..d1dffad 100755 --- a/rust/runtime/tests/build_model.py +++ b/rust/runtime/tests/build_model.py @@ -22,6 +22,7 @@ from os import path as osp import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay import testing diff --git a/rust/runtime/tests/test_nn/src/build_test_graph.py b/rust/runtime/tests/test_nn/src/build_test_graph.py index dd7621b..832dddf 100755 --- a/rust/runtime/tests/test_nn/src/build_test_graph.py +++ b/rust/runtime/tests/test_nn/src/build_test_graph.py @@ -23,6 +23,7 @@ import sys import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay import testing diff --git a/rust/runtime/tests/test_tvm_basic/src/build_test_lib.py b/rust/runtime/tests/test_tvm_basic/src/build_test_lib.py index 38c1f3a..bf7e60a 100755 --- a/rust/runtime/tests/test_tvm_basic/src/build_test_lib.py +++ b/rust/runtime/tests/test_tvm_basic/src/build_test_lib.py @@ -22,13 +22,14 @@ from os import path as osp import sys import tvm +from tvm import te def main(): - n = tvm.var('n') - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') - s = tvm.create_schedule(C.op) + n = te.var('n') + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') + s = 
tvm.te.create_schedule(C.op) s[C].parallel(s[C].op.axis[0]) print(tvm.lower(s, [A, B, C], simple_mode=True)) tvm.build(s, [A, B, C], 'llvm --system-lib').save(osp.join(sys.argv[1], 'test.o')) diff --git a/rust/runtime/tests/test_tvm_dso/src/build_test_lib.py b/rust/runtime/tests/test_tvm_dso/src/build_test_lib.py index 63b43a5..cb7353f 100755 --- a/rust/runtime/tests/test_tvm_dso/src/build_test_lib.py +++ b/rust/runtime/tests/test_tvm_dso/src/build_test_lib.py @@ -22,14 +22,15 @@ from os import path as osp import sys import tvm +from tvm import te from tvm.contrib import cc def main(): - n = tvm.var('n') - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') - s = tvm.create_schedule(C.op) + n = te.var('n') + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') + s = tvm.te.create_schedule(C.op) s[C].parallel(s[C].op.axis[0]) print(tvm.lower(s, [A, B, C], simple_mode=True)) obj_file = osp.join(sys.argv[1], 'test.o') diff --git a/tests/python/contrib/test_binutil.py b/tests/python/contrib/test_binutil.py index 44739bb..3106e73 100644 --- a/tests/python/contrib/test_binutil.py +++ b/tests/python/contrib/test_binutil.py @@ -24,6 +24,7 @@ Specifically, we test the following capabilities: """ import tvm +from tvm import te import subprocess from tvm.contrib import util from tvm.contrib import cc diff --git a/tests/python/contrib/test_cblas.py b/tests/python/contrib/test_cblas.py index 99614a8..18ea57a 100644 --- a/tests/python/contrib/test_cblas.py +++ b/tests/python/contrib/test_cblas.py @@ -15,19 +15,20 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te import numpy as np import topi.testing from tvm.contrib import cblas -def verify_matmul_add(m, l, n, transa=False, transb=False, dtype=tvm.float32): - bias = tvm.var('bias', dtype=dtype) +def verify_matmul_add(m, l, n, transa=False, transb=False, dtype="float32"): + bias = te.var('bias', dtype=dtype) ashape = (l, n) if transa else (n, l) bshape = (m, l) if transb else (l, m) - A = tvm.placeholder(ashape, name='A', dtype=dtype) - B = tvm.placeholder(bshape, name='B', dtype=dtype) + A = te.placeholder(ashape, name='A', dtype=dtype) + B = te.placeholder(bshape, name='B', dtype=dtype) C = cblas.matmul(A, B, transa, transb) - D = tvm.compute(C.shape, lambda i, j: C[i,j] + bias, name="D") - s = tvm.create_schedule(D.op) + D = te.compute(C.shape, lambda i, j: C[i,j] + bias, name="D") + s = te.create_schedule(D.op) def get_numpy(a, b, bb, transa, transb): if transa: @@ -64,14 +65,14 @@ def test_matmul_add(): verify_matmul_add(1, 16, 3, False, False) verify_matmul_add(1, 16, 3, True, True) -def verify_batch_matmul(batch, m, l, n, transa=False, transb=False, iterative=False, dtype=tvm.float32): +def verify_batch_matmul(batch, m, l, n, transa=False, transb=False, iterative=False, dtype="float32"): ashape = (batch, l, n) if transa else (batch, n, l) bshape = (batch, m, l) if transb else (batch, l, m) - A = tvm.placeholder(ashape, name='A', dtype=dtype) - B = tvm.placeholder(bshape, name='B', dtype=dtype) + A = te.placeholder(ashape, name='A', dtype=dtype) + B = te.placeholder(bshape, name='B', dtype=dtype) C = cblas.batch_matmul(A, B, transa, transb) - D = tvm.compute(C.shape, lambda k, i, j: C[k, i,j], name="D") - s = tvm.create_schedule(D.op) + D = te.compute(C.shape, lambda k, i, j: C[k, i,j], name="D") + s = te.create_schedule(D.op) def get_numpy(a, b, transa, transb): if transa: diff --git a/tests/python/contrib/test_cublas.py b/tests/python/contrib/test_cublas.py index a3baa8c..517e6e1 100644 --- a/tests/python/contrib/test_cublas.py +++ 
b/tests/python/contrib/test_cublas.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np from tvm.contrib import cublas from tvm.contrib import cublaslt @@ -23,10 +24,10 @@ def verify_matmul_add(in_dtype, out_dtype, rtol=1e-5): n = 1024 l = 128 m = 236 - A = tvm.placeholder((n, l), name='A', dtype=in_dtype) - B = tvm.placeholder((l, m), name='B', dtype=in_dtype) + A = te.placeholder((n, l), name='A', dtype=in_dtype) + B = te.placeholder((l, m), name='B', dtype=in_dtype) C = cublas.matmul(A, B, dtype=out_dtype) - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) def verify(target="cuda"): if not tvm.runtime.enabled(target): @@ -56,11 +57,11 @@ def verify_matmul_add_igemm(in_dtype, out_dtype, rtol=1e-5): N = roundoff(n, 8) N_out = roundoff(n, 32) - A = tvm.placeholder((N, L), name='A', dtype=in_dtype) - B = tvm.placeholder((m, L), name='B', dtype=in_dtype) + A = te.placeholder((N, L), name='A', dtype=in_dtype) + B = te.placeholder((m, L), name='B', dtype=in_dtype) # C has CUBLASLT_ORDER_COL32 layout, thus a different shape C = cublaslt.matmul(A, B, False, True, m, N_out, dtype=out_dtype) - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) def verify(target="cuda"): if not tvm.runtime.enabled(target): @@ -108,10 +109,10 @@ def verify_batch_matmul(in_dtype, out_dtype, rtol=1e-5): n = 1024 l = 128 m = 236 - A = tvm.placeholder((j, n, l), name='A', dtype=in_dtype) - B = tvm.placeholder((j, l, m), name='B', dtype=in_dtype) + A = te.placeholder((j, n, l), name='A', dtype=in_dtype) + B = te.placeholder((j, l, m), name='B', dtype=in_dtype) C = cublas.batch_matmul(A, B, dtype=out_dtype) - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) def verify(target="cuda"): if not tvm.runtime.enabled(target): diff --git a/tests/python/contrib/test_cudnn.py b/tests/python/contrib/test_cudnn.py index 1a22f90..58e7b49 100644 --- 
a/tests/python/contrib/test_cudnn.py +++ b/tests/python/contrib/test_cudnn.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm.contrib import cudnn import numpy as np import topi.testing @@ -48,8 +49,8 @@ def verify_conv2d(data_dtype, conv_dtype, tensor_format=0): xshape = [batch, height, weight, in_channel] wshape = [out_channel, filter_h, filter_w, in_channel] - X = tvm.placeholder(xshape, name='X', dtype=data_dtype) - W = tvm.placeholder(wshape, name='W', dtype=data_dtype) + X = te.placeholder(xshape, name='X', dtype=data_dtype) + W = te.placeholder(wshape, name='W', dtype=data_dtype) Y = cudnn.conv_forward(X, W, [pad_h, pad_w], @@ -60,7 +61,7 @@ def verify_conv2d(data_dtype, conv_dtype, tensor_format=0): conv_dtype=conv_dtype, algo=-1) yshape = [x.value for x in Y.shape] - s = tvm.create_schedule(Y.op) + s = te.create_schedule(Y.op) def verify(): ctx = tvm.gpu(0) @@ -120,8 +121,8 @@ def verify_conv3d(data_dtype, conv_dtype, tensor_format=0): xshape = [batch, in_channel, depth, height, weight] wshape = [out_channel, in_channel, filter_d, filter_h, filter_w] - X = tvm.placeholder(xshape, name='X', dtype=data_dtype) - W = tvm.placeholder(wshape, name='W', dtype=data_dtype) + X = te.placeholder(xshape, name='X', dtype=data_dtype) + W = te.placeholder(wshape, name='W', dtype=data_dtype) Y = cudnn.conv_forward(X, W, [pad_d, pad_h, pad_w], @@ -132,7 +133,7 @@ def verify_conv3d(data_dtype, conv_dtype, tensor_format=0): algo=-1, conv_dtype=conv_dtype) yshape = [x.value for x in Y.shape] - s = tvm.create_schedule(Y.op) + s = te.create_schedule(Y.op) def verify(): ctx = tvm.gpu(0) diff --git a/tests/python/contrib/test_dlpack.py b/tests/python/contrib/test_dlpack.py index f395955..453556c 100644 --- a/tests/python/contrib/test_dlpack.py +++ b/tests/python/contrib/test_dlpack.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te import numpy as np from tvm.contrib.dlpack import to_pytorch_func @@ -34,17 +35,17 @@ def test(): np.testing.assert_equal(y.asnumpy(), tvm_x.asnumpy()) np.testing.assert_equal(torch.utils.dlpack.from_dlpack(y.to_dlpack()).numpy(), tvm_x.asnumpy()) - n = tvm.convert(137) + n = tvm.runtime.convert(137) xx = torch.rand(137,137) yy = torch.rand(137,137) zz2 = torch.empty(137,137) zz = xx.mm(yy) - XX = tvm.placeholder((n,n), name='X') - YY = tvm.placeholder((n,n), name='Y') + XX = te.placeholder((n,n), name='X') + YY = te.placeholder((n,n), name='Y') - k = tvm.reduce_axis((0, n), name='k') - ZZ = tvm.compute((n,n), lambda i,j : tvm.sum(XX[i,k]*YY[k,j], axis=k)) - s = tvm.create_schedule(ZZ.op) + k = te.reduce_axis((0, n), name='k') + ZZ = te.compute((n,n), lambda i,j : te.sum(XX[i,k]*YY[k,j], axis=k)) + s = te.create_schedule(ZZ.op) f = tvm.build(s, [XX, YY, ZZ], target_host='llvm', name='f') f_pytorch = to_pytorch_func(f) diff --git a/tests/python/contrib/test_edgetpu_runtime.py b/tests/python/contrib/test_edgetpu_runtime.py index a5d9e34..625dc94 100644 --- a/tests/python/contrib/test_edgetpu_runtime.py +++ b/tests/python/contrib/test_edgetpu_runtime.py @@ -16,6 +16,7 @@ # under the License. import os import tvm +from tvm import te import numpy as np from tvm import rpc from tvm.contrib import util, tflite_runtime diff --git a/tests/python/contrib/test_gemm_acc16.py b/tests/python/contrib/test_gemm_acc16.py index d83ecdc..1fd5974 100644 --- a/tests/python/contrib/test_gemm_acc16.py +++ b/tests/python/contrib/test_gemm_acc16.py @@ -16,6 +16,7 @@ # under the License. 
# pylint: disable=import-self, invalid-name, unused-argument, too-many-lines, len-as-condition import tvm +from tvm import te import numpy as np from topi.x86.tensor_intrin import dot_16x1x16_uint8_int8_int16 @@ -25,8 +26,8 @@ def benchmark_fc_int8_acc16(): n = 128 k = 128 - X = tvm.placeholder((m, k), name='X', dtype="uint8") - W = tvm.placeholder((n, k), name='W', dtype="int8") + X = te.placeholder((m, k), name='X', dtype="uint8") + W = te.placeholder((n, k), name='W', dtype="int8") peak = 512/16*2*2*2 gops_per_mm = 2*n*m*k @@ -38,15 +39,15 @@ def benchmark_fc_int8_acc16(): return ctx = tvm.context(target, 0) - X = tvm.placeholder((m, k), name='X', dtype="uint8") - W = tvm.placeholder((n, k), name='W', dtype="int8") + X = te.placeholder((m, k), name='X', dtype="uint8") + W = te.placeholder((n, k), name='W', dtype="int8") pc = dot_16x1x16_uint8_int8_int16() - ak = tvm.reduce_axis((0, k), name='k') + ak = te.reduce_axis((0, k), name='k') - packedW = tvm.placeholder((n//128, 128*(k//2), 2), name='packedW', dtype="int8") - t_fc = tvm.compute((m, n), lambda i, j: tvm.sum(X[i, ak].astype("int16") * packedW[j//128, (ak//2)*128+j%128, ak%2].astype("int16"), axis=ak), name="F") + packedW = te.placeholder((n//128, 128*(k//2), 2), name='packedW', dtype="int8") + t_fc = te.compute((m, n), lambda i, j: te.sum(X[i, ak].astype("int16") * packedW[j//128, (ak//2)*128+j%128, ak%2].astype("int16"), axis=ak), name="F") - t_sch = tvm.create_schedule(t_fc.op) + t_sch = te.create_schedule(t_fc.op) a_x, a_y = t_fc.op.axis a_k, = t_fc.op.reduce_axis diff --git a/tests/python/contrib/test_gemm_acc32_vnni.py b/tests/python/contrib/test_gemm_acc32_vnni.py index e810da7..f723ccb 100644 --- a/tests/python/contrib/test_gemm_acc32_vnni.py +++ b/tests/python/contrib/test_gemm_acc32_vnni.py @@ -17,6 +17,7 @@ # pylint: disable=import-self, invalid-name, unused-argument, too-many-lines, len-as-condition import tvm +from tvm import te import numpy as np from topi.x86.tensor_intrin import 
dot_16x1x16_uint8_int8_int32_cascadelake from topi.x86.tensor_intrin import dot_16x1x16_uint8_int8_int32 @@ -29,8 +30,8 @@ def test_fc_int8_acc32(): n = 1024 k = 1024 - X = tvm.placeholder((m, k), name='X', dtype="uint8") - W = tvm.placeholder((n, k), name='W', dtype="int8") + X = te.placeholder((m, k), name='X', dtype="uint8") + W = te.placeholder((n, k), name='W', dtype="int8") peak = 280 print("Peak {} Gops/s".format(peak)) @@ -47,13 +48,13 @@ def test_fc_int8_acc32(): ctx = tvm.context(target, 0) pc = dot_16x1x16_uint8_int8_int32_cascadelake() - ak = tvm.reduce_axis((0, k), name='k') - packedW = tvm.placeholder( + ak = te.reduce_axis((0, k), name='k') + packedW = te.placeholder( (n // 16, 16 * (k // 4), 4), name='packedW', dtype="int8") - t_fc = tvm.compute((m, n), lambda i, j: tvm.sum(X[i, ak].astype( + t_fc = te.compute((m, n), lambda i, j: te.sum(X[i, ak].astype( "int32") * packedW[j / 16, (ak / 4) * 16 + j % 16, ak % 4].astype("int32"), axis=ak), name="F") - t_sch = tvm.create_schedule(t_fc.op) + t_sch = te.create_schedule(t_fc.op) a_x, a_y = t_fc.op.axis a_k, = t_fc.op.reduce_axis diff --git a/tests/python/contrib/test_miopen.py b/tests/python/contrib/test_miopen.py index d7a46e5..b4bedd8 100644 --- a/tests/python/contrib/test_miopen.py +++ b/tests/python/contrib/test_miopen.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te from tvm.contrib import miopen import numpy as np @@ -40,8 +41,8 @@ def test_conv2d(): return wshape = (out_channel, in_channel, filter_h, filter_w) - X = tvm.placeholder(xshape, name='X') - W = tvm.placeholder(wshape, name='W') + X = te.placeholder(xshape, name='X') + W = te.placeholder(wshape, name='W') Y = miopen.conv2d_forward(X, W, stride_h, diff --git a/tests/python/contrib/test_mps.py b/tests/python/contrib/test_mps.py index fc85290..b524365 100644 --- a/tests/python/contrib/test_mps.py +++ b/tests/python/contrib/test_mps.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np from tvm.contrib import mps @@ -25,19 +26,19 @@ def test_matmul(): n = 1024 l = 128 m = 256 - A = tvm.placeholder((n, l), name='A') - B = tvm.placeholder((l, m), name='B') + A = te.placeholder((n, l), name='A') + B = te.placeholder((l, m), name='B') C = mps.matmul(A, B) - D = tvm.compute( + D = te.compute( C.shape, lambda *i: C(*i) + 1. 
) - s = tvm.create_schedule(D.op) + s = te.create_schedule(D.op) yo, xo = D.op.axis - block_y = tvm.thread_axis("blockIdx.y") - block_x = tvm.thread_axis("blockIdx.x") - thread_y = tvm.thread_axis("threadIdx.y") - thread_x = tvm.thread_axis("threadIdx.x") + block_y = te.thread_axis("blockIdx.y") + block_x = te.thread_axis("blockIdx.x") + thread_y = te.thread_axis("threadIdx.y") + thread_x = te.thread_axis("threadIdx.x") by, ty = s[D].split(yo, factor=16) bx, tx = s[D].split(xo, factor=16) s[D].bind(by, block_y) @@ -73,10 +74,10 @@ def test_conv2d(): kh = 3 kw = 3 stride = 2 - A = tvm.placeholder((n, h, w, ci), name="x") - B = tvm.placeholder((co, kh, kw, ci), name="w") + A = te.placeholder((n, h, w, ci), name="x") + B = te.placeholder((co, kh, kw, ci), name="w") C = mps.conv2d(A, B, 'SAME', 2) - s1 = tvm.create_schedule(C.op) + s1 = te.create_schedule(C.op) def verify(A, B, C, target="llvm"): if not tvm.get_global_func("tvm.contrib.mps.conv2d", True): diff --git a/tests/python/contrib/test_mxnet_bridge.py b/tests/python/contrib/test_mxnet_bridge.py index 9f1be7e..37c1644 100644 --- a/tests/python/contrib/test_mxnet_bridge.py +++ b/tests/python/contrib/test_mxnet_bridge.py @@ -24,17 +24,18 @@ def mxnet_check(): import mxnet as mx import topi import tvm + from tvm import te import numpy as np from tvm.contrib.mxnet import to_mxnet_func # build a TVM function through topi n = 20 shape = (20,) - scale = tvm.var("scale", dtype="float32") - x = tvm.placeholder(shape) - y = tvm.placeholder(shape) + scale = te.var("scale", dtype="float32") + x = te.placeholder(shape) + y = te.placeholder(shape) z = topi.broadcast_add(x, y) - zz = tvm.compute(shape, lambda *i: z(*i) * scale) + zz = te.compute(shape, lambda *i: z(*i) * scale) target = tvm.target.cuda() diff --git a/tests/python/contrib/test_nnpack.py b/tests/python/contrib/test_nnpack.py index af8ae13..505199a 100644 --- a/tests/python/contrib/test_nnpack.py +++ b/tests/python/contrib/test_nnpack.py @@ -15,6 +15,7 @@ # 
specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np import scipy.signal from topi.nn.util import get_pad_tuple @@ -26,12 +27,12 @@ def test_fully_connected_inference(): n = 1024 l = 128 m = 235 - bias = tvm.var('bias', dtype=tvm.float32) - A = tvm.placeholder((l, ), name='A') - B = tvm.placeholder((m, l), name='B') + bias = te.var('bias', dtype="float32") + A = te.placeholder((l, ), name='A') + B = te.placeholder((m, l), name='B') C = nnpack.fully_connected_inference(A, B) - D = tvm.compute(C.shape, lambda i: C[i] + bias, name="D") - s = tvm.create_schedule(D.op) + D = te.compute(C.shape, lambda i: C[i] + bias, name="D") + s = te.create_schedule(D.op) def verify(target="llvm"): if not tvm.runtime.enabled(target): @@ -98,9 +99,9 @@ def test_convolution_inference(): bshape = (OC, ) oshape = (BATCH, OC, OH, OW) - data = tvm.placeholder(dshape, name='data') - kernel = tvm.placeholder(kshape, name='kernel') - bias = tvm.placeholder(bshape, name='bias') + data = te.placeholder(dshape, name='data') + kernel = te.placeholder(kshape, name='kernel') + bias = te.placeholder(bshape, name='bias') def verify(target="llvm", algorithm=nnpack.ConvolutionAlgorithm.AUTO, with_bias=True): @@ -116,7 +117,7 @@ def test_convolution_inference(): data, kernel, bias if with_bias else None, [PAD, PAD, PAD, PAD], [STRIDE, STRIDE], algorithm=algorithm) - s = tvm.create_schedule(output.op) + s = te.create_schedule(output.op) f = tvm.build(s, [data, kernel, bias, output], target) @@ -160,9 +161,9 @@ def test_convolution_inference_without_weight_transform(): bshape = (OC, ) oshape = (BATCH, OC, OH, OW) - data = tvm.placeholder(dshape, name='data') - kernel = tvm.placeholder(kshape, name='kernel') - bias = tvm.placeholder(bshape, name='bias') + data = te.placeholder(dshape, name='data') + kernel = te.placeholder(kshape, name='kernel') + bias = te.placeholder(bshape, name='bias') def verify(target="llvm", 
algorithm=nnpack.ConvolutionAlgorithm.AUTO, with_bias=True): @@ -181,7 +182,7 @@ def test_convolution_inference_without_weight_transform(): [PAD, PAD, PAD, PAD], [STRIDE, STRIDE], algorithm=algorithm) - s = tvm.create_schedule(output.op) + s = te.create_schedule(output.op) f = tvm.build(s, [data, kernel, bias, output], target) diff --git a/tests/python/contrib/test_random.py b/tests/python/contrib/test_random.py index f86a424..9efdc3e 100644 --- a/tests/python/contrib/test_random.py +++ b/tests/python/contrib/test_random.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np from tvm.contrib import random @@ -22,7 +23,7 @@ def test_randint(): m = 1024 n = 1024 A = random.randint(-127, 128, size=(m, n), dtype='int32') - s = tvm.create_schedule(A.op) + s = te.create_schedule(A.op) def verify(target="llvm"): if not tvm.runtime.enabled(target): @@ -46,7 +47,7 @@ def test_uniform(): m = 1024 n = 1024 A = random.uniform(0, 1, size=(m, n)) - s = tvm.create_schedule(A.op) + s = te.create_schedule(A.op) def verify(target="llvm"): if not tvm.runtime.enabled(target): @@ -70,7 +71,7 @@ def test_normal(): m = 1024 n = 1024 A = random.normal(3, 4, size=(m, n)) - s = tvm.create_schedule(A.op) + s = te.create_schedule(A.op) def verify(target="llvm"): if not tvm.runtime.enabled(target): diff --git a/tests/python/contrib/test_rocblas.py b/tests/python/contrib/test_rocblas.py index 2b6d001..af9d6dd 100644 --- a/tests/python/contrib/test_rocblas.py +++ b/tests/python/contrib/test_rocblas.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te import numpy as np from tvm.contrib import rocblas @@ -22,10 +23,10 @@ def test_matmul_add(): n = 1024 l = 128 m = 235 - A = tvm.placeholder((n, l), name='A') - B = tvm.placeholder((l, m), name='B') + A = te.placeholder((n, l), name='A') + B = te.placeholder((l, m), name='B') C = rocblas.matmul(A, B) - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) def verify(target="rocm"): if not tvm.runtime.enabled(target): diff --git a/tests/python/contrib/test_rpc_proxy.py b/tests/python/contrib/test_rpc_proxy.py index df0ee2b..6cd865e 100644 --- a/tests/python/contrib/test_rpc_proxy.py +++ b/tests/python/contrib/test_rpc_proxy.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import logging import numpy as np import time diff --git a/tests/python/contrib/test_rpc_tracker.py b/tests/python/contrib/test_rpc_tracker.py index 11e7766..2443c70 100644 --- a/tests/python/contrib/test_rpc_tracker.py +++ b/tests/python/contrib/test_rpc_tracker.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import logging import numpy as np import time diff --git a/tests/python/contrib/test_sort.py b/tests/python/contrib/test_sort.py index 87cdac0..9297a32 100644 --- a/tests/python/contrib/test_sort.py +++ b/tests/python/contrib/test_sort.py @@ -15,18 +15,19 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te import numpy as np def test_sort(): n = 2 l = 5 m = 3 - data = tvm.placeholder((n, l, m), name='data') - sort_num = tvm.placeholder((n, m), name="sort_num", dtype="int32") + data = te.placeholder((n, l, m), name='data') + sort_num = te.placeholder((n, m), name="sort_num", dtype="int32") axis = 1 is_ascend = False - out = tvm.extern(data.shape, [data, sort_num], - lambda ins, outs: tvm.call_packed( + out = te.extern(data.shape, [data, sort_num], + lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.sort.argsort_nms", ins[0], ins[1], outs[0], axis, is_ascend), dtype='int32', name="sort_tensor") @@ -38,7 +39,7 @@ def test_sort(): ctx = tvm.cpu(0) target = "llvm" - s = tvm.create_schedule(out.op) + s = te.create_schedule(out.op) f = tvm.build(s, [data, sort_num, out], target) a = tvm.nd.array(np.array(input).astype(data.dtype), ctx) b = tvm.nd.array(np.array(sort_num_input).astype(sort_num.dtype), ctx) @@ -51,17 +52,17 @@ def test_sort_np(): axis = 4 reduced_shape = (1, 2, 3, 4, 6) is_ascend = True - data = tvm.placeholder(dshape, name='data') - sort_num = tvm.placeholder(reduced_shape, name="sort_num", dtype="int32") - out = tvm.extern(data.shape, [data, sort_num], - lambda ins, outs: tvm.call_packed( + data = te.placeholder(dshape, name='data') + sort_num = te.placeholder(reduced_shape, name="sort_num", dtype="int32") + out = te.extern(data.shape, [data, sort_num], + lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.sort.argsort_nms", ins[0], ins[1], outs[0], axis, is_ascend), dtype='int32', name="sort_tensor") ctx = tvm.cpu(0) target = "llvm" - s = tvm.create_schedule(out.op) + s = te.create_schedule(out.op) f = tvm.build(s, [data, sort_num, out], target) np_data = np.random.uniform(size=dshape) diff --git a/tests/python/contrib/test_sparse.py b/tests/python/contrib/test_sparse.py index 7cc4a00..5e0ca5c 100644 --- a/tests/python/contrib/test_sparse.py +++ b/tests/python/contrib/test_sparse.py @@ -15,6 +15,7 @@ # specific language 
governing permissions and limitations # under the License. import tvm +from tvm import te import tvm.contrib.sparse as tvmsp import tvm.runtime.ndarray as _nd import numpy as np @@ -25,18 +26,18 @@ def test_static_tensor(): stype = 'csr' target = 'llvm' ctx = tvm.context(target, 0) - m = tvm.size_var('m') - n = tvm.size_var('n') + m = te.size_var('m') + n = te.size_var('n') A = tvmsp.placeholder(shape=(m, n), name='A', dtype=dtype) assert(A.stype == 'csr') n = 3 a = np.maximum(np.random.uniform(size=(n,n)).astype(dtype)-.6, 0.) a = tvmsp.array(a, ctx) - A.data = tvm.placeholder(a.data.shape, dtype, name='A_data') - Ab = tvm.decl_buffer(a.data.shape, dtype, name='A_data') + A.data = te.placeholder(a.data.shape, dtype, name='A_data') + Ab = tvm.tir.decl_buffer(a.data.shape, dtype, name='A_data') binds = {A.data: Ab} - C = tvm.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter') - s = tvm.create_schedule(C.op) + C = te.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter') + s = te.create_schedule(C.op) f = tvm.build(s, [A.data, C], target, binds=binds) c = tvmsp.array(np.zeros((n,n), dtype), ctx) c.data = tvm.nd.empty(a.data.shape, dtype) @@ -50,18 +51,18 @@ def test_dynamic_tensor(): stype = 'csr' target = 'llvm' ctx = tvm.context(target, 0) - nr, nc, n = tvm.size_var('nr'), tvm.size_var('nc'), tvm.size_var('n') + nr, nc, n = te.size_var('nr'), te.size_var('nc'), te.size_var('n') A = tvmsp.placeholder(shape=(nr, nc), nonzeros=n, name='A', dtype=dtype) assert(A.stype == 'csr') - C = tvm.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter') - s = tvm.create_schedule(C.op) + C = te.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter') + s = te.create_schedule(C.op) _nr, _nc = 3, 5 a = np.maximum(np.random.uniform(size=(_nr, _nc)).astype(dtype)-.6, 0.) 
a = tvmsp.array(a, ctx) assert a.data.dtype == a.dtype Ab = namedtuple('CSRBuffer', ['data', 'indices', 'indptr']) - Ab.data = tvm.decl_buffer(a.data.shape, a.data.dtype, name='A_data') - Ab.indices = tvm.decl_buffer(a.data.shape, a.data.dtype, name='A_indices') + Ab.data = tvm.tir.decl_buffer(a.data.shape, a.data.dtype, name='A_data') + Ab.indices = tvm.tir.decl_buffer(a.data.shape, a.data.dtype, name='A_indices') binds = {A.data: Ab.data, A.indices: Ab.indices} f = tvm.build(s, [nr, A.data, C], target, binds=binds) c = tvmsp.array(np.zeros((_nr, _nc), dtype), ctx) @@ -76,11 +77,11 @@ def test_sparse_array_tuple(): stype = 'csr' target = 'llvm' ctx = tvm.context(target, 0) - nr, nc, n = tvm.size_var('nr'), tvm.size_var('nc'), tvm.size_var('n') + nr, nc, n = te.size_var('nr'), te.size_var('nc'), te.size_var('n') A = tvmsp.placeholder(shape=(nr, nc), nonzeros=n, name='A', dtype=dtype) assert(A.stype == 'csr') - C = tvm.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter') - s = tvm.create_schedule(C.op) + C = te.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter') + s = te.create_schedule(C.op) _nr, _nc = 3, 5 a = np.maximum(np.random.uniform(size=(_nr, _nc)).astype(dtype)-.6, 0.) 
# convert to sparse array tuple @@ -98,8 +99,8 @@ def test_sparse_array_tuple(): a = tvmsp.array(a_init, shape=source_array.shape, ctx=ctx) assert a.data.dtype == a.dtype Ab = namedtuple('CSRBuffer', ['data', 'indices', 'indptr']) - Ab.data = tvm.decl_buffer(a.data.shape, a.data.dtype, name='A_data') - Ab.indices = tvm.decl_buffer(a.data.shape, a.data.dtype, name='A_indices') + Ab.data = tvm.tir.decl_buffer(a.data.shape, a.data.dtype, name='A_data') + Ab.indices = tvm.tir.decl_buffer(a.data.shape, a.data.dtype, name='A_indices') binds = {A.data: Ab.data, A.indices: Ab.indices} f = tvm.build(s, [nr, A.data, C], target, binds=binds) c = tvmsp.array(np.zeros((_nr, _nc), dtype), ctx) diff --git a/tests/python/contrib/test_tedd.py b/tests/python/contrib/test_tedd.py index d4d3ce4..6e5f3a4 100644 --- a/tests/python/contrib/test_tedd.py +++ b/tests/python/contrib/test_tedd.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
-import tvm +from tvm import te import numpy as np import re import topi @@ -31,10 +31,10 @@ def checkdepdency(): return not {'graphviz', 'ipython'} - {pkg.key for pkg in pkg_resources.working_set} def test_dfg(): - A = tvm.placeholder((1024, 4096), dtype='float32', name='A') + A = te.placeholder((1024, 4096), dtype='float32', name='A') B = topi.nn.softmax(A) # confirm lower works - s = tvm.create_schedule([B.op]) + s = te.create_schedule([B.op]) def verify(): from tvm.contrib import tedd @@ -49,7 +49,7 @@ def test_dfg(): findany(r"Stage_2:O_0 -> Tensor_2_0", str) findany(r"Tensor_2_0 -> Stage_3:I_0", str) findany(r"Stage_3:O_0 -> Tensor_3_0", str) - findany(r"Tensor_2_0 -> Stage_4:I_0", str) + findany(r"Tensor_2_0 -> Stage_4:I_0", str) findany(r"Tensor_3_0 -> Stage_4:I_1", str) findany(r"Stage_4:O_0 -> Tensor_4_0", str) if checkdepdency(): @@ -57,13 +57,13 @@ def test_dfg(): def test_itervar_relationship_graph(): - n = tvm.var("n") - m = tvm.var("m") - A = tvm.placeholder((n, m), name='A') - k = tvm.reduce_axis((0, m), "k") - B = tvm.compute((n, ), lambda i: tvm.sum(A[i, k], axis=k), name="B") + n = te.var("n") + m = te.var("m") + A = te.placeholder((n, m), name='A') + k = te.reduce_axis((0, m), "k") + B = te.compute((n, ), lambda i: te.sum(A[i, k], axis=k), name="B") - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) s[B].split(B.op.reduce_axis[0], factor=16) def verify(): @@ -89,18 +89,18 @@ def test_itervar_relationship_graph(): def test_schedule_tree(): - block_x = tvm.thread_axis('blockIdx.x') - thread_x = tvm.thread_axis('threadIdx.x') - n = tvm.var("n") - m = tvm.var("m") - l = tvm.var("l") - A = tvm.placeholder((n, m, l), name='A') - B = tvm.compute((n, m, l), lambda bi, bj, bk: A[bi, bj, bk] + 1, name='B') - r = tvm.reduce_axis((0, m), "r") - C = tvm.compute((n, m,), - lambda ci, cj: tvm.sum(B[ci, cj, r], axis=r), - name="C") - s = tvm.create_schedule(C.op) + block_x = te.thread_axis('blockIdx.x') + thread_x = te.thread_axis('threadIdx.x') + n 
= te.var("n") + m = te.var("m") + l = te.var("l") + A = te.placeholder((n, m, l), name='A') + B = te.compute((n, m, l), lambda bi, bj, bk: A[bi, bj, bk] + 1, name='B') + r = te.reduce_axis((0, m), "r") + C = te.compute((n, m,), + lambda ci, cj: te.sum(B[ci, cj, r], axis=r), + name="C") + s = te.create_schedule(C.op) s.cache_read(A, 'shared', [B]) s[B].vectorize(B.op.axis[-1]) s[C].reorder(C.op.reduce_axis[0], C.op.axis[0]) @@ -115,7 +115,7 @@ def test_schedule_tree(): str = tedd.viz_schedule_tree(s, False, '', True) findany(r"digraph \"Schedule Tree\"", str) findany(r"subgraph cluster_legend", str) - # Check the A_shared stage, including memory scope, itervars, + # Check the A_shared stage, including memory scope, itervars, # and compute findany(r"Stage_1.*A\.shared
Scope: shared.+>0.+>" \ r"ax0\(kDataPar\).+>1.+ax1\(kDataPar\).+>2.+>ax2\(kDataPar\).+>" \ @@ -134,4 +134,4 @@ def test_schedule_tree(): if __name__ == "__main__": test_dfg() test_itervar_relationship_graph() - test_schedule_tree() \ No newline at end of file + test_schedule_tree() diff --git a/tests/python/contrib/test_tflite_runtime.py b/tests/python/contrib/test_tflite_runtime.py index 9d396be..8c883b0 100644 --- a/tests/python/contrib/test_tflite_runtime.py +++ b/tests/python/contrib/test_tflite_runtime.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np from tvm import rpc from tvm.contrib import util, tflite_runtime @@ -28,7 +29,7 @@ def skipped_test_tflite_runtime(): root = tf.Module() root.const = tf.constant([1., 2.], tf.float32) root.f = tf.function(lambda x: root.const * x) - + input_signature = tf.TensorSpec(shape=[2, ], dtype=tf.float32) concrete_func = root.f.get_concrete_function(input_signature) converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) @@ -48,13 +49,13 @@ def skipped_test_tflite_runtime(): interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() - + input_shape = input_details[0]['shape'] tflite_input = np.array(np.random.random_sample(input_shape), dtype=np.float32) interpreter.set_tensor(input_details[0]['index'], tflite_input) interpreter.invoke() tflite_output = interpreter.get_tensor(output_details[0]['index']) - + # inference via tvm tflite runtime with open(tflite_model_path, 'rb') as model_fin: runtime = tflite_runtime.create(model_fin.read(), tvm.cpu(0)) @@ -76,7 +77,7 @@ def skipped_test_tflite_runtime(): interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() - + input_shape = input_details[0]['shape'] tflite_input = np.array(np.random.random_sample(input_shape), 
dtype=np.float32) interpreter.set_tensor(input_details[0]['index'], tflite_input) diff --git a/tests/python/frontend/caffe2/test_forward.py b/tests/python/frontend/caffe2/test_forward.py index 92258bb..f052872 100644 --- a/tests/python/frontend/caffe2/test_forward.py +++ b/tests/python/frontend/caffe2/test_forward.py @@ -16,6 +16,7 @@ # under the License. import numpy as np import tvm +from tvm import te from tvm.contrib import graph_runtime from tvm.relay.testing.config import ctx_list from tvm import relay diff --git a/tests/python/frontend/coreml/test_forward.py b/tests/python/frontend/coreml/test_forward.py index b4ad300..3a15638 100644 --- a/tests/python/frontend/coreml/test_forward.py +++ b/tests/python/frontend/coreml/test_forward.py @@ -20,6 +20,7 @@ from coremltools.models.neural_network import NeuralNetworkBuilder from coremltools.models import datatypes import tvm +from tvm import te from tvm.contrib import graph_runtime import topi import topi.testing diff --git a/tests/python/frontend/darknet/test_forward.py b/tests/python/frontend/darknet/test_forward.py index 22dd08a..fcaeaec 100644 --- a/tests/python/frontend/darknet/test_forward.py +++ b/tests/python/frontend/darknet/test_forward.py @@ -23,6 +23,7 @@ by the script. """ import numpy as np import tvm +from tvm import te from tvm.contrib import graph_runtime from tvm.contrib.download import download_testdata download_testdata.__test__ = False diff --git a/tests/python/frontend/keras/test_forward.py b/tests/python/frontend/keras/test_forward.py index f7dcb29..db0c2c6 100644 --- a/tests/python/frontend/keras/test_forward.py +++ b/tests/python/frontend/keras/test_forward.py @@ -16,6 +16,7 @@ # under the License. 
import numpy as np import tvm +from tvm import te from tvm import relay from tvm.contrib import graph_runtime from tvm.relay.testing.config import ctx_list diff --git a/tests/python/frontend/mxnet/test_forward.py b/tests/python/frontend/mxnet/test_forward.py index 504f700..f676295 100644 --- a/tests/python/frontend/mxnet/test_forward.py +++ b/tests/python/frontend/mxnet/test_forward.py @@ -18,6 +18,7 @@ import numpy as np import operator import tvm +from tvm import te from tvm.contrib import graph_runtime from tvm.relay.testing.config import ctx_list from tvm import relay diff --git a/tests/python/frontend/mxnet/test_graph.py b/tests/python/frontend/mxnet/test_graph.py index 6e87000..0008799 100644 --- a/tests/python/frontend/mxnet/test_graph.py +++ b/tests/python/frontend/mxnet/test_graph.py @@ -17,6 +17,7 @@ import mxnet as mx import tvm +from tvm import te from tvm import relay from tvm.relay import transform import model_zoo diff --git a/tests/python/frontend/mxnet/test_qnn_ops_utils.py b/tests/python/frontend/mxnet/test_qnn_ops_utils.py index 4ee5f2e..3204256 100644 --- a/tests/python/frontend/mxnet/test_qnn_ops_utils.py +++ b/tests/python/frontend/mxnet/test_qnn_ops_utils.py @@ -16,6 +16,7 @@ # under the License. 
import tvm +from tvm import te import numpy as np from tvm import relay from tvm.contrib import graph_runtime diff --git a/tests/python/frontend/onnx/test_forward.py b/tests/python/frontend/onnx/test_forward.py index 6243178..20d7003 100644 --- a/tests/python/frontend/onnx/test_forward.py +++ b/tests/python/frontend/onnx/test_forward.py @@ -23,6 +23,7 @@ import torchvision import topi import topi.testing import tvm +from tvm import te from tvm import relay from tvm.contrib import graph_runtime from tvm.relay.testing.config import ctx_list diff --git a/tests/python/frontend/pytorch/test_forward.py b/tests/python/frontend/pytorch/test_forward.py index 715ae78..ba1d7bb 100644 --- a/tests/python/frontend/pytorch/test_forward.py +++ b/tests/python/frontend/pytorch/test_forward.py @@ -25,6 +25,7 @@ import numpy as np import torch from torch.nn import Module import tvm +from tvm import te import torchvision from tvm import relay @@ -720,7 +721,7 @@ def test_vgg11(): def test_vgg11_bn(): torch.set_grad_enabled(False) verify_model("vgg11_bn") - + #TODO: Need to update schedule in tophub file after PR #4787 updated workloads def test_mobilenet_v2(): torch.set_grad_enabled(False) diff --git a/tests/python/frontend/tensorflow/test_forward.py b/tests/python/frontend/tensorflow/test_forward.py index 2340bd4..9cd978e 100644 --- a/tests/python/frontend/tensorflow/test_forward.py +++ b/tests/python/frontend/tensorflow/test_forward.py @@ -34,6 +34,7 @@ from tensorflow.python.ops import variables from tensorflow.python.ops import init_ops from distutils.version import LooseVersion import tvm +from tvm import te from tvm import relay import tvm.relay.testing.tf as tf_testing @@ -2717,7 +2718,7 @@ def test_forward_reduce_any(): in_data = tf.placeholder(tf.bool, (5, 7, 11), name="in_data") tf.reduce_any(in_data, name="any") compare_tf_with_tvm([np_data], ['in_data:0'], 'any:0') - + def test_forward_reduce_max(): def check_max(ishape, axis, keepdims, dtype): tf.reset_default_graph() diff 
--git a/tests/python/frontend/tflite/test_forward.py b/tests/python/frontend/tflite/test_forward.py index f4b7ee0..4a16325 100644 --- a/tests/python/frontend/tflite/test_forward.py +++ b/tests/python/frontend/tflite/test_forward.py @@ -24,6 +24,7 @@ from __future__ import print_function from functools import partial import numpy as np import tvm +from tvm import te from tvm import relay import tensorflow as tf from tensorflow.python.framework import constant_op diff --git a/tests/python/integration/test_dot.py b/tests/python/integration/test_dot.py index f95787d..c66e596 100644 --- a/tests/python/integration/test_dot.py +++ b/tests/python/integration/test_dot.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np def lower(s, args, name="mydot"): @@ -22,18 +23,18 @@ def lower(s, args, name="mydot"): arg_list = [] for x in args: - assert isinstance(x, tvm.tensor.Tensor) - buf = tvm.decl_buffer(x.shape, dtype=x.dtype, name=x.op.name) + assert isinstance(x, te.tensor.Tensor) + buf = tvm.tir.decl_buffer(x.shape, dtype=x.dtype, name=x.op.name) binds[x] = buf arg_list.append(buf) s = s.normalize() - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) - stmt = tvm.ir_pass.StorageFlatten(stmt, binds, 16) - stmt = tvm.ir_pass.CanonicalSimplify(stmt) - stmt = tvm.ir_pass.Simplify(stmt) - fapi = tvm.ir_pass.MakeAPI(stmt, name, arg_list, 0, True) - fapi = tvm.ir_pass.LowerTVMBuiltin(fapi) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + stmt = tvm.tir.ir_pass.StorageFlatten(stmt, binds, 16) + stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt) + stmt = tvm.tir.ir_pass.Simplify(stmt) + fapi = tvm.tir.ir_pass.MakeAPI(stmt, name, arg_list, 0, True) + fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi) return fapi @@ -43,12 +44,12 @@ def mybuild(fapi, target="llvm"): def test_dot(): nn = 12 - n = tvm.convert(nn) - A = 
tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - k = tvm.reduce_axis((0, n), 'k') - C = tvm.compute((1,), lambda _: tvm.sum(A[k] * B[k], axis=k), name='C') - s = tvm.create_schedule(C.op) + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + k = te.reduce_axis((0, n), 'k') + C = te.compute((1,), lambda _: te.sum(A[k] * B[k], axis=k), name='C') + s = te.create_schedule(C.op) fapi = lower(s, [A, B, C]) def verify(target): diff --git a/tests/python/integration/test_ewise.py b/tests/python/integration/test_ewise.py index ecfc83c..a8f2db1 100644 --- a/tests/python/integration/test_ewise.py +++ b/tests/python/integration/test_ewise.py @@ -15,21 +15,22 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm.contrib import nvcc import numpy as np import time def test_exp(): # graph - n = tvm.convert(1024) - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: tvm.exp(A(*i)), name='B') - s = tvm.create_schedule(B.op) + n = tvm.runtime.convert(1024) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: te.exp(A(*i)), name='B') + s = te.create_schedule(B.op) # create iter var and assign them tags. num_thread = 8 bx, tx = s[B].split(B.op.axis[0], factor=num_thread) - s[B].bind(bx, tvm.thread_axis("blockIdx.x")) - s[B].bind(tx, tvm.thread_axis("threadIdx.x")) + s[B].bind(bx, te.thread_axis("blockIdx.x")) + s[B].bind(tx, te.thread_axis("threadIdx.x")) # one line to build the function. 
def check_device(device, host="stackvm"): @@ -57,11 +58,11 @@ def test_exp(): def test_fmod(): # graph def run(dtype): - n = tvm.size_var('n') - A = tvm.placeholder((n,), name='A', dtype=dtype) - B = tvm.placeholder((n,), name='B', dtype=dtype) - C = tvm.compute(A.shape, lambda *i: tvm.fmod(A(*i), B(*i)), name='C') - s = tvm.create_schedule(C.op) + n = te.size_var('n') + A = te.placeholder((n,), name='A', dtype=dtype) + B = te.placeholder((n,), name='B', dtype=dtype) + C = te.compute(A.shape, lambda *i: te.fmod(A(*i), B(*i)), name='C') + s = te.create_schedule(C.op) # create iter var and assign them tags. num_thread = 8 bx, tx = s[C].split(C.op.axis[0], factor=num_thread) @@ -73,8 +74,8 @@ def test_fmod(): return target = tvm.target.create(device) if "cpu" not in target.keys: - s[C].bind(bx, tvm.thread_axis("blockIdx.x")) - s[C].bind(tx, tvm.thread_axis("threadIdx.x")) + s[C].bind(bx, te.thread_axis("blockIdx.x")) + s[C].bind(tx, te.thread_axis("threadIdx.x")) fmod = tvm.build(s, [A, B, C], device, name="myfmod") # launch the kernel. @@ -96,23 +97,23 @@ def test_fmod(): def test_multiple_cache_write(): # graph - n = tvm.convert(1024) - A0 = tvm.placeholder((n,), name='A0', dtype = "float32") - A1 = tvm.placeholder((n,), name='A1', dtype = "float32") - B0, B1 = tvm.compute((n,), + n = tvm.runtime.convert(1024) + A0 = te.placeholder((n,), name='A0', dtype = "float32") + A1 = te.placeholder((n,), name='A1', dtype = "float32") + B0, B1 = te.compute((n,), lambda *i: (A0(*i) + A1(*i), A0(*i) * A1(*i)), name='B') - C = tvm.compute((n,), lambda *i: B0(*i) + B1(*i), + C = te.compute((n,), lambda *i: B0(*i) + B1(*i), name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) # create iter var and assign them tags. 
num_thread = 8 B0_cache, B1_cache = s.cache_write([B0, B1], "local") bx, tx = s[C].split(C.op.axis[0], factor=num_thread) s[B0].compute_at(s[C], bx) s[B0_cache].compute_at(s[C], bx) - s[C].bind(bx, tvm.thread_axis("blockIdx.x")) - s[C].bind(tx, tvm.thread_axis("threadIdx.x")) + s[C].bind(bx, te.thread_axis("blockIdx.x")) + s[C].bind(tx, te.thread_axis("threadIdx.x")) # one line to build the function. def check_device(device, host="stackvm"): if not tvm.runtime.enabled(host): @@ -140,10 +141,10 @@ def test_multiple_cache_write(): def test_log_pow_llvm(): # graph - n = tvm.size_var('n') - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: tvm.power(tvm.log(A(*i)), 2.0), name='B') - s = tvm.create_schedule(B.op) + n = te.size_var('n') + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: te.power(te.log(A(*i)), 2.0), name='B') + s = te.create_schedule(B.op) # create iter var and assign them tags. bx, tx = s[B].split(B.op.axis[0], factor=32) # one line to build the function. @@ -168,10 +169,10 @@ def test_log_pow_llvm(): def test_popcount(): def run(dtype): # graph - n = tvm.convert(1024) - A = tvm.placeholder((n,), name='A', dtype=dtype) - B = tvm.compute(A.shape, lambda *i: tvm.popcount(A(*i)), name='B') - s = tvm.create_schedule(B.op) + n = tvm.runtime.convert(1024) + A = te.placeholder((n,), name='A', dtype=dtype) + B = te.compute(A.shape, lambda *i: tvm.tir.popcount(A(*i)), name='B') + s = te.create_schedule(B.op) # simple schedule num_thread = 8 bx, tx = s[B].split(B.op.axis[0], factor=num_thread) @@ -183,8 +184,8 @@ def test_popcount(): return target = tvm.target.create(device) if "cpu" not in target.keys: - s[B].bind(bx, tvm.thread_axis("blockIdx.x")) - s[B].bind(tx, tvm.thread_axis("threadIdx.x")) + s[B].bind(bx, te.thread_axis("blockIdx.x")) + s[B].bind(tx, te.thread_axis("threadIdx.x")) func = tvm.build(s, [A, B], device) # launch the kernel. 
n = 1024 @@ -207,21 +208,21 @@ def test_popcount(): def test_add(): def run(dtype): # graph - n = tvm.size_var('n') - A = tvm.placeholder((n,), name='A', dtype=dtype) - B = tvm.placeholder((n,), name='B', dtype=dtype) - bias = tvm.var("bias", dtype=dtype) - scale = tvm.var("scale", dtype=dtype) - C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') + n = te.size_var('n') + A = te.placeholder((n,), name='A', dtype=dtype) + B = te.placeholder((n,), name='B', dtype=dtype) + bias = te.var("bias", dtype=dtype) + scale = te.var("scale", dtype=dtype) + C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') # schedule - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) # create iter var and assign them tags. num_thread = 16 bx, x = s[C].split(C.op.axis[0], factor=num_thread*4) tx, x = s[C].split(x, nparts=num_thread) _, x = s[C].split(x, factor=4) - s[C].bind(bx, tvm.thread_axis("blockIdx.x")) - s[C].bind(tx, tvm.thread_axis("threadIdx.x")) + s[C].bind(bx, te.thread_axis("blockIdx.x")) + s[C].bind(tx, te.thread_axis("threadIdx.x")) s[C].vectorize(x) # one line to build the function. 
@@ -259,16 +260,16 @@ def test_add(): def try_warp_memory(): """skip this in default test because it require higher arch""" m = 128 - A = tvm.placeholder((m,), name='A') - B = tvm.compute((m,), lambda i: A[i] + 3, name='B') + A = te.placeholder((m,), name='A') + B = te.compute((m,), lambda i: A[i] + 3, name='B') warp_size = 32 - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) AA = s.cache_read(A, "warp", [B]) xo, xi = s[B].split(B.op.axis[0], warp_size * 2) xi0, xi1 = s[B].split(xi, factor=warp_size) - tx = tvm.thread_axis("threadIdx.x") + tx = te.thread_axis("threadIdx.x") s[B].bind(xi1, tx) - s[B].bind(xo, tvm.thread_axis("blockIdx.x")) + s[B].bind(xo, te.thread_axis("blockIdx.x")) s[AA].compute_at(s[B], xo) xo, xi = s[AA].split(s[AA].op.axis[0], warp_size) s[AA].bind(xi, tx) diff --git a/tests/python/integration/test_ewise_fpga.py b/tests/python/integration/test_ewise_fpga.py index b2c7834..7883a4c 100644 --- a/tests/python/integration/test_ewise_fpga.py +++ b/tests/python/integration/test_ewise_fpga.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np import os @@ -29,13 +30,13 @@ def tvm_callback_vhls_postproc(code): def test_exp(): # graph - n = tvm.convert(1024) - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: tvm.exp(A(*i)), name='B') - s = tvm.create_schedule(B.op) + n = tvm.runtime.convert(1024) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: te.exp(A(*i)), name='B') + s = te.create_schedule(B.op) # create iter var and assign them tags. px, x = s[B].split(B.op.axis[0], nparts=1) - s[B].bind(px, tvm.thread_axis("pipeline")) + s[B].bind(px, te.thread_axis("pipeline")) # one line to build the function. 
def check_device(device, host="llvm"): @@ -64,17 +65,17 @@ def test_exp(): def test_multi_kernel(): # graph - n = tvm.convert(1024) - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') - D = tvm.compute(A.shape, lambda *i: A(*i) + C(*i), name='D') - s = tvm.create_schedule(D.op) + n = tvm.runtime.convert(1024) + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') + D = te.compute(A.shape, lambda *i: A(*i) + C(*i), name='D') + s = te.create_schedule(D.op) # create iter var and assign them tags. px, x = s[C].split(C.op.axis[0], nparts=1) - s[C].bind(px, tvm.thread_axis("pipeline")) + s[C].bind(px, te.thread_axis("pipeline")) px, x = s[D].split(D.op.axis[0], nparts=1) - s[D].bind(px, tvm.thread_axis("pipeline")) + s[D].bind(px, te.thread_axis("pipeline")) # one line to build the function. def check_device(device, host="llvm"): diff --git a/tests/python/integration/test_gemm.py b/tests/python/integration/test_gemm.py index d61335f..12026da 100644 --- a/tests/python/integration/test_gemm.py +++ b/tests/python/integration/test_gemm.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te import numpy as np import time @@ -22,26 +23,26 @@ import time def test_gemm(): # graph nn = 1024 - n = tvm.convert(nn) + n = tvm.runtime.convert(nn) m = n l = n - A = tvm.placeholder((n, l), name='A') - B = tvm.placeholder((m, l), name='B') - k = tvm.reduce_axis((0, l), name='k') - C = tvm.compute( + A = te.placeholder((n, l), name='A') + B = te.placeholder((m, l), name='B') + k = te.reduce_axis((0, l), name='k') + C = te.compute( (n, m), - lambda ii, jj: tvm.sum(A[ii, k] * B[jj, k], axis=k), + lambda ii, jj: te.sum(A[ii, k] * B[jj, k], axis=k), name='CC') # schedule - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) xtile, ytile = 32, 32 scale = 8 num_thread = 8 block_factor = scale * num_thread - block_x = tvm.thread_axis("blockIdx.x") - thread_x = tvm.thread_axis("threadIdx.x") - block_y = tvm.thread_axis("blockIdx.y") - thread_y = tvm.thread_axis("threadIdx.y") + block_x = te.thread_axis("blockIdx.x") + thread_x = te.thread_axis("threadIdx.x") + block_y = te.thread_axis("blockIdx.y") + thread_y = te.thread_axis("threadIdx.y") CC = s.cache_write(C, "local") AA = s.cache_read(A, "shared", [CC]) diff --git a/tests/python/integration/test_reduce.py b/tests/python/integration/test_reduce.py index 62c0290..82ade44 100644 --- a/tests/python/integration/test_reduce.py +++ b/tests/python/integration/test_reduce.py @@ -15,25 +15,26 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te import numpy as np def test_reduce_prims(): def test_prim(reducer, np_reducer): # graph - n = tvm.size_var('n') - m = tvm.size_var('m') - A = tvm.placeholder((n, m), name='A') - R = tvm.compute((n, ), lambda i: tvm.tir.Select((i > 1), 1, 0), name='R') - k = tvm.reduce_axis((0, m)) - B = tvm.compute((n,), lambda i: reducer(A[i, k], axis=k, where=(R[i]==1)), name='B') + n = tvm.te.size_var('n') + m = tvm.te.size_var('m') + A = te.placeholder((n, m), name='A') + R = te.compute((n, ), lambda i: tvm.tir.Select((i > 1), 1, 0), name='R') + k = te.reduce_axis((0, m)) + B = te.compute((n,), lambda i: reducer(A[i, k], axis=k, where=(R[i]==1)), name='B') # schedule - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) # create iter var and assign them tags. num_thread = 1 xo, xi = s[B].split(B.op.axis[0], factor=num_thread) - s[B].bind(xo, tvm.thread_axis("blockIdx.x")) - s[B].bind(xi, tvm.thread_axis("threadIdx.x")) + s[B].bind(xo, te.thread_axis("blockIdx.x")) + s[B].bind(xi, te.thread_axis("threadIdx.x")) s[R].compute_inline() # one line to build the function. 
@@ -64,18 +65,18 @@ def test_reduce_prims(): check_device("vulkan") check_device("cuda") check_device("opencl") - test_prim(tvm.sum, np.sum) - test_prim(tvm.min, np.amin) - test_prim(tvm.max, np.amax) + test_prim(te.sum, np.sum) + test_prim(tvm.te.min, np.amin) + test_prim(tvm.te.max, np.amax) def test_rfactor(): - n = tvm.convert(1027) - A = tvm.placeholder((n,), name='A') - k = tvm.reduce_axis((0, n)) - B = tvm.compute((1,), lambda i: tvm.sum(A[k], axis=k), name='B') + n = tvm.runtime.convert(1027) + A = te.placeholder((n,), name='A') + k = te.reduce_axis((0, n)) + B = te.compute((1,), lambda i: te.sum(A[k], axis=k), name='B') # schedule - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) kf, ki = s[B].split(k, nparts=4) BF = s.rfactor(B, kf) s[BF].parallel(BF.op.axis[0]) @@ -100,12 +101,12 @@ def test_rfactor(): check_target() def test_rfactor_factor_axis(): - n = tvm.convert(1027) - A = tvm.placeholder((n,), name='A') - k = tvm.reduce_axis((0, n)) - B = tvm.compute((1,), lambda i: tvm.sum(A[k], axis=k), name='B') + n = tvm.runtime.convert(1027) + A = te.placeholder((n,), name='A') + k = te.reduce_axis((0, n)) + B = te.compute((1,), lambda i: te.sum(A[k], axis=k), name='B') # schedule - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) kf, ki = s[B].split(k, nparts=4) BF = s.rfactor(B, kf, 1) s[BF].parallel(BF.op.axis[0]) @@ -133,21 +134,21 @@ def test_rfactor_factor_axis(): def test_rfactor_threads(): nn = 1027 mm = 10 - n = tvm.convert(nn) - m = tvm.convert(mm) - A = tvm.placeholder((m, n), name='A') - k = tvm.reduce_axis((0, n)) + n = tvm.runtime.convert(nn) + m = tvm.runtime.convert(mm) + A = te.placeholder((m, n), name='A') + k = te.reduce_axis((0, n)) nthread = 16 - B = tvm.compute((m,), lambda i: tvm.sum(A[i, k], axis=k, where=(i>1)), name='B') + B = te.compute((m,), lambda i: te.sum(A[i, k], axis=k, where=(i>1)), name='B') # schedule - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) ko, kf = s[B].split(k, 
factor=nthread) BF = s.rfactor(B, kf) bx, ty = s[B].split(s[B].op.axis[0], factor=nthread) - s[B].bind(bx, tvm.thread_axis("blockIdx.x")) - s[B].bind(ty, tvm.thread_axis("threadIdx.y")) + s[B].bind(bx, te.thread_axis("blockIdx.x")) + s[B].bind(ty, te.thread_axis("threadIdx.y")) tx = s[B].op.reduce_axis[0] - thread_x = tvm.thread_axis("threadIdx.x") + thread_x = te.thread_axis("threadIdx.x") s[B].bind(tx, thread_x) s[BF].compute_at(s[B], tx) s[B].set_store_predicate(thread_x.var.equal(0)) @@ -183,23 +184,23 @@ def test_rfactor_threads(): def test_rfactor_elemwise_threads(): n = 1025 m = 10 - A = tvm.placeholder((m, n), name='A') - k = tvm.reduce_axis((0, n)) + A = te.placeholder((m, n), name='A') + k = te.reduce_axis((0, n)) nthread = 16 - B = tvm.compute((m,), lambda i: tvm.sum(A[i, k], axis=k), name='B') - BB = tvm.compute((m,), lambda i: B[i] + 1, name='BB') - C = tvm.compute((m,), lambda i: BB[i] + 1, name='C') + B = te.compute((m,), lambda i: te.sum(A[i, k], axis=k), name='B') + BB = te.compute((m,), lambda i: B[i] + 1, name='BB') + C = te.compute((m,), lambda i: BB[i] + 1, name='C') # schedule - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) s[BB].compute_inline() bx, ty = s[C].split(s[C].op.axis[0], factor=nthread) ko, kf = s[B].split(k, factor=nthread) BF = s.rfactor(B, kf) s[B].compute_at(s[C], ty) - s[C].bind(bx, tvm.thread_axis("blockIdx.x")) - s[C].bind(ty, tvm.thread_axis("threadIdx.y")) + s[C].bind(bx, te.thread_axis("blockIdx.x")) + s[C].bind(ty, te.thread_axis("threadIdx.y")) tx = s[B].op.reduce_axis[0] - thread_x = tvm.thread_axis("threadIdx.x") + thread_x = te.thread_axis("threadIdx.x") s[B].bind(tx, thread_x) s[BF].compute_at(s[B], tx) # Since thread_x is shared across reductions @@ -237,18 +238,18 @@ def test_argmax(): return lhs, rhs def fidentity(t0, t1): - return tvm.const(-1, t0), tvm.min_value(t1) + return tvm.tir.const(-1, t0), tvm.te.min_value(t1) - argmax = tvm.comm_reducer(fcombine, + argmax = te.comm_reducer(fcombine, 
fidentity, name='argmax') - m = tvm.size_var('m') - n = tvm.size_var('n') - idx = tvm.placeholder((m, n), name='idx', dtype='int32') - val = tvm.placeholder((m, n), name='val', dtype='float32') - k = tvm.reduce_axis((0, n), 'k') - T0, T1 = tvm.compute((m,), lambda i: argmax((idx[i,k], val[i,k]), axis=k), name='T') - s = tvm.create_schedule(T0.op) + m = te.size_var('m') + n = te.size_var('n') + idx = te.placeholder((m, n), name='idx', dtype='int32') + val = te.placeholder((m, n), name='val', dtype='float32') + k = te.reduce_axis((0, n), 'k') + T0, T1 = te.compute((m,), lambda i: argmax((idx[i,k], val[i,k]), axis=k), name='T') + s = te.create_schedule(T0.op) def check_target(): device = 'cpu' @@ -284,31 +285,31 @@ def test_rfactor_argmax(): return lhs, rhs def fidentity(t0, t1): - return tvm.const(-1, t0), tvm.min_value(t1) + return tvm.tir.const(-1, t0), tvm.te.min_value(t1) - argmax = tvm.comm_reducer(fcombine, + argmax = te.comm_reducer(fcombine, fidentity, name='argmax') nn = 1027 mm = 10 - n = tvm.convert(nn) - m = tvm.convert(mm) - A0 = tvm.placeholder((m, n), name='A0', dtype='int32') - A1 = tvm.placeholder((m, n), name='A1', dtype='float32') - k = tvm.reduce_axis((0, n)) - B0, B1 = tvm.compute((m,), lambda i: argmax((A0[i, k], A1[i, k]), axis=k), name='B') + n = tvm.runtime.convert(nn) + m = tvm.runtime.convert(mm) + A0 = te.placeholder((m, n), name='A0', dtype='int32') + A1 = te.placeholder((m, n), name='A1', dtype='float32') + k = te.reduce_axis((0, n)) + B0, B1 = te.compute((m,), lambda i: argmax((A0[i, k], A1[i, k]), axis=k), name='B') # schedule - s = tvm.create_schedule(B0.op) + s = te.create_schedule(B0.op) nthread = 16 ko, kf = s[B0].split(k, factor=nthread) BF0, BF1 = s.rfactor(B0, kf) bx, ty = s[B0].split(s[B0].op.axis[0], factor=nthread) - s[B0].bind(bx, tvm.thread_axis("blockIdx.x")) - s[B0].bind(ty, tvm.thread_axis("threadIdx.y")) + s[B0].bind(bx, te.thread_axis("blockIdx.x")) + s[B0].bind(ty, te.thread_axis("threadIdx.y")) tx = 
s[B0].op.reduce_axis[0] - thread_x = tvm.thread_axis("threadIdx.x") + thread_x = te.thread_axis("threadIdx.x") s[B0].bind(tx, thread_x) s[BF0.op].compute_at(s[B0], tx) s[B0].set_store_predicate(thread_x.var.equal(0)) diff --git a/tests/python/integration/test_scan.py b/tests/python/integration/test_scan.py index 366ed3d..99553c3 100644 --- a/tests/python/integration/test_scan.py +++ b/tests/python/integration/test_scan.py @@ -15,24 +15,25 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np def test_scan(): - m = tvm.size_var("m") - n = tvm.size_var("n") - X = tvm.placeholder((m, n), name="X") - s_state = tvm.placeholder((m, n)) - s_init = tvm.compute((1, n), lambda _, i: X[0, i]) - s_update = tvm.compute((m, n), lambda t, i: s_state[t-1, i] + X[t, i]) - scan = tvm.scan(s_init, s_update, s_state) + m = te.size_var("m") + n = te.size_var("n") + X = te.placeholder((m, n), name="X") + s_state = te.placeholder((m, n)) + s_init = te.compute((1, n), lambda _, i: X[0, i]) + s_update = te.compute((m, n), lambda t, i: s_state[t-1, i] + X[t, i]) + scan = tvm.te.scan(s_init, s_update, s_state) # test scan + compute case - res = tvm.compute((m, n), lambda i, j: scan[i, j]) + res = te.compute((m, n), lambda i, j: scan[i, j]) # schedule - s = tvm.create_schedule(res.op) + s = te.create_schedule(res.op) num_thread = 256 - block_x = tvm.thread_axis(None, "blockIdx.x") - thread_x = tvm.thread_axis((0, num_thread), "threadIdx.x") + block_x = te.thread_axis(None, "blockIdx.x") + thread_x = te.thread_axis((0, num_thread), "threadIdx.x") xo, xi = s[s_init].split(s_init.op.axis[1], factor=num_thread) s[s_init].bind(xo, block_x) s[s_init].bind(xi, thread_x) diff --git a/tests/python/integration/test_tuning.py b/tests/python/integration/test_tuning.py index 99f8b47..60a372c 100644 --- a/tests/python/integration/test_tuning.py +++ b/tests/python/integration/test_tuning.py @@ -21,6 +21,7 @@ import logging import 
time import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.tuner import RandomTuner @@ -30,20 +31,20 @@ def conv2d_no_batching(N, H, W, CI, CO, KH, KW): """An example template for testing""" assert N == 1, "Only consider batch_size = 1 in this template" - data = tvm.placeholder((N, CI, H, W), name='data') - kernel = tvm.placeholder((CO, CI, KH, KW), name='kernel') + data = te.placeholder((N, CI, H, W), name='data') + kernel = te.placeholder((CO, CI, KH, KW), name='kernel') - rc = tvm.reduce_axis((0, CI), name='rc') - ry = tvm.reduce_axis((0, KH), name='ry') - rx = tvm.reduce_axis((0, KW), name='rx') + rc = te.reduce_axis((0, CI), name='rc') + ry = te.reduce_axis((0, KH), name='ry') + rx = te.reduce_axis((0, KW), name='rx') - conv = tvm.compute( + conv = te.compute( (N, CO, H - KH + 1, W - KW + 1), - lambda nn, ff, yy, xx: tvm.sum( + lambda nn, ff, yy, xx: te.sum( data[nn, rc, yy + ry, xx + rx] * kernel[ff, rc, ry, rx], axis=[rc, ry, rx]), tag="conv2d_nchw") - s = tvm.create_schedule([conv.op]) + s = te.create_schedule([conv.op]) output = conv OL = s.cache_write(conv, 'local') @@ -65,15 +66,15 @@ def conv2d_no_batching(N, H, W, CI, CO, KH, KW): bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x) kernel_scope = n # this is the scope to attach global config inside this kernel - s[output].bind(bf, tvm.thread_axis("blockIdx.z")) - s[output].bind(by, tvm.thread_axis("blockIdx.y")) - s[output].bind(bx, tvm.thread_axis("blockIdx.x")) - s[output].bind(vf, tvm.thread_axis("vthread")) - s[output].bind(vy, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) - s[output].bind(tf, tvm.thread_axis("threadIdx.z")) - s[output].bind(ty, tvm.thread_axis("threadIdx.y")) - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(bf, te.thread_axis("blockIdx.z")) + s[output].bind(by, te.thread_axis("blockIdx.y")) + s[output].bind(bx, te.thread_axis("blockIdx.x")) + s[output].bind(vf, te.thread_axis("vthread")) + s[output].bind(vy, 
te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) + s[output].bind(tf, te.thread_axis("threadIdx.z")) + s[output].bind(ty, te.thread_axis("threadIdx.y")) + s[output].bind(tx, te.thread_axis("threadIdx.x")) s[output].reorder(n, bf, by, bx, vf, vy, vx, tf, ty, tx, fi, yi, xi) s[OL].compute_at(s[output], tx) @@ -100,9 +101,9 @@ def conv2d_no_batching(N, H, W, CI, CO, KH, KW): tz, fused = s[load].split(fused, nparts=cfg["tile_f"].size[2]) ty, fused = s[load].split(fused, nparts=cfg["tile_y"].size[2]) tx, fused = s[load].split(fused, nparts=cfg["tile_x"].size[2]) - s[load].bind(tz, tvm.thread_axis("threadIdx.z")) - s[load].bind(ty, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.z")) + s[load].bind(ty, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) # tune unroll cfg.define_knob("auto_unroll_max_step", [0, 512, 1500]) diff --git a/tests/python/integration/test_winograd_nnpack.py b/tests/python/integration/test_winograd_nnpack.py index 5e45c61..7dad2ca 100644 --- a/tests/python/integration/test_winograd_nnpack.py +++ b/tests/python/integration/test_winograd_nnpack.py @@ -16,6 +16,7 @@ # under the License. 
import numpy as np import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import FallbackConfigEntity from tvm.contrib import nnpack @@ -32,9 +33,9 @@ def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, p in_height = in_width = in_size - A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A') - W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W') - bias = tvm.placeholder((num_filter, 1, 1), name='bias') + A = te.placeholder((batch, in_channel, in_height, in_width), name='A') + W = te.placeholder((num_filter, in_channel, kernel, kernel), name='W') + bias = te.placeholder((num_filter, 1, 1), name='bias') a_shape = get_const_tuple(A.shape) w_shape = get_const_tuple(W.shape) diff --git a/tests/python/nightly/quantization/test_quantization_accuracy.py b/tests/python/nightly/quantization/test_quantization_accuracy.py index f047952..4818cc6 100644 --- a/tests/python/nightly/quantization/test_quantization_accuracy.py +++ b/tests/python/nightly/quantization/test_quantization_accuracy.py @@ -16,6 +16,7 @@ # under the License. 
from collections import namedtuple import tvm +from tvm import te from tvm import relay from tvm.relay import quantize as qtz import mxnet as mx diff --git a/tests/python/relay/benchmarking/benchmark_vm.py b/tests/python/relay/benchmarking/benchmark_vm.py index 55d7887..1e9030c 100644 --- a/tests/python/relay/benchmarking/benchmark_vm.py +++ b/tests/python/relay/benchmarking/benchmark_vm.py @@ -18,6 +18,7 @@ import numpy as np import tvm +from tvm import te from tvm.contrib import graph_runtime from tvm import relay from tvm.runtime import container @@ -73,7 +74,7 @@ def benchmark_execution(mod, prof_res = np.array(ftimer("main", data).results) * 1000 print("Mean vm inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res))) - + return result.asnumpy().astype(dtype) # random input diff --git a/tests/python/relay/test_adt.py b/tests/python/relay/test_adt.py index 8f631f8..deeb733 100644 --- a/tests/python/relay/test_adt.py +++ b/tests/python/relay/test_adt.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay from tvm.relay.backend.interpreter import ConstructorValue from tvm.relay import create_executor diff --git a/tests/python/relay/test_any.py b/tests/python/relay/test_any.py index 24176e4..aa81e31 100644 --- a/tests/python/relay/test_any.py +++ b/tests/python/relay/test_any.py @@ -18,6 +18,7 @@ import numpy as np import pytest import tvm +from tvm import te from tvm import relay from tvm.relay.loops import while_loop from tvm.relay.testing import run_infer_type as infer_type diff --git a/tests/python/relay/test_backend_compile_engine.py b/tests/python/relay/test_backend_compile_engine.py index 4e4122a..eb018fe 100644 --- a/tests/python/relay/test_backend_compile_engine.py +++ b/tests/python/relay/test_backend_compile_engine.py @@ -16,6 +16,7 @@ # under the License. 
import numpy as np import tvm +from tvm import te import tvm.testing from tvm import relay from tvm import autotvm @@ -69,7 +70,7 @@ def _tmp_strategy(attrs, inputs, out_type, target): return strategy def _create_record(task_name, dshape, wshape, target, cost): - args = [tvm.placeholder(dshape), tvm.placeholder(wshape), (1, 1), (1, 1, 1, 1), + args = [te.placeholder(dshape), te.placeholder(wshape), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'] task = autotvm.task.create(task_name, args, target) cfg = autotvm.ConfigEntity(0, None, {}, []) @@ -89,7 +90,7 @@ def test_get_valid_implementations(): return relay.backend.compile_engine.get_valid_implementations( relay.op.get("nn.conv2d"), out.attrs, - [tvm.placeholder(dshape), tvm.placeholder(wshape)], + [te.placeholder(dshape), te.placeholder(wshape)], out.checked_type, target) @@ -110,7 +111,7 @@ def test_select_implementation(): return relay.backend.compile_engine.select_implementation( relay.op.get("nn.conv2d"), out.attrs, - [tvm.placeholder(dshape), tvm.placeholder(wshape)], + [te.placeholder(dshape), te.placeholder(wshape)], out.checked_type, target, use_autotvm) diff --git a/tests/python/relay/test_backend_graph_runtime.py b/tests/python/relay/test_backend_graph_runtime.py index d5d29b6..71428a6 100644 --- a/tests/python/relay/test_backend_graph_runtime.py +++ b/tests/python/relay/test_backend_graph_runtime.py @@ -17,6 +17,7 @@ import numpy as np import tvm +from tvm import te from tvm import relay from tvm.contrib import graph_runtime from tvm.relay.scope_builder import ScopeBuilder diff --git a/tests/python/relay/test_backend_interpreter.py b/tests/python/relay/test_backend_interpreter.py index 9b548f1..360b6bd 100644 --- a/tests/python/relay/test_backend_interpreter.py +++ b/tests/python/relay/test_backend_interpreter.py @@ -16,6 +16,7 @@ # under the License. 
import numpy as np import tvm +from tvm import te import tvm.testing from tvm import nd from tvm import relay diff --git a/tests/python/relay/test_change_batch.py b/tests/python/relay/test_change_batch.py index e822bbb..e53887b 100644 --- a/tests/python/relay/test_change_batch.py +++ b/tests/python/relay/test_change_batch.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay from tvm.relay.testing import resnet from tvm.relay import transform diff --git a/tests/python/relay/test_cpp_build_module.py b/tests/python/relay/test_cpp_build_module.py index 674e214..171b6b0 100644 --- a/tests/python/relay/test_cpp_build_module.py +++ b/tests/python/relay/test_cpp_build_module.py @@ -17,6 +17,7 @@ import numpy as np import tvm +from tvm import te from tvm import relay from tvm.contrib.nvcc import have_fp16 diff --git a/tests/python/relay/test_error_reporting.py b/tests/python/relay/test_error_reporting.py index aef93ad..d697448 100644 --- a/tests/python/relay/test_error_reporting.py +++ b/tests/python/relay/test_error_reporting.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay def check_type_err(expr, msg): diff --git a/tests/python/relay/test_expr_functor.py b/tests/python/relay/test_expr_functor.py index 5c92365..ea7f8f6 100644 --- a/tests/python/relay/test_expr_functor.py +++ b/tests/python/relay/test_expr_functor.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te from tvm import relay from tvm.relay import ExprFunctor, ExprMutator, ExprVisitor diff --git a/tests/python/relay/test_external_codegen.py b/tests/python/relay/test_external_codegen.py index b086df0..e378998 100644 --- a/tests/python/relay/test_external_codegen.py +++ b/tests/python/relay/test_external_codegen.py @@ -20,6 +20,7 @@ import sys import numpy as np import tvm +from tvm import te import tvm.relay.testing import tvm.relay.transform from tvm import relay diff --git a/tests/python/relay/test_external_runtime.py b/tests/python/relay/test_external_runtime.py index 713aca9..0942cbb 100644 --- a/tests/python/relay/test_external_runtime.py +++ b/tests/python/relay/test_external_runtime.py @@ -21,6 +21,7 @@ import sys import numpy as np import tvm +from tvm import te import tvm.runtime._ffi_api from tvm import relay from tvm.contrib import util diff --git a/tests/python/relay/test_feature.py b/tests/python/relay/test_feature.py index 9066e85..3ef53d3 100644 --- a/tests/python/relay/test_feature.py +++ b/tests/python/relay/test_feature.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te from tvm import relay from tvm.relay.analysis import detect_feature from tvm.relay.transform import gradient diff --git a/tests/python/relay/test_ir_bind.py b/tests/python/relay/test_ir_bind.py index df280e2..45474b6 100644 --- a/tests/python/relay/test_ir_bind.py +++ b/tests/python/relay/test_ir_bind.py @@ -16,6 +16,7 @@ # under the License. """ test bind function.""" import tvm +from tvm import te from tvm import relay diff --git a/tests/python/relay/test_ir_module.py b/tests/python/relay/test_ir_module.py index 939672d..bab8247 100644 --- a/tests/python/relay/test_ir_module.py +++ b/tests/python/relay/test_ir_module.py @@ -16,6 +16,7 @@ # under the License. 
"""Tests for module functionality.""" import tvm +from tvm import te from tvm import relay from tvm.relay.prelude import Prelude from tvm.relay.testing import add_nat_definitions diff --git a/tests/python/relay/test_ir_nodes.py b/tests/python/relay/test_ir_nodes.py index b7d7eb9..cc663a1 100644 --- a/tests/python/relay/test_ir_nodes.py +++ b/tests/python/relay/test_ir_nodes.py @@ -17,6 +17,7 @@ """ test ir""" import pytest import tvm +from tvm import te from tvm import relay from tvm.tir.expr import * from tvm.relay import op @@ -57,7 +58,7 @@ def test_span(): # Types def test_tensor_type(): - shape = tvm.convert([1, 2, 3]) + shape = tvm.runtime.convert([1, 2, 3]) dtype = 'float32' tt = relay.TensorType(shape, dtype) assert tt.dtype == dtype @@ -76,9 +77,9 @@ def test_type_param(): def test_func_type(): - type_params = tvm.convert([]) - type_constraints = tvm.convert([]) # TODO: fill me in - arg_types = tvm.convert([]) + type_params = tvm.runtime.convert([]) + type_constraints = tvm.runtime.convert([]) # TODO: fill me in + arg_types = tvm.runtime.convert([]) ret_type = relay.TensorType((1, 2, 3), 'float32') tf = relay.FuncType(arg_types, ret_type, type_params, type_constraints) assert tf.type_params == type_params @@ -93,9 +94,9 @@ def test_func_type(): def test_tuple_type(): tp = relay.TypeVar('tp', relay.TypeKind.Type) - tf = relay.FuncType(tvm.convert([]), None, tvm.convert([]), tvm.convert([])) - tt = relay.TensorType(tvm.convert([1, 2, 3]), 'float32') - fields = tvm.convert([tp, tf, tt]) + tf = relay.FuncType(tvm.runtime.convert([]), None, tvm.runtime.convert([]), tvm.runtime.convert([])) + tt = relay.TensorType(tvm.runtime.convert([1, 2, 3]), 'float32') + fields = tvm.runtime.convert([tp, tf, tt]) tup_ty = relay.TupleType(fields) assert tup_ty.fields == fields @@ -105,9 +106,9 @@ def test_tuple_type(): def test_type_relation(): tp = relay.TypeVar('tp', relay.TypeKind.Type) - tf = relay.FuncType(tvm.convert([]), None, tvm.convert([]), tvm.convert([])) - tt = 
relay.TensorType(tvm.convert([1, 2, 3]), 'float32') - args = tvm.convert([tp, tf, tt]) + tf = relay.FuncType(tvm.runtime.convert([]), None, tvm.runtime.convert([]), tvm.runtime.convert([])) + tt = relay.TensorType(tvm.runtime.convert([1, 2, 3]), 'float32') + args = tvm.runtime.convert([tp, tf, tt]) num_inputs = 2 func = tvm.ir.EnvFunc.get("tvm.relay.type_relation.Broadcast") @@ -130,7 +131,7 @@ def test_constant(): def test_tuple(): - fields = tvm.convert([]) + fields = tvm.runtime.convert([]) tup = relay.Tuple(fields) assert tup.fields == fields assert tup.span == None @@ -163,10 +164,10 @@ def test_global_var(): def test_function(): param_names = ['a', 'b', 'c', 'd'] - params = tvm.convert([relay.Var(n) for n in param_names]) - ret_type = relay.TupleType(tvm.convert([])) - body = relay.Tuple(tvm.convert([])) - type_params = tvm.convert([]) + params = tvm.runtime.convert([relay.Var(n) for n in param_names]) + ret_type = relay.TupleType(tvm.runtime.convert([])) + body = relay.Tuple(tvm.runtime.convert([])) + type_params = tvm.runtime.convert([]) fn = relay.Function(params, body, ret_type, type_params) fn = fn.set_attribute("test_attribute", tvm.tir.StringImm("value")) assert fn.params == params @@ -180,10 +181,10 @@ def test_function(): @pytest.mark.skip(reason="AttrsEqualHandler doesn't handle Map so far.") def test_function_attrs(): param_names = ['a', 'b', 'c', 'd'] - params = tvm.convert([relay.var(n, shape=(5, 2)) for n in param_names]) - ret_type = relay.TupleType(tvm.convert([])) - body = relay.Tuple(tvm.convert([])) - type_params = tvm.convert([]) + params = tvm.runtime.convert([relay.var(n, shape=(5, 2)) for n in param_names]) + ret_type = relay.TupleType(tvm.runtime.convert([])) + body = relay.Tuple(tvm.runtime.convert([])) + type_params = tvm.runtime.convert([]) fn = relay.Function(params, body, ret_type, type_params) model_params = {} for param in params[:1]: @@ -210,7 +211,7 @@ def test_function_attrs(): def test_call(): op = relay.Var('f') arg_names = 
['a', 'b', 'c', 'd'] - args = tvm.convert([relay.Var(n) for n in arg_names]) + args = tvm.runtime.convert([relay.Var(n) for n in arg_names]) call = relay.Call(op, args, None, None) assert call.op == op assert call.args == args diff --git a/tests/python/relay/test_ir_parser.py b/tests/python/relay/test_ir_parser.py index bcce9b4..ba1f8d8 100644 --- a/tests/python/relay/test_ir_parser.py +++ b/tests/python/relay/test_ir_parser.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay from tvm.relay.analysis import graph_equal, assert_graph_equal from tvm.relay.analysis import alpha_equal, assert_alpha_equal diff --git a/tests/python/relay/test_ir_text_printer.py b/tests/python/relay/test_ir_text_printer.py index e2a0bdc..3bdd803 100644 --- a/tests/python/relay/test_ir_text_printer.py +++ b/tests/python/relay/test_ir_text_printer.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay import tvm.relay.testing import numpy as np @@ -70,7 +71,7 @@ def test_env(): def test_meta_data(): - n, c, h, w = tvm.size_var("n"), 10, 224, 224 + n, c, h, w = te.size_var("n"), 10, 224, 224 x = relay.var("x", shape=(n, c, h, w)) w = relay.var("w") z = relay.nn.conv2d(x, w, diff --git a/tests/python/relay/test_ir_well_formed.py b/tests/python/relay/test_ir_well_formed.py index fbbfbd2..db953d5 100644 --- a/tests/python/relay/test_ir_well_formed.py +++ b/tests/python/relay/test_ir_well_formed.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te from tvm import relay from tvm.relay.analysis import well_formed from tvm.relay.prelude import Prelude diff --git a/tests/python/relay/test_json_compact.py b/tests/python/relay/test_json_compact.py index 40b686a..6316791 100644 --- a/tests/python/relay/test_json_compact.py +++ b/tests/python/relay/test_json_compact.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te from tvm import relay import json diff --git a/tests/python/relay/test_memory_alloc.py b/tests/python/relay/test_memory_alloc.py index 18b1500..08fc39d 100644 --- a/tests/python/relay/test_memory_alloc.py +++ b/tests/python/relay/test_memory_alloc.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License import tvm +from tvm import te import numpy as np from tvm import relay from tvm.relay import memory_alloc diff --git a/tests/python/relay/test_op_grad_level1.py b/tests/python/relay/test_op_grad_level1.py index 3be62a3..0eb1cec 100644 --- a/tests/python/relay/test_op_grad_level1.py +++ b/tests/python/relay/test_op_grad_level1.py @@ -18,6 +18,7 @@ import numpy as np import pytest import tvm +from tvm import te from tvm import relay from tvm.relay.testing import check_grad, ctx_list, run_infer_type from tvm.relay.transform import gradient diff --git a/tests/python/relay/test_op_grad_level2.py b/tests/python/relay/test_op_grad_level2.py index 57b1e2c..2b5a1c2 100644 --- a/tests/python/relay/test_op_grad_level2.py +++ b/tests/python/relay/test_op_grad_level2.py @@ -19,6 +19,7 @@ import numpy as np import topi import topi.testing import tvm +from tvm import te from tvm import relay from tvm.relay.testing import check_grad, ctx_list, run_infer_type from tvm.relay.transform import gradient @@ -92,8 +93,8 @@ def verify_global_avg_pool2d_grad(x_shape): data = np.random.rand(*x_shape).astype("float32") y_shape = topi.util.get_const_tuple(fwd_func.ret_type.shape) out_grad = np.ones(shape=y_shape) - ref_grad = 
topi.testing.pool_grad_nchw(data, out_grad, pool_size=(x_shape[2], x_shape[3]), - strides=(1, 1), padding=[0, 0, 0, 0], pool_type='avg', + ref_grad = topi.testing.pool_grad_nchw(data, out_grad, pool_size=(x_shape[2], x_shape[3]), + strides=(1, 1), padding=[0, 0, 0, 0], pool_type='avg', ceil_mode=False) for target, ctx in ctx_list(): diff --git a/tests/python/relay/test_op_grad_level3.py b/tests/python/relay/test_op_grad_level3.py index 430c3dd..d13687f 100644 --- a/tests/python/relay/test_op_grad_level3.py +++ b/tests/python/relay/test_op_grad_level3.py @@ -18,6 +18,7 @@ import numpy as np import pytest import tvm +from tvm import te from tvm import relay from tvm.relay.testing import check_grad, ctx_list, run_infer_type from tvm.relay.transform import gradient diff --git a/tests/python/relay/test_op_level1.py b/tests/python/relay/test_op_level1.py index 194b095..0fa0749 100644 --- a/tests/python/relay/test_op_level1.py +++ b/tests/python/relay/test_op_level1.py @@ -17,6 +17,7 @@ import numpy as np import pytest import tvm +from tvm import te import scipy from tvm import relay from tvm.relay import transform @@ -86,7 +87,7 @@ def test_binary_op(): def check_binary_op(opfunc, ref, dtype): # TODO(@jroesch): this piece of code improperly uses type variables. 
- n = tvm.var("n") + n = te.var("n") s1 = (5, n, 5) s2 = (n, 1) t1 = relay.TensorType(s1) @@ -173,7 +174,7 @@ def test_bias_add(): def test_expand_dims_infer_type(): for dtype in ['float16', 'float32']: - n, t, d = tvm.size_var("n"), tvm.size_var("t"), 100 + n, t, d = te.size_var("n"), te.size_var("t"), 100 x = relay.var("x", shape=(n, t, d), dtype=dtype) y = relay.expand_dims(x, axis=2) assert "axis=2" in y.astext() @@ -223,23 +224,23 @@ def test_log_softmax(): def test_concatenate(): for dtype in ['float16', 'float32']: - n, t, d = tvm.size_var("n"), tvm.size_var("t"), 100 + n, t, d = te.size_var("n"), te.size_var("t"), 100 x = relay.var("x", shape=(n, t, d)) y = relay.var("y", shape=(n, t, d)) z = relay.concatenate((x, y), axis=-1) assert "axis=" in z.astext() zz = run_infer_type(z) assert zz.checked_type == relay.TensorType((n, t, 200)) - + x = relay.exp(x) z = relay.concatenate((x, y), axis=2) zz = run_infer_type(z) assert zz.checked_type == relay.TensorType((n, t, 200)) - + z = relay.concatenate((x, y), axis=1) zz = run_infer_type(z) assert zz.checked_type == relay.TensorType((n, t + t, 100)) - + # check shape mismatches (the following case is expected to raise tvm._ffi.base.TVMError. 
try: x = relay.var('p1', shape=(2, 5)) @@ -251,7 +252,7 @@ def test_concatenate(): pass else: assert False - + x = relay.var("x", shape=(10, 5), dtype=dtype) y = relay.var("y", shape=(10, 5), dtype=dtype) t = relay.var("z", shape=(), dtype=dtype) @@ -263,7 +264,7 @@ def test_concatenate(): y_data = np.random.rand(10, 5).astype(dtype) t_data = np.random.uniform(size=()).astype(dtype) ref_res = np.concatenate((x_data, y_data), axis=1) + t_data - + for target, ctx in ctx_list(): if dtype == 'float16' and target == 'cuda' and not have_fp16(tvm.gpu(0).compute_version): continue @@ -276,7 +277,7 @@ def test_concatenate(): def test_dropout(): for dtype in ['float16', 'float32']: - n, t, d = tvm.size_var("n"), tvm.size_var("t"), tvm.size_var("d") + n, t, d = te.size_var("n"), te.size_var("t"), te.size_var("d") input_ty = relay.TensorType((n, t, d), dtype) x = relay.var("x", input_ty) y = relay.nn.dropout(x, rate=0.75) @@ -297,7 +298,7 @@ def test_batch_norm(): center=False, scale=False) yy = run_infer_type(y.astuple()) assert "center=" in yy.astext() - assert yy.checked_type == relay.ty.TupleType(tvm.convert([ + assert yy.checked_type == relay.ty.TupleType(tvm.runtime.convert([ relay.TensorType((3, 2, 1), dtype), relay.TensorType((2,), dtype), relay.TensorType((2,), dtype) @@ -311,7 +312,7 @@ def test_batch_norm(): y = relay.nn.batch_norm(data, gamma, beta, moving_mean, moving_var, axis=0, center=False, scale=False) yy = run_infer_type(y.astuple()) - assert yy.checked_type == relay.ty.TupleType(tvm.convert([ + assert yy.checked_type == relay.ty.TupleType(tvm.runtime.convert([ relay.ty.TensorType((3, 2, 1), dtype), relay.ty.TensorType((3,), dtype), relay.ty.TensorType((3,), dtype) @@ -326,7 +327,7 @@ def test_batch_norm(): y = relay.nn.batch_norm(data, gamma, beta, moving_mean, moving_var, axis=-1, center=False, scale=False) yy = run_infer_type(y.astuple()) - assert yy.checked_type == relay.ty.TupleType(tvm.convert([ + assert yy.checked_type == 
relay.ty.TupleType(tvm.runtime.convert([ relay.ty.TensorType((1, 2, 3), dtype), relay.ty.TensorType((3,), dtype), relay.ty.TensorType((3,), dtype) @@ -348,7 +349,7 @@ def test_dense(): # Dense accuracy for float16 is poor if dtype == 'float16': return - n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w") + n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w") x = relay.var("x", relay.TensorType((n, c, h, w), dtype)) w = relay.var("w", relay.TensorType((2, w), dtype)) y = relay.nn.dense(x, w, units=2) @@ -356,15 +357,15 @@ def test_dense(): yy = run_infer_type(y) assert yy.checked_type == relay.TensorType((n, c, h, 2), dtype) - n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), 2 + n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), 2 x = relay.var("x", relay.TensorType((n, c, h, w), dtype)) - wh, ww = tvm.size_var("wh"), tvm.size_var("ww") + wh, ww = te.size_var("wh"), te.size_var("ww") w = relay.var("w", relay.TensorType((ww, wh), dtype)) y = relay.nn.dense(x, w) yy = run_infer_type(y) assert yy.checked_type == relay.TensorType((n, c, h, ww), dtype) - n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), 2 + n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), 2 x = relay.var("x", relay.TensorType((n, c, h, w), dtype)) w = relay.var("w", relay.IncompleteType()) y = relay.nn.dense(x, w, units=2) @@ -394,7 +395,7 @@ def test_dense_dtype(): data_dtype = 'uint8' weight_dtype = 'int8' out_dtype = 'uint8' - n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w") + n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w") x = relay.var("x", relay.TensorType((n, c, h, w), data_dtype)) w = relay.var("w", relay.TensorType((2, w), weight_dtype)) y = relay.nn.dense(x, w, units=2, out_dtype=out_dtype) @@ -406,7 +407,7 @@ def test_dense_dtype(): def test_bitserial_dense(): - m, k = 
tvm.size_var("m"), tvm.size_var("k") + m, k = te.size_var("m"), te.size_var("k") x = relay.var("x", relay.TensorType((m, k), "int16")) w = relay.var("w", relay.TensorType((k, 32), "int16")) y = relay.nn.bitserial_dense(x, w, units=32) diff --git a/tests/python/relay/test_op_level10.py b/tests/python/relay/test_op_level10.py index c3033e9..1e4be74 100644 --- a/tests/python/relay/test_op_level10.py +++ b/tests/python/relay/test_op_level10.py @@ -18,6 +18,7 @@ """ import numpy as np import tvm +from tvm import te import topi.testing from tvm import relay from tvm.relay import transform @@ -250,7 +251,7 @@ def verify_slice_like(data, slice_like, axes, output, dtype="float32"): tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5) def test_slice_like(): - d1, d2, d3, d4 = tvm.var("d1"), tvm.var("d2"), tvm.var("d3"), tvm.var("d4") + d1, d2, d3, d4 = te.var("d1"), te.var("d2"), te.var("d3"), te.var("d4") verify_slice_like(data=(d1, d2, d3), slice_like=(1, 2, 3), axes=None, output=(1, 2, 3)) verify_slice_like(data=(1, 2, 3), slice_like=(d1, d2, d3), axes=None, output=(d1, d2, d3)) verify_slice_like(data=(d2, d3, d4), slice_like=(d1, d2, d3), axes=(1,2), output=(d2, d2, d3)) @@ -304,7 +305,7 @@ def verify_batch_matmul(x_shape, y_shape, out_shape, dtype="float32"): tvm.testing.assert_allclose(z.asnumpy(), z_np, rtol=1e-5) def test_batch_matmul(): - b, m, n, k = tvm.size_var("b"), tvm.size_var("m"), tvm.size_var("n"), tvm.size_var("k") + b, m, n, k = te.size_var("b"), te.size_var("m"), te.size_var("n"), te.size_var("k") x = relay.var("x", relay.TensorType((b, m, k), "float32")) y = relay.var("y", relay.TensorType((b, n, k), "float32")) z = relay.nn.batch_matmul(x, y) diff --git a/tests/python/relay/test_op_level2.py b/tests/python/relay/test_op_level2.py index d545d0c..7a42fc3 100644 --- a/tests/python/relay/test_op_level2.py +++ b/tests/python/relay/test_op_level2.py @@ -18,6 +18,7 @@ """ import numpy as np import tvm +from tvm import te from tvm import autotvm 
from tvm import relay from tvm.relay import transform @@ -28,7 +29,7 @@ import topi.testing def test_conv1d_infer_type(): # symbolic in batch dimension - n, c, w = tvm.var("n"), 10, 224 + n, c, w = te.var("n"), 10, 224 x = relay.var("x", relay.ty.TensorType((n, c, w), "float32")) w = relay.var("w") y = relay.nn.conv1d(x, w, @@ -42,7 +43,7 @@ def test_conv1d_infer_type(): (2, 10, 3), "float32") # infer by shape of w, mixed precision - n, c, w = tvm.var("n"), 10, 224 + n, c, w = te.var("n"), 10, 224 x = relay.var("x", relay.TensorType((n, c, w), "int8")) w = relay.var("w", relay.TensorType((2, 10, 3), "int8")) y = relay.nn.conv1d(x, w, out_dtype="int32") @@ -52,7 +53,7 @@ def test_conv1d_infer_type(): (n, 2, 222), "int32") # infer shape in case of different dtypes for input and weight. - n, c, w = tvm.var("n"), 10, 224 + n, c, w = te.var("n"), 10, 224 x = relay.var("x", relay.TensorType((n, c, w), "uint8")) w = relay.var("w", relay.TensorType((2, 10, 3), "int8")) y = relay.nn.conv1d(x, w, out_dtype="int32") @@ -122,7 +123,7 @@ def test_conv1d_run(): def test_conv2d_infer_type(): # symbolic in batch dimension - n, c, h, w = tvm.size_var("n"), 10, 224, 224 + n, c, h, w = te.size_var("n"), 10, 224, 224 x = relay.var("x", relay.ty.TensorType((n, c, h, w), "float32")) w = relay.var("w") y = relay.nn.conv2d(x, w, @@ -136,7 +137,7 @@ def test_conv2d_infer_type(): (2, 10, 3, 3), "float32") # infer by shape of w, mixed precision - n, c, h, w = tvm.size_var("n"), 10, 224, 224 + n, c, h, w = te.size_var("n"), 10, 224, 224 x = relay.var("x", relay.TensorType((n, c, h, w), "int8")) w = relay.var("w", relay.TensorType((2, 10, 3, 3), "int8")) y = relay.nn.conv2d(x, w, out_dtype="int32") @@ -146,7 +147,7 @@ def test_conv2d_infer_type(): (n, 2, 222, 222), "int32") # infer shape in case of different dtypes for input and weight. 
- n, c, h, w = tvm.size_var("n"), 10, 224, 224 + n, c, h, w = te.size_var("n"), 10, 224, 224 x = relay.var("x", relay.TensorType((n, c, h, w), "uint8")) w = relay.var("w", relay.TensorType((2, 10, 3, 3), "int8")) y = relay.nn.conv2d(x, w, out_dtype="int32") @@ -385,7 +386,7 @@ def test_conv2d_winograd(): def test_conv3d_infer_type(): # symbolic in batch dimension - n, c, d, h, w = tvm.size_var("n"), 10, 224, 224, 224 + n, c, d, h, w = te.size_var("n"), 10, 224, 224, 224 x = relay.var("x", relay.ty.TensorType((n, c, d, h, w), "float32")) w = relay.var("w") y = relay.nn.conv3d(x, w, @@ -399,7 +400,7 @@ def test_conv3d_infer_type(): (2, 10, 3, 3, 3), "float32") # infer by shape of w, mixed precision - n, c, d, h, w = tvm.size_var("n"), 10, 224, 224, 224 + n, c, d, h, w = te.size_var("n"), 10, 224, 224, 224 x = relay.var("x", relay.TensorType((n, c, d, h, w), "int8")) w = relay.var("w", relay.TensorType((2, 10, 3, 3, 3), "int8")) y = relay.nn.conv3d(x, w, out_dtype="int32") @@ -409,7 +410,7 @@ def test_conv3d_infer_type(): (n, 2, 222, 222, 222), "int32") # infer shape in case of different dtypes for input and weight. 
- n, c, d, h, w = tvm.size_var("n"), 10, 224, 224, 224 + n, c, d, h, w = te.size_var("n"), 10, 224, 224, 224 x = relay.var("x", relay.TensorType((n, c, d, h, w), "uint8")) w = relay.var("w", relay.TensorType((2, 10, 3, 3, 3), "int8")) y = relay.nn.conv3d(x, w, out_dtype="int32") @@ -524,7 +525,7 @@ def test_conv3d_ndhwc_run(): def test_conv2d_transpose_infer_type(): # symbolic in batch dimension - n, c, h, w = tvm.size_var("n"), 10, 10, 12 + n, c, h, w = te.size_var("n"), 10, 10, 12 x = relay.var("x", relay.TensorType((n, c, h, w), "float32")) w = relay.var("w", relay.IncompleteType()) y = relay.nn.conv2d_transpose(x, w, @@ -539,7 +540,7 @@ def test_conv2d_transpose_infer_type(): (10, 15, 3, 3), "float32") # infer by shape of w, mixed precision - n, h, w, c = tvm.size_var("n"), 10, 10, 12 + n, h, w, c = te.size_var("n"), 10, 10, 12 x = relay.var("x", relay.TensorType((n, h, w, c), "float32")) w = relay.var("w", relay.TensorType((12, 11, 5, 5), "float32")) y = relay.nn.conv2d_transpose(x, w, @@ -624,41 +625,41 @@ def test_conv1d_transpose_ncw_run(): def test_upsampling_infer_type(): - n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w") - scale = tvm.const(2.0, "float64") + n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w") + scale = tvm.tir.const(2.0, "float64") x = relay.var("x", relay.TensorType((n, c, h, w), "float32")) y = relay.nn.upsampling(x, scale_h=2, scale_w=2, layout="NCHW", method="bilinear") "method=\"BINLINEAR\"" in y.astext() yy = run_infer_type(y) - assert yy.checked_type == relay.TensorType((n, c, tvm.tir.Cast("int32", tvm.round(h*scale)), - tvm.tir.Cast("int32", tvm.round(w*scale))), + assert yy.checked_type == relay.TensorType((n, c, tvm.tir.Cast("int32", te.round(h*scale)), + tvm.tir.Cast("int32", te.round(w*scale))), "float32") - n, c = tvm.size_var("n"), tvm.size_var("c") + n, c = te.size_var("n"), te.size_var("c") x = relay.var("x", relay.TensorType((n, c, 100, 200), 
"float32")) y = relay.nn.upsampling(x, scale_h=2, scale_w=2, layout="NCHW", method="bilinear") yy = run_infer_type(y) assert yy.checked_type == relay.TensorType((n, c, 200, 400), "float32") def test_upsampling3d_infer_type(): - n, c, d, h, w = tvm.size_var("n"), tvm.size_var("c"),\ - tvm.size_var("d"), tvm.size_var("h"), tvm.size_var("w") - scale = tvm.const(2.0, "float64") + n, c, d, h, w = te.size_var("n"), te.size_var("c"),\ + te.size_var("d"), te.size_var("h"), te.size_var("w") + scale = tvm.tir.const(2.0, "float64") x = relay.var("x", relay.TensorType((n, c, d, h, w), "float32")) y = relay.nn.upsampling3d(x, scale_d=2, scale_h=2, scale_w=2, layout="NCDHW", method="trilinear") yy = run_infer_type(y) - assert yy.checked_type == relay.TensorType((n, c, tvm.tir.Cast("int32", tvm.round(d*scale)), - tvm.tir.Cast("int32", tvm.round(h*scale)), - tvm.tir.Cast("int32", tvm.round(w*scale))), + assert yy.checked_type == relay.TensorType((n, c, tvm.tir.Cast("int32", te.round(d*scale)), + tvm.tir.Cast("int32", te.round(h*scale)), + tvm.tir.Cast("int32", te.round(w*scale))), "float32") - n, c = tvm.size_var("n"), tvm.size_var("c") + n, c = te.size_var("n"), te.size_var("c") x = relay.var("x", relay.TensorType((n, c, 100, 100, 200), "float32")) y = relay.nn.upsampling3d(x, scale_d=2, scale_h=2, scale_w=2, layout="NCDHW", method="trilinear") yy = run_infer_type(y) assert yy.checked_type == relay.TensorType((n, c, 200, 200, 400), "float32") def _test_pool2d(opfunc, reffunc): - n, c, h, w = tvm.size_var("n"), 10, 224, 224 + n, c, h, w = te.size_var("n"), 10, 224, 224 x = relay.var("x", relay.TensorType((n, c, h, w), "float32")) y = opfunc(x, pool_size=(1, 1)) assert "pool_size=" in y.astext() @@ -678,7 +679,7 @@ def _test_pool2d(opfunc, reffunc): tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5) def _test_pool2d_int(opfunc, reffunc, dtype): - n, c, h, w = tvm.size_var("n"), 10, 224, 224 + n, c, h, w = te.size_var("n"), 10, 224, 224 x = relay.var("x", 
relay.TensorType((n, c, h, w), dtype)) y = opfunc(x, pool_size=(1, 1)) assert "pool_size=" in y.astext() @@ -698,13 +699,13 @@ def _test_pool2d_int(opfunc, reffunc, dtype): tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5) def _test_global_pool2d(opfunc, reffunc): - n, c, h, w = tvm.size_var("n"), tvm.size_var("c"), 224, 224 + n, c, h, w = te.size_var("n"), te.size_var("c"), 224, 224 x = relay.var("x", relay.TensorType((n, h, w, c), "float32")) y = opfunc(x, layout="NHWC") yy = run_infer_type(y) assert yy.checked_type == relay.TensorType((n, 1, 1, c), "float32") - n, c, h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w") + n, c, h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w") x = relay.var("x", relay.TensorType((n, c, h, w), "float32")) y = opfunc(x) yy = run_infer_type(y) @@ -735,7 +736,7 @@ def test_pool2d(): def test_pool1d(): def _test_pool1d(opfunc): - n, c, w = tvm.var("n"), 10, 224 + n, c, w = te.var("n"), 10, 224 x = relay.var("x", relay.TensorType((n, c, w), "float32")) y = opfunc(x, pool_size=(1,)) assert "pool_size=" in y.astext() @@ -763,7 +764,7 @@ def test_pool1d(): def test_pool3d(): def _test_pool3d(opfunc, padding=(0, 0, 0, 0, 0, 0), out_shape=(1, 3, 16, 16, 16)): - n, c, d, h, w = tvm.size_var("n"), 10, 5, 224, 224 + n, c, d, h, w = te.size_var("n"), 10, 5, 224, 224 x = relay.var("x", relay.TensorType((n, c, d, h, w), "float32")) y = opfunc(x, pool_size=(1, 1, 1)) assert "pool_size=" in y.astext() @@ -833,7 +834,7 @@ def test_avg_pool2d_no_count_pad(): tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5) def test_flatten_infer_type(): - d1, d2, d3, d4 = tvm.size_var("d1"), tvm.size_var("d2"), tvm.size_var("d3"), tvm.size_var("d4") + d1, d2, d3, d4 = te.size_var("d1"), te.size_var("d2"), te.size_var("d3"), te.size_var("d4") x = relay.var("x", relay.TensorType((d1, d2, d3, d4), "float32")) y = relay.nn.batch_flatten(x) yy = 
run_infer_type(y) @@ -878,7 +879,7 @@ def test_pad_infer_type(): assert yy.checked_type == relay.TensorType((3, 6, 9, 12), "float32") # some symbolic values - n, c, h, w = tvm.size_var("n"), 2, 3, tvm.size_var("w") + n, c, h, w = te.size_var("n"), 2, 3, te.size_var("w") t = relay.var("t", relay.TensorType((n, c, h, w), "float32")) y = relay.nn.pad(t, ((1, 1), (2, 2), (3, 3), (4, 4))) yy = run_infer_type(y) @@ -901,7 +902,7 @@ def test_pad_run(): _test_run('int32') def test_lrn(): - n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w") + n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w") x = relay.var("x", shape=(n, c , h, w)) y = relay.nn.lrn(x, size=10, axis=2, bias=0.5, alpha=.00001, beta=0.75) "alpha=" in y.astext() @@ -932,7 +933,7 @@ def test_lrn(): tvm.testing.assert_allclose(op_res2.asnumpy(), ref_res, rtol=1e-5) def test_l2_normalize(): - n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w") + n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w") x = relay.var("x", shape=(n, c , h, w)) y = relay.nn.l2_normalize(x, eps=0.001, axis=[1]) "axis=" in y.astext() @@ -982,7 +983,7 @@ def test_batch_flatten(): def _test_upsampling(layout, method, align_corners=False): - n, c, h, w = tvm.size_var("n"), 16, 32, 32 + n, c, h, w = te.size_var("n"), 16, 32, 32 scale_h = 2.0 scale_w = 2.0 dtype = "float32" @@ -1021,7 +1022,7 @@ def test_upsampling(): _test_upsampling("NHWC", "bilinear", True) def _test_upsampling3d(layout, method, coordinate_transformation_mode="half_pixel"): - n, c, d, h, w = tvm.size_var("n"), 8, 16, 16, 16 + n, c, d, h, w = te.size_var("n"), 8, 16, 16, 16 scale_d = 2.0 scale_h = 2.0 scale_w = 2.0 @@ -1220,7 +1221,7 @@ def test_depthwise_conv2d_int8(): def test_bitserial_conv2d_infer_type(): # Basic shape test with ambiguous batch. 
- n, c, h, w = tvm.size_var("n"), 32, 224, 224 + n, c, h, w = te.size_var("n"), 32, 224, 224 x = relay.var("x", relay.ty.TensorType((n, c, h, w), "int16")) w = relay.var("w", relay.ty.TensorType((32, 32, 3, 3), "int16")) y = relay.nn.bitserial_conv2d( diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py index c5f340a..7e5314d 100644 --- a/tests/python/relay/test_op_level3.py +++ b/tests/python/relay/test_op_level3.py @@ -19,6 +19,7 @@ import numpy as np import pytest import tvm +from tvm import te from tvm import relay from tvm.relay import create_executor, transform from tvm.relay.testing import ctx_list, check_grad, run_infer_type @@ -166,7 +167,7 @@ def test_squeeze(): def test_transpose_infer_type(): - n, t, d = tvm.size_var("n"), tvm.size_var("t"), 100 + n, t, d = te.size_var("n"), te.size_var("t"), 100 x = relay.var("x", relay.TensorType((n, t, d), "float32")) y = relay.transpose(x, axes=(1, 0, 2)) assert "axes=" in y.astext() @@ -274,7 +275,7 @@ def test_reshape_like_infer_type(): assert zz.checked_type == relay.TensorType((1, 6), "float32") # symbolic shape - n, c, h, w = tvm.size_var("n"), 2, 3, tvm.size_var("w") + n, c, h, w = te.size_var("n"), 2, 3, te.size_var("w") x = relay.var("x", relay.TensorType((n, c, h, w), "float32")) y = relay.var("y", relay.TensorType((1, 8, 8), "float32")) z = relay.reshape_like(x, y) @@ -313,8 +314,8 @@ def test_take_infer_type(): yy = run_infer_type(y) assert yy.checked_type == relay.TensorType(oshape, "float32") - d1, d2, d3 = tvm.var("d1"), tvm.var("d2"), tvm.var("d3") - d4, d5, d6 = tvm.var("d4"), tvm.var("d5"), tvm.var("d6") + d1, d2, d3 = te.var("d1"), te.var("d2"), te.var("d3") + d4, d5, d6 = te.var("d4"), te.var("d5"), te.var("d6") verify_take((d1,), (1,), (1,), 0) verify_take((4,), (d1, d2), (d1, d2)) verify_take((3, 3, 3), (1, d2), (1, d2)) @@ -368,12 +369,12 @@ def test_split_infer_type(): yy = run_infer_type(y.astuple()) assert yy.checked_type == ret_type - idxd = tvm.indexdiv 
+ idxd = tvm.tir.indexdiv - d1, d2, d3, d4 = tvm.var("d1"), tvm.var("d2"), tvm.var("d3"), tvm.var("d4") - axis = tvm.var("axis") + d1, d2, d3, d4 = te.var("d1"), te.var("d2"), te.var("d3"), te.var("d4") + axis = te.var("axis") verify_split((5, 5, 2, 2), 5, - relay.ty.TupleType(tvm.convert([ + relay.ty.TupleType(tvm.runtime.convert([ relay.ty.TensorType((5, 1, 2, 2), "float32"), relay.ty.TensorType((5, 1, 2, 2), "float32"), relay.ty.TensorType((5, 1, 2, 2), "float32"), @@ -381,7 +382,7 @@ def test_split_infer_type(): relay.ty.TensorType((5, 1, 2, 2), "float32")])), axis=1) verify_split((5, 5, 2, 2), 5, - relay.ty.TupleType(tvm.convert([ + relay.ty.TupleType(tvm.runtime.convert([ relay.ty.TensorType((1, 5, 2, 2), "float32"), relay.ty.TensorType((1, 5, 2, 2), "float32"), relay.ty.TensorType((1, 5, 2, 2), "float32"), @@ -389,19 +390,19 @@ def test_split_infer_type(): relay.ty.TensorType((1, 5, 2, 2), "float32")])), axis=0) verify_split((d1, d2, d3, d4), 4, - relay.ty.TupleType(tvm.convert([ + relay.ty.TupleType(tvm.runtime.convert([ relay.ty.TensorType((d1, d2, idxd(d3, 4), d4), "float32"), relay.ty.TensorType((d1, d2, idxd(d3, 4), d4), "float32"), relay.ty.TensorType((d1, d2, idxd(d3, 4), d4), "float32"), relay.ty.TensorType((d1, d2, idxd(d3, 4), d4), "float32")])), axis=2) verify_split((d1, d2, d3, d4), 2, - relay.ty.TupleType(tvm.convert([ + relay.ty.TupleType(tvm.runtime.convert([ relay.ty.TensorType((idxd(d1, 2), d2, d3, d4), "float32"), relay.ty.TensorType((idxd(d1, 2), d2, d3, d4), "float32")])), axis=0) verify_split((d1, d2, d3, d4), (2, 4, 7), - relay.ty.TupleType(tvm.convert([ + relay.ty.TupleType(tvm.runtime.convert([ relay.ty.TensorType((d1, 2, d3, d4), "float32"), relay.ty.TensorType((d1, 2, d3, d4), "float32"), relay.ty.TensorType((d1, 3, d3, d4), "float32"), @@ -447,7 +448,7 @@ def test_full_like_infer_type(): assert yy.checked_type == relay.TensorType((1, 2, 3), "float32") # symbolic shape - n, c, h, w = tvm.size_var("n"), 2, 3, tvm.size_var("w") + n, 
c, h, w = te.size_var("n"), 2, 3, te.size_var("w") base = relay.var("base", relay.TensorType((n, c, h, w), "float32")) fill = relay.var("fill", relay.TensorType((), "float32")) y = relay.full_like(base, fill) @@ -475,7 +476,7 @@ def test_full_like(): def test_infer_type_leaky_relu(): - n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w") + n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w") x = relay.var("x", relay.TensorType((n, c, h, w), "float32")) y = relay.nn.leaky_relu(x, alpha=0.1) "alpha=0.1" in y.astext() @@ -539,7 +540,7 @@ def verify_infer_type_prelu(data, alpha, axis, output, dtype="float32"): def test_infer_type_prelu(): - n, c , h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w") + n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w") verify_infer_type_prelu((n, c, h, w), (c,), 1, (n, c, h, w)) verify_infer_type_prelu((n, h, w, c), (c,), 3, (n, h, w, c)) verify_infer_type_prelu((n, c, h, w), None, 1, (n, c, h, w)) diff --git a/tests/python/relay/test_op_level4.py b/tests/python/relay/test_op_level4.py index 44b51f2..473ae59 100644 --- a/tests/python/relay/test_op_level4.py +++ b/tests/python/relay/test_op_level4.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te import numpy as np from tvm import relay from tvm.relay import transform @@ -24,7 +25,7 @@ import topi.testing def test_binary_op(): def check_binary_op(opfunc, ref): - n = tvm.size_var("n") + n = te.size_var("n") t1 = relay.TensorType((5, n, 5)) t2 = relay.TensorType((n, 1)) x = relay.var("x", t1) @@ -193,7 +194,7 @@ def test_reduce_functions(): return func(data, axis=axis).reshape(out_shape) return _wrapper - d1, d2, d3, d4 = tvm.var("d1"), tvm.var("d2"), tvm.var("d3"), tvm.var("d4") + d1, d2, d3, d4 = te.var("d1"), te.var("d2"), te.var("d3"), te.var("d4") for func in [[relay.sum, np.sum], [relay.max, np.max], [relay.min, np.min], @@ -282,7 +283,7 @@ def test_strided_slice(): op_res = intrp.evaluate(func)(x_data) tvm.testing.assert_allclose(op_res.asnumpy(), ref_res) - d1, d2, d3, d4 = tvm.var("d1"), tvm.var("d2"), tvm.var("d3"), tvm.var("d4") + d1, d2, d3, d4 = te.var("d1"), te.var("d2"), te.var("d3"), te.var("d4") verify((d1, d2, 3), [None, None, 1], [None, None, 2], None, (d1, d2, 1), False) verify((3, 4, 3), [0, 0, 0], [4, -5, 4], [1, -1, 2], (3, 1, 2)) verify((3, 4, 3), [1, 1, 0], [4, 4, 3], [2, 1, 1], (1, 3, 3)) diff --git a/tests/python/relay/test_op_level5.py b/tests/python/relay/test_op_level5.py index e622a8a..8fd05da 100644 --- a/tests/python/relay/test_op_level5.py +++ b/tests/python/relay/test_op_level5.py @@ -19,6 +19,7 @@ import math import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay import transform from tvm.relay.testing import ctx_list, run_infer_type @@ -26,9 +27,9 @@ import topi.testing def test_resize_infer_type(): - n, c, h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w") + n, c, h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w") x = relay.var("x", relay.TensorType((n, c, h, w), "int8")) - th, tw = tvm.var("th"), tvm.var("tw") + th, tw = te.var("th"), te.var("tw") z = relay.image.resize(x, (th, tw)) zz = 
run_infer_type(z) assert zz.checked_type == relay.TensorType((n, c, th, tw), "int8") @@ -182,7 +183,7 @@ def test_multibox_prior(): x = relay.var("x", relay.TensorType(dshape, "float32")) verify_multibox_prior(x, dshape, ref_res, sizes, ratios, steps, offsets, check_size=True) - y = relay.var("y", relay.TensorType((tvm.size_var("n"), 3, 56, 56), "float32")) + y = relay.var("y", relay.TensorType((te.size_var("n"), 3, 56, 56), "float32")) verify_multibox_prior(x, dshape, ref_res, sizes, ratios, steps, offsets, check_size=True, check_type_only=True) @@ -190,7 +191,7 @@ def test_multibox_prior(): ref_res = get_ref_result(dshape, clip=False) x = relay.var("x", relay.TensorType(dshape, "float32")) verify_multibox_prior(x, dshape, ref_res, clip=False) - y = relay.var("y", relay.TensorType((tvm.size_var("n"), 24, 32, 32), "float32")) + y = relay.var("y", relay.TensorType((te.size_var("n"), 24, 32, 32), "float32")) verify_multibox_prior(x, dshape, ref_res, clip=False, check_type_only=True) @@ -280,7 +281,7 @@ def test_non_max_suppression(): np_indices_result = np.array([[3, 0, -1, -1, -1]]) num_anchors = 5 - dshape = (tvm.size_var("n"), num_anchors, 6) + dshape = (te.size_var("n"), num_anchors, 6) verify_nms(np_data, np_valid_count, dshape, np_result, np_indices_result, force_suppress=True, top_k=2, check_type_only=True) dshape = (1, num_anchors, 6) @@ -291,7 +292,7 @@ def test_non_max_suppression(): [1, 0.7, 30, 60, 50, 80], [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1]]]) np_indices_result = np.array([[3, 0, 1, -1, -1]]) - dshape = (tvm.size_var("n"), num_anchors, 6) + dshape = (te.size_var("n"), num_anchors, 6) verify_nms(np_data, np_valid_count, dshape, np_result, np_indices_result, check_type_only=True) dshape = (1, num_anchors, 6) @@ -331,7 +332,7 @@ def test_multibox_transform_loc(): cls_prob=cls_prob, loc_pred=loc_pred, anchor=anchors) ret = run_infer_type(mtl.astuple()) ref_type = relay.ty.TupleType( - tvm.convert([ + tvm.runtime.convert([ 
relay.ty.TensorType((1, num_anchors, 6), "float32"), relay.ty.TensorType((1, ), "int") ])) @@ -354,7 +355,7 @@ def test_multibox_transform_loc(): def test_threshold(): num_anchors = 5 num_classes = 5 - n = tvm.size_var("n") + n = te.size_var("n") cls_prob = relay.var( "cls_prob", relay.ty.TensorType((n, num_anchors, num_classes), "float32")) @@ -373,7 +374,7 @@ def test_multibox_transform_loc(): variances=variances) ret = run_infer_type(ret.astuple()) ref_type = relay.ty.TupleType( - tvm.convert([ + tvm.runtime.convert([ relay.ty.TensorType((n, num_anchors, 6), "float32"), relay.ty.TensorType((n, ), "int") ])) @@ -520,8 +521,8 @@ def test_yolo_reorg_infer_shape(): assert "stride=" in z.astext() assert zz.checked_type == relay.ty.TensorType(out_shape, "float32") - n, c, h, w = tvm.size_var("n"), tvm.size_var("c"), tvm.size_var("h"), tvm.size_var("w") - idxd = tvm.indexdiv + n, c, h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w") + idxd = tvm.tir.indexdiv verify_yolo_reorg((n, c, 20, 20), 10, (n, c*10*10, 2, 2)) verify_yolo_reorg((n, c, h, w), 2, (n, c*2*2, idxd(h, 2), idxd(w, 2))) diff --git a/tests/python/relay/test_op_level6.py b/tests/python/relay/test_op_level6.py index 286776e..287e80a 100644 --- a/tests/python/relay/test_op_level6.py +++ b/tests/python/relay/test_op_level6.py @@ -18,6 +18,7 @@ """ import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay.testing import ctx_list diff --git a/tests/python/relay/test_op_qnn_add.py b/tests/python/relay/test_op_qnn_add.py index e1f54ed..bd0f661 100644 --- a/tests/python/relay/test_op_qnn_add.py +++ b/tests/python/relay/test_op_qnn_add.py @@ -16,6 +16,7 @@ # under the License. 
import tvm +from tvm import te import numpy as np from tvm import relay from tvm.contrib import graph_runtime diff --git a/tests/python/relay/test_op_qnn_concatenate.py b/tests/python/relay/test_op_qnn_concatenate.py index 35c2f97..03ab9ee 100644 --- a/tests/python/relay/test_op_qnn_concatenate.py +++ b/tests/python/relay/test_op_qnn_concatenate.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te import numpy as np from tvm import relay from tvm.contrib import graph_runtime diff --git a/tests/python/relay/test_op_qnn_conv2d.py b/tests/python/relay/test_op_qnn_conv2d.py index e827c72..66acda8 100644 --- a/tests/python/relay/test_op_qnn_conv2d.py +++ b/tests/python/relay/test_op_qnn_conv2d.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te import numpy as np from tvm import relay from tvm.relay import transform diff --git a/tests/python/relay/test_op_qnn_dense.py b/tests/python/relay/test_op_qnn_dense.py index 43600cb..3cfcfd1 100644 --- a/tests/python/relay/test_op_qnn_dense.py +++ b/tests/python/relay/test_op_qnn_dense.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te import numpy as np from tvm import relay from tvm.contrib import graph_runtime diff --git a/tests/python/relay/test_op_qnn_dequantize.py b/tests/python/relay/test_op_qnn_dequantize.py index b1965c9..febf5c5 100644 --- a/tests/python/relay/test_op_qnn_dequantize.py +++ b/tests/python/relay/test_op_qnn_dequantize.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te import numpy as np from tvm import relay from tvm.contrib import graph_runtime diff --git a/tests/python/relay/test_op_qnn_mul.py b/tests/python/relay/test_op_qnn_mul.py index 959a02a..6516871 100644 --- a/tests/python/relay/test_op_qnn_mul.py +++ b/tests/python/relay/test_op_qnn_mul.py @@ -16,6 +16,7 @@ # under the License. 
import tvm +from tvm import te import numpy as np from tvm import relay from tvm.contrib import graph_runtime diff --git a/tests/python/relay/test_op_qnn_quantize.py b/tests/python/relay/test_op_qnn_quantize.py index bdc7bc0..09b04d8 100644 --- a/tests/python/relay/test_op_qnn_quantize.py +++ b/tests/python/relay/test_op_qnn_quantize.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te import numpy as np from tvm import relay from tvm.contrib import graph_runtime diff --git a/tests/python/relay/test_op_qnn_requantize.py b/tests/python/relay/test_op_qnn_requantize.py index 8af7781..8123397 100644 --- a/tests/python/relay/test_op_qnn_requantize.py +++ b/tests/python/relay/test_op_qnn_requantize.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te import numpy as np from tvm import relay from tvm.contrib import graph_runtime diff --git a/tests/python/relay/test_param_dict.py b/tests/python/relay/test_param_dict.py index 4161b90..497a818 100644 --- a/tests/python/relay/test_param_dict.py +++ b/tests/python/relay/test_param_dict.py @@ -17,6 +17,7 @@ import os import numpy as np import tvm +from tvm import te import json import base64 from tvm._ffi.base import py_str diff --git a/tests/python/relay/test_pass_alpha_equal.py b/tests/python/relay/test_pass_alpha_equal.py index 0319d0b..7e34f48 100644 --- a/tests/python/relay/test_pass_alpha_equal.py +++ b/tests/python/relay/test_pass_alpha_equal.py @@ -16,6 +16,7 @@ # under the License. 
import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay import analysis from tvm.relay.testing import run_opt_pass @@ -64,10 +65,10 @@ def test_type_param_alpha_equal(): # function types are the only way to put type params # in eq map - ft1 = relay.FuncType(tvm.convert([]), t1, tvm.convert([t1]), tvm.convert([])) - ft2 = relay.FuncType(tvm.convert([]), t3, tvm.convert([t3]), tvm.convert([])) + ft1 = relay.FuncType(tvm.runtime.convert([]), t1, tvm.runtime.convert([t1]), tvm.runtime.convert([])) + ft2 = relay.FuncType(tvm.runtime.convert([]), t3, tvm.runtime.convert([t3]), tvm.runtime.convert([])) # actually an invalid type because t2 is wrong kind - ft3 = relay.FuncType(tvm.convert([]), t2, tvm.convert([t2]), tvm.convert([])) + ft3 = relay.FuncType(tvm.runtime.convert([]), t2, tvm.runtime.convert([t2]), tvm.runtime.convert([])) assert ft1 == ft2 assert ft1 != ft3 # kinds still do not match @@ -85,51 +86,51 @@ def test_func_type_alpha_equal(): broadcast = tvm.ir.EnvFunc.get("tvm.relay.type_relation.Broadcast") identity = tvm.ir.EnvFunc.get("tvm.relay.type_relation.Identity") - tr1 = relay.TypeRelation(broadcast, tvm.convert([tp1, tp3]), 1, None) - tr2 = relay.TypeRelation(broadcast, tvm.convert([tp2, tp4]), 1, None) - tr3 = relay.TypeRelation(identity, tvm.convert([tp1, tp3]), 1, None) + tr1 = relay.TypeRelation(broadcast, tvm.runtime.convert([tp1, tp3]), 1, None) + tr2 = relay.TypeRelation(broadcast, tvm.runtime.convert([tp2, tp4]), 1, None) + tr3 = relay.TypeRelation(identity, tvm.runtime.convert([tp1, tp3]), 1, None) - ft = relay.FuncType(tvm.convert([t1, t2]), tp1, - tvm.convert([tp1, tp3]), - tvm.convert([tr1])) - translate_vars = relay.FuncType(tvm.convert([t1, t2]), tp1, - tvm.convert([tp2, tp4]), - tvm.convert([tr2])) + ft = relay.FuncType(tvm.runtime.convert([t1, t2]), tp1, + tvm.runtime.convert([tp1, tp3]), + tvm.runtime.convert([tr1])) + translate_vars = relay.FuncType(tvm.runtime.convert([t1, t2]), tp1, + 
tvm.runtime.convert([tp2, tp4]), + tvm.runtime.convert([tr2])) assert ft == translate_vars - different_args = relay.FuncType(tvm.convert([t1]), tp1, - tvm.convert([tp1, tp3]), - tvm.convert([tr1])) + different_args = relay.FuncType(tvm.runtime.convert([t1]), tp1, + tvm.runtime.convert([tp1, tp3]), + tvm.runtime.convert([tr1])) assert ft != different_args - different_order = relay.FuncType(tvm.convert([t2, t1]), tp1, - tvm.convert([tp1, tp3]), - tvm.convert([tr1])) + different_order = relay.FuncType(tvm.runtime.convert([t2, t1]), tp1, + tvm.runtime.convert([tp1, tp3]), + tvm.runtime.convert([tr1])) assert ft != different_order - no_rel = relay.FuncType(tvm.convert([t1, t2]), tp1, - tvm.convert([tp1, tp3]), - tvm.convert([])) + no_rel = relay.FuncType(tvm.runtime.convert([t1, t2]), tp1, + tvm.runtime.convert([tp1, tp3]), + tvm.runtime.convert([])) assert ft != no_rel - more_vars = relay.FuncType(tvm.convert([t1, t2]), tp2, - tvm.convert([tp1, tp2, tp3]), - tvm.convert([tr1])) + more_vars = relay.FuncType(tvm.runtime.convert([t1, t2]), tp2, + tvm.runtime.convert([tp1, tp2, tp3]), + tvm.runtime.convert([tr1])) assert ft != more_vars - all_the_vars = relay.FuncType(tvm.convert([t1, t2]), tp1, - tvm.convert([tp1, tp2, tp3, tp4]), - tvm.convert([tr1, tr2])) + all_the_vars = relay.FuncType(tvm.runtime.convert([t1, t2]), tp1, + tvm.runtime.convert([tp1, tp2, tp3, tp4]), + tvm.runtime.convert([tr1, tr2])) assert ft != all_the_vars - different_rel = relay.FuncType(tvm.convert([t1, t2]), tp1, - tvm.convert([tp1, tp3]), - tvm.convert([tr3])) + different_rel = relay.FuncType(tvm.runtime.convert([t1, t2]), tp1, + tvm.runtime.convert([tp1, tp3]), + tvm.runtime.convert([tr3])) assert ft != different_rel - more_rels = relay.FuncType(tvm.convert([t1, t2]), tp1, - tvm.convert([tp1, tp3]), - tvm.convert([tr1, tr3])) + more_rels = relay.FuncType(tvm.runtime.convert([t1, t2]), tp1, + tvm.runtime.convert([tp1, tp3]), + tvm.runtime.convert([tr1, tr3])) assert ft != more_rels @@ -139,10 
+140,10 @@ def test_tuple_type_alpha_equal(): tp1 = relay.TypeVar("v1", relay.TypeKind.Type) tp2 = relay.TypeVar("v2", relay.TypeKind.Type) - tup1 = relay.TupleType(tvm.convert([t1, t2, tp1])) - tup2 = relay.TupleType(tvm.convert([t1, t2, tp1])) - tup3 = relay.TupleType(tvm.convert([t2, t1, tp1])) - tup4 = relay.TupleType(tvm.convert([t1, t2, tp2])) + tup1 = relay.TupleType(tvm.runtime.convert([t1, t2, tp1])) + tup2 = relay.TupleType(tvm.runtime.convert([t1, t2, tp1])) + tup3 = relay.TupleType(tvm.runtime.convert([t2, t1, tp1])) + tup4 = relay.TupleType(tvm.runtime.convert([t1, t2, tp2])) # as long as types are alpha-equal and in same order, # tuples should be alpha-equal @@ -165,16 +166,16 @@ def test_type_relation_alpha_equal(): attr1_same = tvm.ir.make_node("attrs.TestAttrs", name="attr", padding=(3,4)) attr2 = tvm.ir.make_node("attrs.TestAttrs", name="attr", padding=(3,4,4)) - tr = relay.TypeRelation(broadcast, tvm.convert([t1, t2]), 1, attr1) - same = relay.TypeRelation(broadcast, tvm.convert([t1, t2]), 1, attr1) - diff_func = relay.TypeRelation(identity, tvm.convert([t1, t2]), 1, attr1) - diff_order = relay.TypeRelation(broadcast, tvm.convert([t2, t1]), 1, attr1) - diff_args = relay.TypeRelation(broadcast, tvm.convert([t2, t3]), 1, attr1) - diff_attr = relay.TypeRelation(broadcast, tvm.convert([t1, t2]), 1, attr2) - same_attr = relay.TypeRelation(broadcast, tvm.convert([t1, t2]), 1, attr1_same) + tr = relay.TypeRelation(broadcast, tvm.runtime.convert([t1, t2]), 1, attr1) + same = relay.TypeRelation(broadcast, tvm.runtime.convert([t1, t2]), 1, attr1) + diff_func = relay.TypeRelation(identity, tvm.runtime.convert([t1, t2]), 1, attr1) + diff_order = relay.TypeRelation(broadcast, tvm.runtime.convert([t2, t1]), 1, attr1) + diff_args = relay.TypeRelation(broadcast, tvm.runtime.convert([t2, t3]), 1, attr1) + diff_attr = relay.TypeRelation(broadcast, tvm.runtime.convert([t1, t2]), 1, attr2) + same_attr = relay.TypeRelation(broadcast, tvm.runtime.convert([t1, t2]), 1, 
attr1_same) - bigger = relay.TypeRelation(identity, tvm.convert([t1, t3, t2]), 2, attr1) - diff_num_inputs = relay.TypeRelation(identity, tvm.convert([t1, t3, t2]), 1, attr2) + bigger = relay.TypeRelation(identity, tvm.runtime.convert([t1, t3, t2]), 2, attr1) + diff_num_inputs = relay.TypeRelation(identity, tvm.runtime.convert([t1, t3, t2]), 1, attr2) # func, number of args, input count, and order should be the same assert tr == same diff --git a/tests/python/relay/test_pass_alter_op_layout.py b/tests/python/relay/test_pass_alter_op_layout.py index df01310..eabe758 100644 --- a/tests/python/relay/test_pass_alter_op_layout.py +++ b/tests/python/relay/test_pass_alter_op_layout.py @@ -18,6 +18,7 @@ import pytest import tvm +from tvm import te from tvm import relay from tvm.relay import transform, analysis from tvm.relay.testing.temp_op_attr import TempOpAttr diff --git a/tests/python/relay/test_pass_annotation.py b/tests/python/relay/test_pass_annotation.py index 3e7d916..49e9883 100644 --- a/tests/python/relay/test_pass_annotation.py +++ b/tests/python/relay/test_pass_annotation.py @@ -19,6 +19,7 @@ import json import numpy as np import tvm +from tvm import te from tvm import relay from tvm.contrib import graph_runtime from tvm.relay.expr_functor import ExprMutator diff --git a/tests/python/relay/test_pass_auto_quantize.py b/tests/python/relay/test_pass_auto_quantize.py index 02438ef..35d33b1 100644 --- a/tests/python/relay/test_pass_auto_quantize.py +++ b/tests/python/relay/test_pass_auto_quantize.py @@ -18,6 +18,7 @@ import numpy as np import pytest import tvm +from tvm import te from tvm import relay from tvm.relay import testing diff --git a/tests/python/relay/test_pass_canonicalize_cast.py b/tests/python/relay/test_pass_canonicalize_cast.py index 672b4b1..e9ab67f 100644 --- a/tests/python/relay/test_pass_canonicalize_cast.py +++ b/tests/python/relay/test_pass_canonicalize_cast.py @@ -16,6 +16,7 @@ # under the License. 
import tvm +from tvm import te import tvm.relay as relay import tvm.relay.transform as _transform diff --git a/tests/python/relay/test_pass_check_kind.py b/tests/python/relay/test_pass_check_kind.py index 62a9204..06fe13a 100644 --- a/tests/python/relay/test_pass_check_kind.py +++ b/tests/python/relay/test_pass_check_kind.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay from tvm.relay.analysis import check_kind import pytest @@ -33,9 +34,9 @@ def test_typevar_kind(): def test_tuple_kind(): # only contain type kinds tp = relay.TypeVar('tp', relay.TypeKind.Type) - tt = relay.TensorType(tvm.convert([1, 2, 3]), 'float32') - tf = relay.FuncType(tvm.convert([]), tt, tvm.convert([]), tvm.convert([])) - fields = tvm.convert([tp, tf, tt]) + tt = relay.TensorType(tvm.runtime.convert([1, 2, 3]), 'float32') + tf = relay.FuncType(tvm.runtime.convert([]), tt, tvm.runtime.convert([]), tvm.runtime.convert([])) + fields = tvm.runtime.convert([tp, tf, tt]) tup_ty = relay.TupleType(fields) assert check_kind(tup_ty) == relay.TypeKind.Type @@ -46,16 +47,16 @@ def test_func_kind(): tp1 = relay.TypeVar('tp1', relay.TypeKind.Type) tp2 = relay.TypeVar('tp2', relay.TypeKind.Type) - shape = tvm.convert([1, 2, 3]) + shape = tvm.runtime.convert([1, 2, 3]) dtype = 'float32' tensor_type = relay.TensorType(shape, dtype) - tr = relay.TypeRelation(None, tvm.convert([tensor_type, tp1]) , 1, None) + tr = relay.TypeRelation(None, tvm.runtime.convert([tensor_type, tp1]) , 1, None) - type_params = tvm.convert([tp1, tp2]) - type_constraints = tvm.convert([tr]) - arg_types = tvm.convert([tp1, tensor_type]) - ret_type = relay.TupleType(tvm.convert([tp2, tensor_type])) + type_params = tvm.runtime.convert([tp1, tp2]) + type_constraints = tvm.runtime.convert([tr]) + arg_types = tvm.runtime.convert([tp1, tensor_type]) + ret_type = relay.TupleType(tvm.runtime.convert([tp2, tensor_type])) tf = 
relay.FuncType(arg_types, ret_type, type_params, type_constraints) assert check_kind(tf) == relay.TypeKind.Type @@ -63,8 +64,8 @@ def test_func_kind(): def test_ref_kind(): # only contain type kinds - tt = relay.TensorType(tvm.convert([1, 2, 3]), 'float32') - ft = relay.FuncType(tvm.convert([]), tt, tvm.convert([]), tvm.convert([])) + tt = relay.TensorType(tvm.runtime.convert([1, 2, 3]), 'float32') + ft = relay.FuncType(tvm.runtime.convert([]), tt, tvm.runtime.convert([]), tvm.runtime.convert([])) rt1 = relay.RefType(tt) assert check_kind(rt1) == relay.TypeKind.Type @@ -77,9 +78,9 @@ def test_ref_kind(): def test_relation_kind(): # only have type kinds for arguments tp = relay.TypeVar('tp', relay.TypeKind.Type) - tt = relay.TensorType(tvm.convert([1, 2, 3]), 'float32') - tf = relay.FuncType(tvm.convert([]), tt, tvm.convert([]), tvm.convert([])) - args = tvm.convert([tf, tt, tp]) + tt = relay.TensorType(tvm.runtime.convert([1, 2, 3]), 'float32') + tf = relay.FuncType(tvm.runtime.convert([]), tt, tvm.runtime.convert([]), tvm.runtime.convert([])) + args = tvm.runtime.convert([tf, tt, tp]) tr = relay.TypeRelation(None, args, 2, None) assert check_kind(tr) == relay.TypeKind.Constraint @@ -115,7 +116,7 @@ def test_invalid_tuple_kind(): tp1 = relay.TypeVar('tp1', relay.TypeKind.ShapeVar) tp2 = relay.TypeVar('tp2', relay.TypeKind.BaseType) tp3 = relay.TypeVar('tp3', relay.TypeKind.Constraint) - fields = tvm.convert([tp1, tp2, tp3]) + fields = tvm.runtime.convert([tp1, tp2, tp3]) tup_ty = relay.TupleType(fields) check_kind(tup_ty) @@ -127,9 +128,9 @@ def test_invalid_func_kind(): tp2 = relay.TypeVar('tp2', relay.TypeKind.BaseType) tp3 = relay.TypeVar('tp3', relay.TypeKind.Constraint) - type_params = tvm.convert([tp1, tp2, tp3]) - type_constraints = tvm.convert([]) - arg_types = tvm.convert([tp1, tp2]) + type_params = tvm.runtime.convert([tp1, tp2, tp3]) + type_constraints = tvm.runtime.convert([]) + arg_types = tvm.runtime.convert([tp1, tp2]) ret_type = tp3 tf = 
relay.FuncType(arg_types, ret_type, type_params, type_constraints) @@ -148,7 +149,7 @@ def test_invalid_relation_kind(): tp1 = relay.TypeVar('tp1', relay.TypeKind.ShapeVar) tp2 = relay.TypeVar('tp2', relay.TypeKind.BaseType) tp3 = relay.TypeVar('tp3', relay.TypeKind.Constraint) - args = tvm.convert([tp1, tp2, tp3]) + args = tvm.runtime.convert([tp1, tp2, tp3]) func = tvm.ir.EnvFunc.get("tvm.relay.type_relation.Broadcast") tr = relay.TypeRelation(func, args, 2, None) @@ -187,7 +188,7 @@ def test_typecall_invalid_num_args(): def test_func_with_invalid_ret_type(): tp1 = relay.TypeVar('tp1', relay.TypeKind.Type) tp2 = relay.TypeVar('tp2', relay.TypeKind.ShapeVar) - tf = relay.FuncType(tvm.convert([tp1]), tp2, tvm.convert([tp1, tp2]), tvm.convert([])) + tf = relay.FuncType(tvm.runtime.convert([tp1]), tp2, tvm.runtime.convert([tp1, tp2]), tvm.runtime.convert([])) check_kind(tf) @@ -196,7 +197,7 @@ def test_func_with_invalid_ret_type(): def test_func_with_invalid_arg_types(): tp1 = relay.TypeVar('tp1', relay.TypeKind.ShapeVar) tp2 = relay.TypeVar('tp2', relay.TypeKind.Type) - tf = relay.FuncType(tvm.convert([tp1]), tp2, tvm.convert([tp1, tp2]), tvm.convert([])) + tf = relay.FuncType(tvm.runtime.convert([tp1]), tp2, tvm.runtime.convert([tp1, tp2]), tvm.runtime.convert([])) check_kind(tf) @@ -205,9 +206,9 @@ def test_func_with_invalid_arg_types(): def test_func_with_invalid_tuple(): tp1 = relay.TypeVar('tp1', relay.TypeKind.ShapeVar) - ret_type = relay.TupleType(tvm.convert([tp1, tp1, tp1])) + ret_type = relay.TupleType(tvm.runtime.convert([tp1, tp1, tp1])) - tf = relay.FuncType(tvm.convert([]), ret_type, tvm.convert([tp1]), tvm.convert([])) + tf = relay.FuncType(tvm.runtime.convert([]), ret_type, tvm.runtime.convert([tp1]), tvm.runtime.convert([])) check_kind(tf) @@ -218,20 +219,20 @@ def test_func_with_invalid_relation(): tp3 = relay.TypeVar('tp3', relay.TypeKind.Constraint) func = tvm.ir.EnvFunc.get("tvm.relay.type_relation.Identity") - tr = relay.TypeRelation(func, 
tvm.convert([tp2, tp3]), 1, None) + tr = relay.TypeRelation(func, tvm.runtime.convert([tp2, tp3]), 1, None) - tf = relay.FuncType(tvm.convert([tp1]), tp1, tvm.convert([tp1, tp2, tp3]), tvm.convert([tr])) + tf = relay.FuncType(tvm.runtime.convert([tp1]), tp1, tvm.runtime.convert([tp1, tp2, tp3]), tvm.runtime.convert([tr])) check_kind(tf) @pytest.mark.xfail(raises=tvm.error.TVMError) def test_tuple_with_invalid_func(): - tensor_type = relay.TensorType(tvm.convert([1, 2, 3]), 'float32') + tensor_type = relay.TensorType(tvm.runtime.convert([1, 2, 3]), 'float32') tp1 = relay.TypeVar('tp1', relay.TypeKind.ShapeVar) - tf = relay.FuncType(tvm.convert([]), tp1, tvm.convert([tp1]), tvm.convert([])) + tf = relay.FuncType(tvm.runtime.convert([]), tp1, tvm.runtime.convert([tp1]), tvm.runtime.convert([])) - tup_ty = relay.TupleType(tvm.convert([tensor_type, tf])) + tup_ty = relay.TupleType(tvm.runtime.convert([tensor_type, tf])) check_kind(tup_ty) diff --git a/tests/python/relay/test_pass_combine_parallel_conv2d.py b/tests/python/relay/test_pass_combine_parallel_conv2d.py index c10a7b8..ec9bcd9 100644 --- a/tests/python/relay/test_pass_combine_parallel_conv2d.py +++ b/tests/python/relay/test_pass_combine_parallel_conv2d.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay from tvm.relay import transform diff --git a/tests/python/relay/test_pass_combine_parallel_dense.py b/tests/python/relay/test_pass_combine_parallel_dense.py index f693f30..84d8211 100644 --- a/tests/python/relay/test_pass_combine_parallel_dense.py +++ b/tests/python/relay/test_pass_combine_parallel_dense.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te from tvm import relay from tvm.relay import transform diff --git a/tests/python/relay/test_pass_convert_op_layout.py b/tests/python/relay/test_pass_convert_op_layout.py index 4b80d6c..f9e7ca9 100644 --- a/tests/python/relay/test_pass_convert_op_layout.py +++ b/tests/python/relay/test_pass_convert_op_layout.py @@ -16,6 +16,7 @@ # under the License. """Test alter op layout pass""" import tvm +from tvm import te from tvm import relay from tvm.relay.op import register_alter_op_layout diff --git a/tests/python/relay/test_pass_dead_code_elimination.py b/tests/python/relay/test_pass_dead_code_elimination.py index 3f1ec9e..604ec89 100644 --- a/tests/python/relay/test_pass_dead_code_elimination.py +++ b/tests/python/relay/test_pass_dead_code_elimination.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay from tvm.relay import Function, transform from tvm.relay.analysis import alpha_equal, graph_equal, free_vars, assert_alpha_equal @@ -25,7 +26,7 @@ import pytest class env: def __init__(self): - self.shape = tvm.convert([1, 2, 3]) + self.shape = tvm.runtime.convert([1, 2, 3]) self.tt = relay.TensorType(self.shape, "float32") self.int32 = relay.TensorType([], "int32") self.float32 = relay.TensorType([], "float32") diff --git a/tests/python/relay/test_pass_eliminate_common_subexpr.py b/tests/python/relay/test_pass_eliminate_common_subexpr.py index e2fec61..dddbef7 100644 --- a/tests/python/relay/test_pass_eliminate_common_subexpr.py +++ b/tests/python/relay/test_pass_eliminate_common_subexpr.py @@ -16,6 +16,7 @@ # under the License. 
"""Test eliminate common subexpr pass""" import tvm +from tvm import te from tvm import relay from tvm.relay.op import register_alter_op_layout diff --git a/tests/python/relay/test_pass_eta_expand.py b/tests/python/relay/test_pass_eta_expand.py index b9eb2a1..ad04e41 100644 --- a/tests/python/relay/test_pass_eta_expand.py +++ b/tests/python/relay/test_pass_eta_expand.py @@ -19,6 +19,7 @@ import os import numpy as np import tvm +from tvm import te from tvm import relay import tvm.relay.transform as _transform diff --git a/tests/python/relay/test_pass_fold_constant.py b/tests/python/relay/test_pass_fold_constant.py index 08834f1..cc362a2 100644 --- a/tests/python/relay/test_pass_fold_constant.py +++ b/tests/python/relay/test_pass_fold_constant.py @@ -16,6 +16,7 @@ # under the License. import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay import transform from tvm.relay.build_module import bind_params_by_name @@ -54,7 +55,7 @@ def test_fold_const(): raise RuntimeError() # the fold constant should work on any context. 
- with tvm.build_config(add_lower_pass=[(0, fail)]): + with tvm.target.build_config(add_lower_pass=[(0, fail)]): with tvm.target.create("cuda"): zz = run_opt_pass(before(), transform.FoldConstant()) zexpected = run_opt_pass(expected(), transform.InferType()) diff --git a/tests/python/relay/test_pass_fold_scale_axis.py b/tests/python/relay/test_pass_fold_scale_axis.py index bfc3cab..4c094fb 100644 --- a/tests/python/relay/test_pass_fold_scale_axis.py +++ b/tests/python/relay/test_pass_fold_scale_axis.py @@ -17,6 +17,7 @@ import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay import transform diff --git a/tests/python/relay/test_pass_fuse_ops.py b/tests/python/relay/test_pass_fuse_ops.py index e11b6ae..a660222 100644 --- a/tests/python/relay/test_pass_fuse_ops.py +++ b/tests/python/relay/test_pass_fuse_ops.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay from tvm.relay import transform from tvm.relay.testing import run_opt_pass diff --git a/tests/python/relay/test_pass_gradient.py b/tests/python/relay/test_pass_gradient.py index 6c2ea8f..6f2a125 100644 --- a/tests/python/relay/test_pass_gradient.py +++ b/tests/python/relay/test_pass_gradient.py @@ -17,6 +17,7 @@ import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay.analysis import free_vars, free_type_vars, assert_alpha_equal from tvm.relay import create_executor, transform diff --git a/tests/python/relay/test_pass_lambda_lift.py b/tests/python/relay/test_pass_lambda_lift.py index a66c4c7..e388878 100644 --- a/tests/python/relay/test_pass_lambda_lift.py +++ b/tests/python/relay/test_pass_lambda_lift.py @@ -18,6 +18,7 @@ import numpy as np import pytest import tvm +from tvm import te from tvm import relay from tvm.relay import transform diff --git a/tests/python/relay/test_pass_legalize.py b/tests/python/relay/test_pass_legalize.py index 
e4e16c0..9976eca 100644 --- a/tests/python/relay/test_pass_legalize.py +++ b/tests/python/relay/test_pass_legalize.py @@ -17,6 +17,7 @@ """Test legalize pass""" import numpy as np import tvm +from tvm import te from tvm import relay from tvm.contrib import graph_runtime diff --git a/tests/python/relay/test_pass_mac_count.py b/tests/python/relay/test_pass_mac_count.py index 5ce0e41..697aad8 100644 --- a/tests/python/relay/test_pass_mac_count.py +++ b/tests/python/relay/test_pass_mac_count.py @@ -17,6 +17,7 @@ """Unit tests for MAC counter.""" import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay import analysis, transform @@ -39,7 +40,7 @@ def test_gemm(): data2 = relay.var("data2", shape=dshape2) gemm = relay.nn.dense(data1, data2) func = relay.Function([data1, data2], - relay.Tuple(tvm.convert([gemm]))) + relay.Tuple(tvm.runtime.convert([gemm]))) func = run_opt_pass(func, transform.InferType()) compute_count = analysis.get_total_mac_number(func) expect_count = n * m * k @@ -66,7 +67,7 @@ def test_conv(): channels=output_channel, kernel_size=(kh, kw), padding=(h_padding, w_padding)) - func = relay.Function([data, weight], relay.Tuple(tvm.convert([conv2d]))) + func = relay.Function([data, weight], relay.Tuple(tvm.runtime.convert([conv2d]))) func = run_opt_pass(func, transform.InferType()) compute_count = analysis.get_total_mac_number(func) expect_count = batch_size * input_channel * oh * ow * output_channel * kh * kw @@ -99,7 +100,7 @@ def test_simple_network(): weight_dense) func = relay.Function([data1, data2, weight_conv, weight_dense], - relay.Tuple(tvm.convert([conv2d_1, conv2d_2, + relay.Tuple(tvm.runtime.convert([conv2d_1, conv2d_2, dense_1, add, flattened]))) # alter the CONV 2D data layout to test func = run_opt_pass(func, transform.AlterOpLayout()) @@ -127,7 +128,7 @@ def test_depthwise_conv2d(): groups=64) add = relay.add(depthwise_conv2d_1, depthwise_conv2d_2) func = relay.Function([data1, data2, weight_conv], - 
relay.Tuple(tvm.convert([depthwise_conv2d_1, + relay.Tuple(tvm.runtime.convert([depthwise_conv2d_1, depthwise_conv2d_2, add]))) func = run_opt_pass(func, transform.InferType()) @@ -156,7 +157,7 @@ def test_conv_2d_transpose(): kernel_size=(kh, kw), padding=(h_padding, w_padding)) func = relay.Function([data, weight], - relay.Tuple(tvm.convert([conv2d_transpose]))) + relay.Tuple(tvm.runtime.convert([conv2d_transpose]))) func = run_opt_pass(func, transform.InferType()) compute_count = analysis.get_total_mac_number(func) expect_count = batch_size * input_channel * oh * ow * output_channel * kh * kw diff --git a/tests/python/relay/test_pass_manager.py b/tests/python/relay/test_pass_manager.py index a13e5e9..aed0269 100644 --- a/tests/python/relay/test_pass_manager.py +++ b/tests/python/relay/test_pass_manager.py @@ -19,6 +19,7 @@ import numpy as np import pytest import tvm +from tvm import te from tvm import relay from tvm.relay import ExprFunctor from tvm.relay import Function, Call diff --git a/tests/python/relay/test_pass_partial_eval.py b/tests/python/relay/test_pass_partial_eval.py index 2bec98c..f54dd6b 100644 --- a/tests/python/relay/test_pass_partial_eval.py +++ b/tests/python/relay/test_pass_partial_eval.py @@ -17,6 +17,7 @@ import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay.analysis import alpha_equal, assert_alpha_equal from tvm.relay.prelude import Prelude diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index 6f20278..9c3228f 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -21,6 +21,7 @@ import numpy as np import pytest import tvm +from tvm import te import tvm.relay.testing import tvm.relay.transform as transform from tvm import relay diff --git a/tests/python/relay/test_pass_qnn_legalize.py b/tests/python/relay/test_pass_qnn_legalize.py index dee19f7..7d3d9cc 100644 --- 
a/tests/python/relay/test_pass_qnn_legalize.py +++ b/tests/python/relay/test_pass_qnn_legalize.py @@ -17,6 +17,7 @@ """Test legalize pass""" import numpy as np import tvm +from tvm import te from tvm import relay from tvm.contrib import graph_runtime diff --git a/tests/python/relay/test_pass_remove_unused_functions.py b/tests/python/relay/test_pass_remove_unused_functions.py index bacc312..3381634 100644 --- a/tests/python/relay/test_pass_remove_unused_functions.py +++ b/tests/python/relay/test_pass_remove_unused_functions.py @@ -16,6 +16,7 @@ # under the License. import pytest import tvm +from tvm import te from tvm import relay from tvm.relay import transform from tvm.relay.prelude import Prelude diff --git a/tests/python/relay/test_pass_to_a_normal_form.py b/tests/python/relay/test_pass_to_a_normal_form.py index 46bde4f..f68f648 100644 --- a/tests/python/relay/test_pass_to_a_normal_form.py +++ b/tests/python/relay/test_pass_to_a_normal_form.py @@ -16,6 +16,7 @@ # under the License. import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay.analysis import alpha_equal, detect_feature from tvm.relay import op, create_executor, transform diff --git a/tests/python/relay/test_pass_to_cps.py b/tests/python/relay/test_pass_to_cps.py index 4645e20..fe4959e 100644 --- a/tests/python/relay/test_pass_to_cps.py +++ b/tests/python/relay/test_pass_to_cps.py @@ -16,6 +16,7 @@ # under the License. import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay.analysis import alpha_equal, detect_feature from tvm.relay.transform import to_cps, un_cps diff --git a/tests/python/relay/test_pass_to_graph_normal_form.py b/tests/python/relay/test_pass_to_graph_normal_form.py index 5c5221f..dc47ad3 100644 --- a/tests/python/relay/test_pass_to_graph_normal_form.py +++ b/tests/python/relay/test_pass_to_graph_normal_form.py @@ -16,6 +16,7 @@ # under the License. 
import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay import op, create_executor, transform, Feature from tvm.relay.analysis import detect_feature diff --git a/tests/python/relay/test_pass_unmatched_cases.py b/tests/python/relay/test_pass_unmatched_cases.py index 1ac99a6..42344bc 100644 --- a/tests/python/relay/test_pass_unmatched_cases.py +++ b/tests/python/relay/test_pass_unmatched_cases.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te from tvm import relay from tvm.relay.prelude import Prelude from tvm.relay.analysis import unmatched_cases diff --git a/tests/python/relay/test_pass_vars.py b/tests/python/relay/test_pass_vars.py index d8b77ba..1aad74b 100644 --- a/tests/python/relay/test_pass_vars.py +++ b/tests/python/relay/test_pass_vars.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay from tvm.relay.analysis import (free_vars, free_type_vars, bound_vars, bound_type_vars, diff --git a/tests/python/relay/test_py_converter.py b/tests/python/relay/test_py_converter.py index f489e9f..f6b1b24 100644 --- a/tests/python/relay/test_py_converter.py +++ b/tests/python/relay/test_py_converter.py @@ -16,6 +16,7 @@ # under the License. import numpy as np import tvm +from tvm import te from tvm import relay from tvm.relay.testing import to_python, run_as_python from tvm.relay.prelude import Prelude diff --git a/tests/python/relay/test_type_functor.py b/tests/python/relay/test_type_functor.py index 854301b..9e023bc 100644 --- a/tests/python/relay/test_type_functor.py +++ b/tests/python/relay/test_type_functor.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te from tvm import relay from tvm.relay import TypeFunctor, TypeMutator, TypeVisitor from tvm.relay.analysis import assert_graph_equal @@ -53,7 +54,7 @@ def test_tensor_type(): def test_func_type(): tv = TypeVar('tv') - tt = relay.TensorType(tvm.convert([1, 2, 3]), 'float32') + tt = relay.TensorType(tvm.runtime.convert([1, 2, 3]), 'float32') ft = FuncType([tt], tt, type_params=[tv]) check_visit(ft) diff --git a/tests/python/relay/test_type_infer.py b/tests/python/relay/test_type_infer.py index 892c91d..74507ba 100644 --- a/tests/python/relay/test_type_infer.py +++ b/tests/python/relay/test_type_infer.py @@ -18,6 +18,7 @@ for expressions. """ import tvm +from tvm import te from tvm import relay from tvm.relay import op, transform, analysis from tvm.relay.analysis import assert_alpha_equal diff --git a/tests/python/relay/test_type_solver.py b/tests/python/relay/test_type_solver.py index 118066e..d90fd29 100644 --- a/tests/python/relay/test_type_solver.py +++ b/tests/python/relay/test_type_solver.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm import relay import pytest diff --git a/tests/python/relay/test_typecall.py b/tests/python/relay/test_typecall.py index fa2601f..491047d 100644 --- a/tests/python/relay/test_typecall.py +++ b/tests/python/relay/test_typecall.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te from tvm import relay from tvm.relay import transform diff --git a/tests/python/relay/test_vm.py b/tests/python/relay/test_vm.py index 8cac656..02f1e5b 100644 --- a/tests/python/relay/test_vm.py +++ b/tests/python/relay/test_vm.py @@ -18,6 +18,7 @@ import numpy as np import pytest import tvm +from tvm import te from tvm import runtime from tvm import relay from tvm.relay.scope_builder import ScopeBuilder diff --git a/tests/python/relay/test_vm_serialization.py b/tests/python/relay/test_vm_serialization.py index 9fed495..5d20651 100644 --- a/tests/python/relay/test_vm_serialization.py +++ b/tests/python/relay/test_vm_serialization.py @@ -19,6 +19,7 @@ import numpy as np import tvm +from tvm import te from tvm.runtime import vm as _vm from tvm.relay import vm as rly_vm from tvm import relay diff --git a/tests/python/unittest/test_arith_canonical_simplify.py b/tests/python/unittest/test_arith_canonical_simplify.py index 35822d2..b4649a4 100644 --- a/tests/python/unittest/test_arith_canonical_simplify.py +++ b/tests/python/unittest/test_arith_canonical_simplify.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te class CanonicalChecker: def __init__(self): @@ -22,26 +23,26 @@ class CanonicalChecker: def verify(self, data, expected): res = self.analyzer.canonical_simplify(data) - assert tvm.ir_pass.Equal(res, expected), "\ndata={}\nres={}\nexpected={}".format(data, res, expected) + assert tvm.tir.ir_pass.Equal(res, expected), "\ndata={}\nres={}\nexpected={}".format(data, res, expected) def test_mul_sum_simplify(): ck = CanonicalChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") + x, y, z = te.var("x"), te.var("y"), te.var("z") ck.verify(2 + (3 * x + z + y + 1) * 4 + x, x * 13 + z * 4 + y * 4 +6) ck.verify(x * 3 - 4 * x + 1, 1 - x) ck.verify(y + x * 3 - 5 * x + 1 + y, y * 2 + 1 - x * 2) - tdiv = tvm.truncdiv - tmod = tvm.truncmod + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod # trucdiv ck.verify(tdiv(x + y + x + y * 3, 2), y * 2 + x) ck.verify(tmod(x + y + x + y * 3, 2), 0) # floordiv - fld = tvm.floordiv - flm = tvm.floormod + fld = tvm.te.floordiv + flm = tvm.te.floormod ck.verify(flm(x + x + y * 3, 2), flm(y * 3, 2)) ck.verify(fld(x + y + x + y * 3, 2), y * 2 + x) ck.verify(flm(x + y + x + y * 3, 2), 0) @@ -50,11 +51,11 @@ def test_mul_sum_simplify(): def test_split_index_simplify(): ck = CanonicalChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") + x, y, z = te.var("x"), te.var("y"), te.var("z") # trucdiv - tdiv = tvm.truncdiv - tmod = tvm.truncmod + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod # split div const ck.verify(tdiv(x, 3) *3 + tmod(x, 3), x) @@ -80,8 +81,8 @@ def test_split_index_simplify(): ck.verify(tdiv(x * 4 + y, 2) * 2 + tmod(x * 4 + y, 2), x * 4 + y) # floordiv - fld = tvm.floordiv - flm = tvm.floormod + fld = tvm.te.floordiv + flm = tvm.te.floormod ck.verify(fld(x, 3) * 3 + flm(x, 3), x) ck.verify(fld(x, 6) * 6 + flm(fld(x, 3), 2) * 3 + flm(x, 3), x) ck.verify(fld(fld(flm(x, 16), 2) * 2, 4), fld(flm(x, 16), 4)) @@ -95,8 +96,8 @@ def test_split_index_simplify(): def test_div_simplify(): ck = 
CanonicalChecker() - x = tvm.var("x") - tdiv = tvm.truncdiv + x = te.var("x") + tdiv = tvm.tir.truncdiv # truc div ck.verify(tdiv(16+48*x,16), x*3 + 1) @@ -110,7 +111,7 @@ def test_div_simplify(): ck.verify(tdiv(17 + 47 * x, 16), tdiv(x * 47 + 17, 16)) # floordiv - fld = tvm.floordiv + fld = tvm.te.floordiv ck.analyzer.update(x, tvm.arith.ConstIntBound(-1000, 10000), True) ck.verify(fld(16+48*x, 16), x*3 + 1) ck.verify(fld(17+48*x, 16), x * 3 + 1) @@ -119,8 +120,8 @@ def test_div_simplify(): def test_floormod_simplify(): ck = CanonicalChecker() - flm = tvm.floormod - x, y = tvm.var("x"), tvm.var("y") + flm = tvm.te.floormod + x, y = te.var("x"), te.var("y") ck.verify(flm(flm((x*4) + y - 466036, 24528) - 24512, 16), flm((x*4) + y + 12, 16)) @@ -128,59 +129,59 @@ def test_floormod_simplify(): def test_canonical_mixed(): ck = CanonicalChecker() - x = tvm.var("x") - z = tvm.const(3, "int32") - tdiv = tvm.truncdiv - tmod = tvm.truncmod + x = te.var("x") + z = tvm.tir.const(3, "int32") + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod ck.verify(tdiv(x, (z*z)) - tdiv(x, (z*z)), 0) ck.verify(tdiv(x, (z+z)) - tdiv(x, (z+z)), 0) ck.verify(x - 2 < 3, x < 5) - ck.verify(tvm.max(x, 1) - tvm.max(x, 1), 0) - ck.verify(tvm.min(x, 1) - tvm.min(x, 1), 0) + ck.verify(tvm.te.max(x, 1) - tvm.te.max(x, 1), 0) + ck.verify(tvm.te.min(x, 1) - tvm.te.min(x, 1), 0) ck.verify(x * x - x * x, 0) - fld = tvm.floordiv + fld = tvm.te.floordiv ck.verify(fld(x, (z*z)) - fld(x, (z*z)), 0) ck.verify(fld(x, (z+z)) - fld(x, (z+z)), 0) def test_reduce_combiner_simplify(): ck = CanonicalChecker() - dummy = tvm.var('dummy') - comm_reducer = tvm.comm_reducer - prod = comm_reducer(lambda x, y: x*y, lambda t0: tvm.const(1, t0)) + dummy = te.var('dummy') + comm_reducer = te.comm_reducer + prod = comm_reducer(lambda x, y: x*y, lambda t0: tvm.tir.const(1, t0)) sum_or_prod = comm_reducer( lambda x, y: tvm.tir.Select(dummy < 0, x + y, x*y), lambda t0: tvm.tir.Select(dummy < 0, - tvm.const(0, t0), tvm.const(1, 
t0))) + tvm.tir.const(0, t0), tvm.tir.const(1, t0))) sum_and_prod = comm_reducer( lambda x, y: (x[0] + y[0], x[1]*y[1]), - lambda t0, t1: (tvm.const(0, t0), - tvm.const(5, t0) - tvm.const(4, t0))) + lambda t0, t1: (tvm.tir.const(0, t0), + tvm.tir.const(5, t0) - tvm.tir.const(4, t0))) some_reducer1 = comm_reducer( lambda x, y: (x[0] + y[0], x[0] + y[0] + x[1] + y[1], x[0]*y[2] + y[0]*x[2], x[1] + y[2], 4.0), - lambda t0, t1, t2, t3, t4: (tvm.const(0, t0), - tvm.const(1, t1), - tvm.const(2, t2), - tvm.const(3, t3), - tvm.const(4, t4))) - - k = tvm.reduce_axis((0, 10), name="k") - A = tvm.placeholder((10,), name='A') + lambda t0, t1, t2, t3, t4: (tvm.tir.const(0, t0), + tvm.tir.const(1, t1), + tvm.tir.const(2, t2), + tvm.tir.const(3, t3), + tvm.tir.const(4, t4))) + + k = te.reduce_axis((0, 10), name="k") + A = te.placeholder((10,), name='A') # Test that SimplifyCombiner makes use of vranges ck.analyzer.update(dummy, tvm.arith.ConstIntBound(-10, -4)) - ck.verify(sum_or_prod(A[k], k), tvm.sum(A[k], k)) + ck.verify(sum_or_prod(A[k], k), te.sum(A[k], k)) ck.analyzer.update(dummy, tvm.arith.ConstIntBound(5, 9), True) ck.verify(sum_or_prod(A[k], k), prod(A[k], k)) ck.analyzer.update(dummy, tvm.arith.ConstIntBound(-10, 100), True) - ck.verify(sum_and_prod((A[k], A[10-k]), k)[0], tvm.sum(A[k], k)) + ck.verify(sum_and_prod((A[k], A[10-k]), k)[0], te.sum(A[k], k)) ck.verify(sum_and_prod((A[k], A[10-k]), k)[1], prod(A[10-k], k)) reference_simplified_sources = [[A[0]], @@ -196,72 +197,72 @@ def test_reduce_combiner_simplify(): # Check that the remaining components are the expected ones. 
for lhs, rhs in zip(simplified.source, reference_simplified_sources[j]): - assert tvm.ir_pass.Equal(lhs, rhs) + assert tvm.tir.ir_pass.Equal(lhs, rhs) # Test that components with side effects are not removed side_effect = lambda *xs: tvm.tir.Call("int32", "dummy", xs, tvm.tir.Call.Intrinsic, None, 0) ck.verify(sum_and_prod((A[k], side_effect(A[10-k])), k)[0], sum_and_prod((A[k], side_effect(A[10-k])), k)[0]) ck.verify(sum_and_prod((side_effect(A[k]), A[10-k]), k)[0], - tvm.sum(side_effect(A[k]), k)) + te.sum(side_effect(A[k]), k)) def test_reduce_simplify(): ck = CanonicalChecker() - k = tvm.reduce_axis((0, 10), name="k") - j = tvm.reduce_axis((-5, 3), name="j") - A = tvm.placeholder((10,), name='A') - ck.verify(tvm.sum(tvm.tir.Select(k + j < 12, k + j, 0), [k, j]), - tvm.sum(k + j, [k, j])) - ck.verify(tvm.sum(A[3], []), A[3]) + k = te.reduce_axis((0, 10), name="k") + j = te.reduce_axis((-5, 3), name="j") + A = te.placeholder((10,), name='A') + ck.verify(te.sum(tvm.tir.Select(k + j < 12, k + j, 0), [k, j]), + te.sum(k + j, [k, j])) + ck.verify(te.sum(A[3], []), A[3]) # The rule below is not typical, removed for now - ck.verify(tvm.sum(tvm.div(k, 10), k), tvm.sum(tvm.const(0, "int32"), k)) + ck.verify(te.sum(te.div(k, 10), k), te.sum(tvm.tir.const(0, "int32"), k)) def test_simplify_if_then_else(): ck = CanonicalChecker() - x = tvm.var("x") - y = tvm.var("y") - tdiv = tvm.truncdiv - tmod = tvm.truncmod + x = te.var("x") + y = te.var("y") + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod # simplification that takes condition into account. 
- res = tvm.if_then_else((x * 4 + y) >= 466036, - tvm.if_then_else(24512 <= tmod(((x*4) + y) - 466036, 24528), + res = tvm.tir.if_then_else((x * 4 + y) >= 466036, + tvm.tir.if_then_else(24512 <= tmod(((x*4) + y) - 466036, 24528), tmod(tmod(((x*4) + y) - 466036, 24528) -24512, 16), x), y) - res2 = tvm.if_then_else((x * 4) >= 466036 - y, - tvm.if_then_else(24512 <= tmod(((x*4) + y) - 466036, 24528), + res2 = tvm.tir.if_then_else((x * 4) >= 466036 - y, + tvm.tir.if_then_else(24512 <= tmod(((x*4) + y) - 466036, 24528), tmod(tmod(((x*4) + y) - 466036, 24528) -24512, 16), x), y) - expected = tvm.if_then_else( + expected = tvm.tir.if_then_else( tvm.tir.LE(466036, (x * 4 + y)), - tvm.if_then_else(tvm.tir.LE(24512, tmod(((x*4) + y) - 4, 24528)), + tvm.tir.if_then_else(tvm.tir.LE(24512, tmod(((x*4) + y) - 4, 24528)), tmod(((x*4) + y) - 4, 16), x), y) ck.verify(res, expected) ck.verify(res2, expected) # can only simplify if condition - res = tvm.tir.Select(tvm.all(x >= -1, y >= 0), tmod(x + y + 100, 3), tmod(x + 100, 3)) - expected = tvm.tir.Select(tvm.all(x >= -1, y >= 0), tmod(x + y + 1, 3), tmod(x + 100, 3)) + res = tvm.tir.Select(tvm.tir.all(x >= -1, y >= 0), tmod(x + y + 100, 3), tmod(x + 100, 3)) + expected = tvm.tir.Select(tvm.tir.all(x >= -1, y >= 0), tmod(x + y + 1, 3), tmod(x + 100, 3)) ck.verify(res, ck.analyzer.canonical_simplify(expected)) res = tvm.tir.Select(x >= 10, - tvm.if_then_else(tdiv(x, 3) > 2, x, 0), 0) + tvm.tir.if_then_else(tdiv(x, 3) > 2, x, 0), 0) expected = tvm.tir.Select(x >= 10, x, 0) ck.verify(res, ck.analyzer.canonical_simplify(expected)) res = tvm.tir.Select(x >= 10, - tvm.if_then_else(tdiv(x, 3) < 2, x, 0), 0) + tvm.tir.if_then_else(tdiv(x, 3) < 2, x, 0), 0) ck.verify(res, 0) def test_complex_cases(): ck = CanonicalChecker() - x = tvm.var("x") - y = tvm.var("y") - tdiv = tvm.truncdiv - tmod = tvm.truncmod + x = te.var("x") + y = te.var("y") + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod res2 = (tdiv(tdiv(tmod(x*128 + y, 1296),36)*2 + 
1,2)*36 + tdiv(tmod((x*128) + y, 36)*2 + 1,2) - tmod((x*128) + y, 1296) + 1) diff --git a/tests/python/unittest/test_arith_const_int_bound.py b/tests/python/unittest/test_arith_const_int_bound.py index aba56ac..4829b97 100644 --- a/tests/python/unittest/test_arith_const_int_bound.py +++ b/tests/python/unittest/test_arith_const_int_bound.py @@ -15,21 +15,22 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_dtype_bound(): analyzer = tvm.arith.Analyzer() - x = tvm.var("x", dtype="int64") + x = te.var("x", dtype="int64") bd = analyzer.const_int_bound(x) assert bd.min_value == bd.NEG_INF assert bd.max_value == bd.POS_INF - x = tvm.var("x", dtype="int8") + x = te.var("x", dtype="int8") bd = analyzer.const_int_bound(x) assert bd.min_value == -128 assert bd.max_value == 127 - x = tvm.var("x", dtype="uint8") + x = te.var("x", dtype="uint8") bd = analyzer.const_int_bound(x) assert bd.min_value == 0 assert bd.max_value == 255 @@ -37,8 +38,8 @@ def test_dtype_bound(): def test_cast_bound(): analyzer = tvm.arith.Analyzer() - x = tvm.var("x", dtype="int8") - tmod = tvm.truncmod + x = te.var("x", dtype="int8") + tmod = tvm.tir.truncmod bd = analyzer.const_int_bound(tmod(x, 3).astype("uint32")) assert bd.min_value == 0 assert bd.max_value == 2 @@ -51,7 +52,7 @@ def test_cast_bound(): def test_add_sub_bound(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x", "int64"), tvm.var("y", "int64") + x, y = te.var("x", "int64"), te.var("y", "int64") bd = analyzer.const_int_bound(x + y) assert bd.min_value == bd.NEG_INF assert bd.max_value == bd.POS_INF @@ -78,7 +79,7 @@ def test_add_sub_bound(): def test_mul_bound(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x"), tvm.var("y") + x, y = te.var("x"), te.var("y") analyzer.update(x, tvm.arith.ConstIntBound(-2, 4)) analyzer.update(y, tvm.arith.ConstIntBound(4, 10)) @@ -101,8 +102,8 @@ def test_mul_bound(): def test_truncdiv_bound(): analyzer = 
tvm.arith.Analyzer() - x, y = tvm.var("x"), tvm.var("y") - tdiv = tvm.truncdiv + x, y = te.var("x"), te.var("y") + tdiv = tvm.tir.truncdiv analyzer.update(x, tvm.arith.ConstIntBound(-9, 4)) analyzer.update(y, tvm.arith.ConstIntBound(4, 10)) @@ -124,9 +125,9 @@ def test_truncdiv_bound(): def test_truncmod_bound(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x"), tvm.var("y") + x, y = te.var("x"), te.var("y") - tmod = tvm.truncmod + tmod = tvm.tir.truncmod analyzer.update(x, tvm.arith.ConstIntBound(-9, 4)) analyzer.update(y, tvm.arith.ConstIntBound(4, 10)) @@ -149,8 +150,8 @@ def test_truncmod_bound(): def test_floordiv_bound(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x"), tvm.var("y") - fld = tvm.floordiv + x, y = te.var("x"), te.var("y") + fld = tvm.te.floordiv analyzer.update(x, tvm.arith.ConstIntBound(-9, 4)) analyzer.update(y, tvm.arith.ConstIntBound(4, 10)) bd = analyzer.const_int_bound(fld(x, y)) @@ -171,8 +172,8 @@ def test_floordiv_bound(): def test_floormod_bound(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x"), tvm.var("y") - flm = tvm.floormod + x, y = te.var("x"), te.var("y") + flm = tvm.te.floormod analyzer.update(x, tvm.arith.ConstIntBound(-9, 4)) analyzer.update(y, tvm.arith.ConstIntBound(4, 10)) @@ -195,34 +196,34 @@ def test_floormod_bound(): def test_min_max_bound(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x"), tvm.var("y") + x, y = te.var("x"), te.var("y") analyzer.update(x, tvm.arith.ConstIntBound(-9, 11)) analyzer.update(y, tvm.arith.ConstIntBound(4, 10)) - bd = analyzer.const_int_bound(tvm.min(x, y)) + bd = analyzer.const_int_bound(tvm.te.min(x, y)) assert bd.min_value == -9 assert bd.max_value == 10 analyzer.update(x, tvm.arith.ConstIntBound(bd.NEG_INF, bd.POS_INF), override=True) analyzer.update(y, tvm.arith.ConstIntBound(4, 10), override=True) - bd = analyzer.const_int_bound(tvm.min(x, y)) + bd = analyzer.const_int_bound(tvm.te.min(x, y)) assert bd.min_value == bd.NEG_INF assert bd.max_value == 10 - bd = 
analyzer.const_int_bound(tvm.max(x, y)) + bd = analyzer.const_int_bound(tvm.te.max(x, y)) assert bd.min_value == 4 assert bd.max_value == bd.POS_INF analyzer.update(x, tvm.arith.ConstIntBound(1, bd.POS_INF), override=True) analyzer.update(y, tvm.arith.ConstIntBound(4, 10), override=True) - bd = analyzer.const_int_bound(tvm.max(x, y)) + bd = analyzer.const_int_bound(tvm.te.max(x, y)) assert bd.min_value == 4 assert bd.max_value == bd.POS_INF def test_select_bound(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x"), tvm.var("y") + x, y = te.var("x"), te.var("y") analyzer.update(x, tvm.arith.ConstIntBound(-9, 11)) analyzer.update(y, tvm.arith.ConstIntBound(4, 10)) @@ -235,7 +236,7 @@ def test_select_bound(): def test_shift_and_bound(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x"), tvm.var("y") + x, y = te.var("x"), te.var("y") analyzer.update(x, tvm.arith.ConstIntBound(-9, 11)) analyzer.update(y, tvm.arith.ConstIntBound(2, 10)) @@ -256,9 +257,9 @@ def test_shift_and_bound(): def test_mix_index_bound(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x"), tvm.var("y") - tdiv = tvm.truncdiv - tmod = tvm.truncmod + x, y = te.var("x"), te.var("y") + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod analyzer.update(x, tvm.arith.ConstIntBound(0, 24 - 1)) analyzer.update(y, tvm.arith.ConstIntBound(0, 3 - 1)) @@ -277,7 +278,7 @@ def test_mix_index_bound(): def test_size_var_bound(): analyzer = tvm.arith.Analyzer() - x = tvm.size_var("x") + x = te.size_var("x") bd = analyzer.const_int_bound(x) assert bd.min_value == 0 assert bd.max_value == bd.POS_INF diff --git a/tests/python/unittest/test_arith_deduce_bound.py b/tests/python/unittest/test_arith_deduce_bound.py index 5e08635..5baabd1 100644 --- a/tests/python/unittest/test_arith_deduce_bound.py +++ b/tests/python/unittest/test_arith_deduce_bound.py @@ -15,27 +15,28 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te def assert_expr_equal(a, b): - res = tvm.ir_pass.Simplify(a - b) + res = tvm.tir.ir_pass.Simplify(a - b) equal = isinstance(res, tvm.tir.IntImm) and res.value == 0 if not equal: raise ValueError("{} and {} are not equal".format(a, b)) def test_deduce(): - a = tvm.var('a') - b = tvm.var('b') - c = tvm.var('c') - d = tvm.var('d') + a = te.var('a') + b = te.var('b') + c = te.var('c') + d = te.var('d') b_s = tvm.arith.IntervalSet(2, 3) c_s = tvm.arith.IntervalSet(10, 15) d_s = tvm.arith.IntervalSet(-3, -1) - zero = tvm.const(0, "int32") + zero = tvm.tir.const(0, "int32") - fdiv = tvm.floordiv + fdiv = tvm.te.floordiv e0 = (-b)*a+c-d res0 = tvm.arith.deduce_bound(a, e0>=0, {b: b_s, c: c_s, d: d_s}, {}) @@ -68,13 +69,13 @@ def test_deduce(): assert_expr_equal(res1.max_value, ans1) - e2 = (tvm.max(5, a * 4) < 0) + e2 = (tvm.te.max(5, a * 4) < 0) res2 = tvm.arith.deduce_bound(a, e2, {b: b_s, c: c_s, d: d_s}, {}) assert str(res2.max_value) == "neg_inf" assert str(res2.min_value) == "pos_inf" # expression containing variable a is on rhs - e2 = (zero < tvm.max(5, a * 4)) + e2 = (zero < tvm.te.max(5, a * 4)) res2 = tvm.arith.deduce_bound(a, e2, {b: b_s, c: c_s, d: d_s}, {}) assert str(res2.max_value) == "neg_inf" assert str(res2.min_value) == "pos_inf" @@ -82,10 +83,10 @@ def test_deduce(): e3 = (-b)+a*c-d res3 = tvm.arith.deduce_bound(a, e3>=0, {b: b_s, c: c_s, d: d_s}, {b: b_s, d: d_s}) ans3 = fdiv(2,c)+1 - assert str(tvm.ir_pass.Simplify(res3.min_value)) == str(ans3) + assert str(tvm.tir.ir_pass.Simplify(res3.min_value)) == str(ans3) res3 = tvm.arith.deduce_bound(a, zero <= e3, {b: b_s, c: c_s, d: d_s}, {b: b_s, d: d_s}) - assert str(tvm.ir_pass.Simplify(res3.min_value)) == str(ans3) + assert str(tvm.tir.ir_pass.Simplify(res3.min_value)) == str(ans3) # tests for `EQ` op res4 = tvm.arith.deduce_bound(a, a == b, {}, {}) @@ -127,10 +128,10 @@ def test_deduce(): def test_check(): - a = tvm.var('a') - b = tvm.var('b') - c = tvm.var('c') - d = 
tvm.var('d') + a = te.var('a') + b = te.var('b') + c = te.var('c') + d = te.var('d') b_s = tvm.arith.IntervalSet(2, 3) c_s = tvm.arith.IntervalSet(5, 7) @@ -150,28 +151,28 @@ def test_check(): def test_deduce_basic(): def test_basic(a1, a2, coff): - a = tvm.var('a') - b = tvm.var('b') + a = te.var('a') + b = te.var('b') b_s = tvm.arith.IntervalSet(a1, a2) e0 = b + a*coff + 3 res1 = tvm.arith.deduce_bound(a, e0<17, {b: b_s}, {b: b_s}) [x, y] = [res1.max_value, b_s.max_value] if coff > 0 else [res1.min_value, b_s.min_value] - assert (tvm.ir_pass.Simplify((x * coff + 3 + y) < 17)).value == 1 + assert (tvm.tir.ir_pass.Simplify((x * coff + 3 + y) < 17)).value == 1 # expression containing variable a is on rhs - res1 = tvm.arith.deduce_bound(a, tvm.const(17, "int32") < e0, {b: b_s}, {b: b_s}) + res1 = tvm.arith.deduce_bound(a, tvm.tir.const(17, "int32") < e0, {b: b_s}, {b: b_s}) [x, y] = [res1.max_value, b_s.max_value] if coff < 0 else [res1.min_value, b_s.min_value] - assert (tvm.ir_pass.Simplify((x * coff + 3 + y) > 17)).value == 1 + assert (tvm.tir.ir_pass.Simplify((x * coff + 3 + y) > 17)).value == 1 # expression containing variable a is on rhs - res1 = tvm.arith.deduce_bound(a, tvm.const(17, "int32")>= e0, {b: b_s}, {b: b_s}) + res1 = tvm.arith.deduce_bound(a, tvm.tir.const(17, "int32")>= e0, {b: b_s}, {b: b_s}) [x, y] = [res1.max_value, b_s.max_value] if coff > 0 else [res1.min_value, b_s.min_value] - assert (tvm.ir_pass.Simplify((x * coff + 3 + y) <= 17)).value == 1 + assert (tvm.tir.ir_pass.Simplify((x * coff + 3 + y) <= 17)).value == 1 res1 = tvm.arith.deduce_bound(a, e0>=17, {b: b_s}, {b: b_s}) [x, y] = [res1.max_value, b_s.max_value] if coff < 0 else [res1.min_value, b_s.min_value] - assert (tvm.ir_pass.Simplify((x * coff + 3 + y) >= 17)).value == 1 + assert (tvm.tir.ir_pass.Simplify((x * coff + 3 + y) >= 17)).value == 1 test_basic(0, 4, 4) test_basic(1, 5, 4) @@ -182,28 +183,28 @@ def test_deduce_basic(): def test_deduce_complex(): def test_complex(a1, a2, 
coff): - a = tvm.var('a') - b = tvm.var('b') + a = te.var('a') + b = te.var('b') b_s = tvm.arith.IntervalSet(a1, a2) e0 = (b*3 + a* coff) * 4 res1 = tvm.arith.deduce_bound(a, e0<63, {b: b_s}, {b: b_s}) [t, x] = [res1.max_value, b_s.max_value] if coff > 0 else [res1.min_value, b_s.min_value] - assert (tvm.ir_pass.Simplify(((x*3 + t* coff) * 4) < 63)).value == 1 + assert (tvm.tir.ir_pass.Simplify(((x*3 + t* coff) * 4) < 63)).value == 1 # expression containing variable a is on rhs - res1 = tvm.arith.deduce_bound(a, tvm.const(63, "int32")>= e0, {b: b_s}, {b: b_s}) + res1 = tvm.arith.deduce_bound(a, tvm.tir.const(63, "int32")>= e0, {b: b_s}, {b: b_s}) [t, x] = [res1.max_value, b_s.max_value] if coff > 0 else [res1.min_value, b_s.min_value] - assert (tvm.ir_pass.Simplify(((x*3 + t* coff) * 4) <= 63)).value == 1 + assert (tvm.tir.ir_pass.Simplify(((x*3 + t* coff) * 4) <= 63)).value == 1 res1 = tvm.arith.deduce_bound(a, e0>63, {b: b_s}, {b: b_s}) [t, x] = [res1.max_value, b_s.max_value] if coff < 0 else [res1.min_value, b_s.min_value] - assert (tvm.ir_pass.Simplify(((x*3 + t* coff) * 4) > 63)).value == 1 + assert (tvm.tir.ir_pass.Simplify(((x*3 + t* coff) * 4) > 63)).value == 1 # expression containing variable a is on rhs - res1 = tvm.arith.deduce_bound(a, tvm.const(63, "int32") <= e0, {b: b_s}, {b: b_s}) + res1 = tvm.arith.deduce_bound(a, tvm.tir.const(63, "int32") <= e0, {b: b_s}, {b: b_s}) [t, x] = [res1.max_value, b_s.max_value] if coff < 0 else [res1.min_value, b_s.min_value] - assert (tvm.ir_pass.Simplify(((x*3 + t* coff) * 4) >= 63)).value == 1 + assert (tvm.tir.ir_pass.Simplify(((x*3 + t* coff) * 4) >= 63)).value == 1 test_complex(0, 4, 4) test_complex(0, 4, -4) diff --git a/tests/python/unittest/test_arith_detect_clip_bound.py b/tests/python/unittest/test_arith_detect_clip_bound.py index 44ae24c..d695371 100644 --- a/tests/python/unittest/test_arith_detect_clip_bound.py +++ b/tests/python/unittest/test_arith_detect_clip_bound.py @@ -15,22 +15,23 @@ # specific 
language governing permissions and limitations # under the License. import tvm +from tvm import te def test_basic(): - a = tvm.var("a") - b = tvm.var("b") - c = tvm.var("c") - m = tvm.arith.detect_clip_bound(tvm.all(a * 1 < b * 6, + a = te.var("a") + b = te.var("b") + c = te.var("c") + m = tvm.arith.detect_clip_bound(tvm.tir.all(a * 1 < b * 6, a - 1 > 0), [a]) - assert tvm.ir_pass.Simplify(m[1] - (b * 6 - 1)).value == 0 + assert tvm.tir.ir_pass.Simplify(m[1] - (b * 6 - 1)).value == 0 assert m[0].value == 2 - m = tvm.arith.detect_clip_bound(tvm.all(a * 1 < b * 6, + m = tvm.arith.detect_clip_bound(tvm.tir.all(a * 1 < b * 6, a - 1 > 0), [a, b]) assert len(m) == 0 - m = tvm.arith.detect_clip_bound(tvm.all(a + 10 * c <= 20, + m = tvm.arith.detect_clip_bound(tvm.tir.all(a + 10 * c <= 20, b - 1 > 0), [a, b]) - assert tvm.ir_pass.Simplify(m[1] - (20 - 10 * c)).value == 0 - assert tvm.ir_pass.Simplify(m[2] - 2).value == 0 + assert tvm.tir.ir_pass.Simplify(m[1] - (20 - 10 * c)).value == 0 + assert tvm.tir.ir_pass.Simplify(m[2] - 2).value == 0 if __name__ == "__main__": diff --git a/tests/python/unittest/test_arith_detect_linear_equation.py b/tests/python/unittest/test_arith_detect_linear_equation.py index 3b10302..c6e6b75 100644 --- a/tests/python/unittest/test_arith_detect_linear_equation.py +++ b/tests/python/unittest/test_arith_detect_linear_equation.py @@ -15,20 +15,21 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te def test_basic(): - a = tvm.var("a") - b = tvm.var("b") + a = te.var("a") + b = te.var("b") m = tvm.arith.detect_linear_equation(a * 4 + b * 6 + 7, [a]) assert m[0].value == 4 - assert tvm.ir_pass.Simplify(m[1] - (b * 6 + 7)).value == 0 + assert tvm.tir.ir_pass.Simplify(m[1] - (b * 6 + 7)).value == 0 m = tvm.arith.detect_linear_equation(a * 4 * (a+1) + b * 6 + 7, [a]) assert len(m) == 0 m = tvm.arith.detect_linear_equation(a * 4 + (a+1) + b * 6 + 7, [a]) assert m[0].value == 5 - assert tvm.ir_pass.Simplify(m[1] - (b * 6 + 7 + 1)).value == 0 + assert tvm.tir.ir_pass.Simplify(m[1] - (b * 6 + 7 + 1)).value == 0 m = tvm.arith.detect_linear_equation(a * b + 7, [a]) assert m[0] == b @@ -38,13 +39,13 @@ def test_basic(): m = tvm.arith.detect_linear_equation(b * 7, []) assert len(m) == 1 - assert tvm.ir_pass.Simplify(m[0] - b * 7).value == 0 + assert tvm.tir.ir_pass.Simplify(m[0] - b * 7).value == 0 def test_multivariate(): - v = [tvm.var("v%d" % i) for i in range(4)] - b = tvm.var("b") + v = [te.var("v%d" % i) for i in range(4)] + b = te.var("b") m = tvm.arith.detect_linear_equation(v[0] * (b + 4) + v[0] + v[1] * 8, v) - assert(tvm.ir_pass.Equal(tvm.ir_pass.Simplify(m[0]), b + 5)) + assert(tvm.tir.ir_pass.Equal(tvm.tir.ir_pass.Simplify(m[0]), b + 5)) assert(m[1].value == 8) m = tvm.arith.detect_linear_equation(v[0] * (b + 4) + v[0] + v[1] * 8 * v[2], v) @@ -60,11 +61,11 @@ def test_multivariate(): m = tvm.arith.detect_linear_equation((v[0] - v[1]), [v[2]]) assert(m[0].value == 0) - assert(tvm.ir_pass.Simplify(m[1] - (v[0] - v[1])).value == 0) + assert(tvm.tir.ir_pass.Simplify(m[1] - (v[0] - v[1])).value == 0) m = tvm.arith.detect_linear_equation((v[0] - v[1]), []) assert(len(m) == 1) - assert(tvm.ir_pass.Simplify(m[0] - (v[0] - v[1])).value == 0) + assert(tvm.tir.ir_pass.Simplify(m[0] - (v[0] - v[1])).value == 0) if __name__ == "__main__": test_basic() diff --git a/tests/python/unittest/test_arith_domain_touched.py 
b/tests/python/unittest/test_arith_domain_touched.py index 7876fb6..0d769aa 100644 --- a/tests/python/unittest/test_arith_domain_touched.py +++ b/tests/python/unittest/test_arith_domain_touched.py @@ -15,14 +15,15 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_domain_touched(): - i = tvm.var('i') - j = tvm.var('j') - n = tvm.convert(100) - m = tvm.var('m') - a = tvm.placeholder((n, m), name = 'a') - b = tvm.placeholder((n, m), name = 'b') + i = te.var('i') + j = te.var('j') + n = tvm.runtime.convert(100) + m = te.var('m') + a = te.placeholder((n, m), name = 'a') + b = te.placeholder((n, m), name = 'b') ir = tvm.tir.For( i, 0, n, 0, 0, tvm.tir.For(j, 0, m, 0, 0, diff --git a/tests/python/unittest/test_arith_intset.py b/tests/python/unittest/test_arith_intset.py index dad2fa7..8352d9c 100644 --- a/tests/python/unittest/test_arith_intset.py +++ b/tests/python/unittest/test_arith_intset.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te class IntSetChecker: @@ -27,7 +28,7 @@ class IntSetChecker: return "\ndata={}\ndmap={}\nres={}\nexpected={}".format(data, dmap, res, expected) def equal(x, y): res = self.analyzer.canonical_simplify(x - y) - return tvm.ir_pass.Equal(res, 0) + return tvm.tir.ir_pass.Equal(res, 0) assert equal(res.min_value, expected[0]), err_msg() assert equal(res.max_value, expected[1]), err_msg() @@ -52,7 +53,7 @@ def test_vector(): def test_add_sub(): ck = IntSetChecker() - x, y = tvm.var("x"), tvm.var("y") + x, y = te.var("x"), te.var("y") ck.verify(x + y, {x : tvm.arith.IntervalSet(0, 10)}, (y, 10 + y)) ck.verify(x + y, {x : tvm.arith.IntervalSet(0, 10), y : tvm.arith.IntervalSet(1, 11)}, @@ -63,9 +64,9 @@ def test_add_sub(): def test_mul_div(): ck = IntSetChecker() - x, y = tvm.var("x"), tvm.var("y") + x, y = te.var("x"), te.var("y") - tdiv = tvm.truncdiv + tdiv = tvm.tir.truncdiv ck.analyzer.update(y, tvm.arith.ConstIntBound(1, 100), override=True) ck.verify(x * y, {x : tvm.arith.IntervalSet(0, 10)}, (0, 10 * y)) ck.verify(x * 2, {x : tvm.arith.IntervalSet(1, 10)}, (2, 20)) @@ -74,35 +75,35 @@ def test_mul_div(): ck.verify(tdiv(x, y), {x : tvm.arith.IntervalSet(0, 10)}, (0, tdiv(10, y))) ck.verify(tdiv(x, 2), {x : tvm.arith.IntervalSet(1, 10)}, (0, 5)) - fld = tvm.floordiv + fld = tvm.te.floordiv ck.verify(fld(x, y), {x : tvm.arith.IntervalSet(0, 10)}, (0, fld(10, y))) ck.verify(fld(x, 2), {x : tvm.arith.IntervalSet(-1, 10)}, (-1, 5)) def test_mod(): ck = IntSetChecker() - x, y = tvm.var("x"), tvm.var("y") - tmod = tvm.truncmod + x, y = te.var("x"), te.var("y") + tmod = tvm.tir.truncmod ck.analyzer.update(y, tvm.arith.ConstIntBound(1, 100), override=True) ck.verify(tmod(x, y), {x : tvm.arith.IntervalSet(0, 10)}, (0, y - 1)) ck.verify(tmod(x, 10), {x : tvm.arith.IntervalSet(1, 10)}, (0, 9)) - flm = tvm.floormod + flm = tvm.te.floormod ck.verify(flm(x, 10), {x : tvm.arith.IntervalSet(-10, 10)}, (0, 9)) def test_max_min(): ck = IntSetChecker() - x, 
y = tvm.var("x"), tvm.var("y") - ck.verify(tvm.max(x, x + 1), {x : tvm.arith.IntervalSet(0, 10)}, (1, 11)) - ck.verify(tvm.min(x - 1, x + 1), {x : tvm.arith.IntervalSet(0, 10)}, (-1, 9)) - ck.verify(tvm.min(x, y), {}, (tvm.min(x, y), tvm.min(x, y))) - ck.verify(tvm.max(x, y), {}, (tvm.max(x, y), tvm.max(x, y))) + x, y = te.var("x"), te.var("y") + ck.verify(tvm.te.max(x, x + 1), {x : tvm.arith.IntervalSet(0, 10)}, (1, 11)) + ck.verify(tvm.te.min(x - 1, x + 1), {x : tvm.arith.IntervalSet(0, 10)}, (-1, 9)) + ck.verify(tvm.te.min(x, y), {}, (tvm.te.min(x, y), tvm.te.min(x, y))) + ck.verify(tvm.te.max(x, y), {}, (tvm.te.max(x, y), tvm.te.max(x, y))) def test_select(): ck = IntSetChecker() - x, y = tvm.var("x"), tvm.var("y") + x, y = te.var("x"), te.var("y") ck.verify(tvm.tir.Select(x > 0, x - 1, x + 1), {x : tvm.arith.IntervalSet(0, 10)}, (-1, 11)) diff --git a/tests/python/unittest/test_arith_modular_set.py b/tests/python/unittest/test_arith_modular_set.py index 6bb86e4..01180d2 100644 --- a/tests/python/unittest/test_arith_modular_set.py +++ b/tests/python/unittest/test_arith_modular_set.py @@ -15,11 +15,12 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te def test_cast(): analyzer = tvm.arith.Analyzer() - x = tvm.var("x", dtype="int8") + x = te.var("x", dtype="int8") m = analyzer.modular_set((x * 3).astype("uint32")) assert m.coeff == 3 assert m.base == 0 @@ -31,7 +32,7 @@ def test_cast(): def test_add_sub(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x", "int64"), tvm.var("y", "int64") + x, y = te.var("x", "int64"), te.var("y", "int64") m = analyzer.modular_set(x * 6 + y * 4) assert m.coeff == 2 assert m.base == 0 @@ -44,7 +45,7 @@ def test_add_sub(): def test_mul(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x"), tvm.var("y") + x, y = te.var("x"), te.var("y") m = analyzer.modular_set((x * 4 + 2) * (y * 6 + 1)) assert m.coeff == 4 assert m.base == 2 @@ -52,9 +53,9 @@ def test_mul(): def test_div_shift(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x"), tvm.var("y") + x, y = te.var("x"), te.var("y") # not sure if x is non-negative - tdiv = tvm.truncdiv + tdiv = tvm.tir.truncdiv m = analyzer.modular_set(tdiv(x * 4 + 2, 2)) assert m.coeff == 1 assert m.base == 0 @@ -62,7 +63,7 @@ def test_div_shift(): m = analyzer.modular_set((x * 4 + 2) >> 1) assert m.coeff == 2 assert m.base == 1 - fld = tvm.floordiv + fld = tvm.te.floordiv m = analyzer.modular_set(fld(x * 4 + 2, 2)) assert m.coeff == 2 assert m.base == 1 @@ -75,12 +76,12 @@ def test_div_shift(): def test_min_max_select(): analyzer = tvm.arith.Analyzer() - x, y = tvm.var("x"), tvm.var("y") - m = analyzer.modular_set(tvm.min(x * 3, y * 9)) + x, y = te.var("x"), te.var("y") + m = analyzer.modular_set(tvm.te.min(x * 3, y * 9)) assert m.coeff == 3 assert m.base == 0 - m = analyzer.modular_set(tvm.max(x * 3 + 1, y * 9 + 4)) + m = analyzer.modular_set(tvm.te.max(x * 3 + 1, y * 9 + 4)) assert m.coeff == 3 assert m.base == 1 @@ -90,10 +91,10 @@ def test_min_max_select(): def test_mix_index(): - a = tvm.var("a") - b = tvm.var("b") + a = te.var("a") + b = te.var("b") analyzer = tvm.arith.Analyzer() - tdiv = tvm.truncdiv + 
tdiv = tvm.tir.truncdiv m = analyzer.modular_set(a * 4 + b * 6 + 7) assert m.coeff == 2 assert m.base == 1 @@ -114,16 +115,16 @@ def test_mix_index(): assert m.coeff == 3 assert m.base == 2 - m = analyzer.modular_set(a * 12 + tvm.min(b * 3 * 7, 2)) + m = analyzer.modular_set(a * 12 + tvm.te.min(b * 3 * 7, 2)) assert m.coeff == 1 assert m.base == 0 def test_constraint_scope(): - a = tvm.var("a") - b = tvm.var("b") + a = te.var("a") + b = te.var("b") analyzer = tvm.arith.Analyzer() - tmod = tvm.truncmod + tmod = tvm.tir.truncmod with analyzer.constraint_scope(tmod(b, 4) == 2): m = analyzer.modular_set(b + 1) @@ -142,9 +143,9 @@ def test_constraint_scope(): assert m.base == 0 def test_intersect(): - a = tvm.var("a") + a = te.var("a") analyzer = tvm.arith.Analyzer() - tmod = tvm.truncmod + tmod = tvm.tir.truncmod with analyzer.constraint_scope(tmod(a, 4) == 1): with analyzer.constraint_scope(tmod(a, 3) == 1): m = analyzer.modular_set(a) diff --git a/tests/python/unittest/test_arith_rewrite_simplify.py b/tests/python/unittest/test_arith_rewrite_simplify.py index 84560e8..c8c3b0b 100644 --- a/tests/python/unittest/test_arith_rewrite_simplify.py +++ b/tests/python/unittest/test_arith_rewrite_simplify.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te class RewriteChecker: def __init__(self): @@ -22,12 +23,12 @@ class RewriteChecker: def verify(self, data, expected): res = self.analyzer.rewrite_simplify(data) - assert tvm.ir_pass.Equal(res, expected), "data={}, res={}, expected={}".format(data, res, expected) + assert tvm.tir.ir_pass.Equal(res, expected), "data={}, res={}, expected={}".format(data, res, expected) def test_vector_simplify(): ck = RewriteChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") + x, y, z = te.var("x"), te.var("y"), te.var("z") # Add rules ck.verify(tvm.tir.Ramp(x, 1, 4) + tvm.tir.Ramp(y, 2, 4), tvm.tir.Ramp(x + y, 3, 4)) @@ -56,8 +57,8 @@ def test_vector_simplify(): tvm.tir.Ramp(x * 2, 8, 4)) ## DivMod rules - tdiv = tvm.truncdiv - tmod = tvm.truncmod + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod # truc div ck.verify(tdiv(y.astype("int32x2"), x.astype("int32x2")), tdiv(y, x).astype("int32x2")) @@ -78,8 +79,8 @@ def test_vector_simplify(): tmod(tvm.tir.Ramp(1, 15, 4), 8)) # floor div - fld = tvm.floordiv - flm = tvm.floormod + fld = tvm.te.floordiv + flm = tvm.te.floormod ck.analyzer.update(x, tvm.arith.ConstIntBound(-10, 1000), override=True) ck.verify(fld(y.astype("int32x2"), x.astype("int32x2")), fld(y, x).astype("int32x2")) @@ -99,16 +100,16 @@ def test_vector_simplify(): flm(tvm.tir.Ramp(1, 15, 4), 8)) # Min/Max rules - vx = tvm.var("vx", dtype="int32x2") - vc = tvm.var("vc", dtype="uint1") - ck.verify(tvm.min(y.astype("int32x2"), x.astype("int32x2")), - tvm.min(y, x).astype("int32x2")) - ck.verify(tvm.min(tvm.min(vx, y.astype("int32x2")), x.astype("int32x2")), - tvm.min(vx, tvm.min(y, x).astype("int32x2"))) - ck.verify(tvm.max(y.astype("int32x2"), x.astype("int32x2")), - tvm.max(y, x).astype("int32x2")) - ck.verify(tvm.max(tvm.max(vx, y.astype("int32x2")), x.astype("int32x2")), - tvm.max(vx, tvm.max(y, x).astype("int32x2"))) + vx = te.var("vx", dtype="int32x2") + vc = te.var("vc", dtype="uint1") + 
ck.verify(tvm.te.min(y.astype("int32x2"), x.astype("int32x2")), + tvm.te.min(y, x).astype("int32x2")) + ck.verify(tvm.te.min(tvm.te.min(vx, y.astype("int32x2")), x.astype("int32x2")), + tvm.te.min(vx, tvm.te.min(y, x).astype("int32x2"))) + ck.verify(tvm.te.max(y.astype("int32x2"), x.astype("int32x2")), + tvm.te.max(y, x).astype("int32x2")) + ck.verify(tvm.te.max(tvm.te.max(vx, y.astype("int32x2")), x.astype("int32x2")), + tvm.te.max(vx, tvm.te.max(y, x).astype("int32x2"))) ## Logical rules ck.verify(y.astype("int32x2").equal(x.astype("int32x2")), @@ -131,7 +132,7 @@ def test_vector_simplify(): def test_select_simplify(): ck = RewriteChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") + x, y, z = te.var("x"), te.var("y"), te.var("z") # Add rules ck.verify(tvm.tir.Select(x < 0, y, 0) + tvm.tir.Select(x < 0, 1, z), tvm.tir.Select(x < 0, y + 1, z)) @@ -141,10 +142,10 @@ def test_select_simplify(): tvm.tir.Select(x < 0, 0, z - y)) ck.verify(tvm.tir.Select(x < 0, y, z) - z, tvm.tir.Select(x < 0, y - z, 0)) - ck.verify(tvm.min(tvm.tir.Select(x < 0, y, 0), tvm.tir.Select(x < 0, 1, z)), - tvm.tir.Select(x < 0, tvm.min(y, 1), tvm.min(0, z))) - ck.verify(tvm.max(tvm.tir.Select(x < 0, y, 0), tvm.tir.Select(x < 0, 1, z)), - tvm.tir.Select(x < 0, tvm.max(y, 1), tvm.max(0, z))) + ck.verify(tvm.te.min(tvm.tir.Select(x < 0, y, 0), tvm.tir.Select(x < 0, 1, z)), + tvm.tir.Select(x < 0, tvm.te.min(y, 1), tvm.te.min(0, z))) + ck.verify(tvm.te.max(tvm.tir.Select(x < 0, y, 0), tvm.tir.Select(x < 0, 1, z)), + tvm.tir.Select(x < 0, tvm.te.max(y, 1), tvm.te.max(0, z))) ck.verify(tvm.tir.Select(x * 3 + 1 != 0, y, z), y) ck.verify(tvm.tir.Select(x * 3 + 1 == 0, y, z), z) @@ -153,30 +154,30 @@ def test_select_simplify(): def test_add_index_simplify(): ck = RewriteChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") + x, y, z = te.var("x"), te.var("y"), te.var("z") ck.verify(x + (y - x), y) ck.verify(x - (y + 1) + (y + 1), x) ck.verify((x - 10) + (10 - z), x - z) 
ck.verify((x - y) + (z - x), z - y) - ck.verify(tvm.min(x, y - z) + z, tvm.min(x + z, y)) - ck.verify(tvm.min(x - z, y) + z, tvm.min(x, y + z)) - ck.verify(tvm.max(x, y - 10) + 10, tvm.max(x + 10, y)) - ck.verify(tvm.max(x - 11, y) + 11, tvm.max(x, y + 11)) + ck.verify(tvm.te.min(x, y - z) + z, tvm.te.min(x + z, y)) + ck.verify(tvm.te.min(x - z, y) + z, tvm.te.min(x, y + z)) + ck.verify(tvm.te.max(x, y - 10) + 10, tvm.te.max(x + 10, y)) + ck.verify(tvm.te.max(x - 11, y) + 11, tvm.te.max(x, y + 11)) - ck.verify(tvm.max(x, y * 2) + tvm.min(x, y * 2), x + y * 2); - ck.verify(tvm.min(x, y * 2) + tvm.max(x, y * 2), x + y * 2); + ck.verify(tvm.te.max(x, y * 2) + tvm.te.min(x, y * 2), x + y * 2); + ck.verify(tvm.te.min(x, y * 2) + tvm.te.max(x, y * 2), x + y * 2); - ck.verify(tvm.max(x, y + 2) + (-2), tvm.max(x + (-2), y)); - ck.verify(tvm.min(x, y + 2) + (-2), tvm.min(x + (-2), y)); - ck.verify(tvm.min(x + 2, y + 3) + (-2), tvm.min(x, y + 1)); + ck.verify(tvm.te.max(x, y + 2) + (-2), tvm.te.max(x + (-2), y)); + ck.verify(tvm.te.min(x, y + 2) + (-2), tvm.te.min(x + (-2), y)); + ck.verify(tvm.te.min(x + 2, y + 3) + (-2), tvm.te.min(x, y + 1)); - ck.verify(tvm.max(0, 1 - x * 4) + x * 4, tvm.max(x * 4, 1)) - ck.verify(tvm.max(2 - x * 4, 0) + x * 4, tvm.max(x * 4, 2)) + ck.verify(tvm.te.max(0, 1 - x * 4) + x * 4, tvm.te.max(x * 4, 1)) + ck.verify(tvm.te.max(2 - x * 4, 0) + x * 4, tvm.te.max(x * 4, 2)) - ck.verify(tvm.min(0, 1 - x * 4) + x * 4, tvm.min(x * 4, 1)) - ck.verify(tvm.min(2 - x * 4, 0) + x * 4, tvm.min(x * 4, 2)) + ck.verify(tvm.te.min(0, 1 - x * 4) + x * 4, tvm.te.min(x * 4, 1)) + ck.verify(tvm.te.min(2 - x * 4, 0) + x * 4, tvm.te.min(x * 4, 2)) ck.verify(x * y + x * 10, x * (y + 10)) ck.verify(y * x + x * 10, x * (y + 10)) @@ -189,16 +190,16 @@ def test_add_index_simplify(): ck.verify(x + 2 + 3 + 4 + x * 3, x * 4 + 9); # DivMod rules - tdiv = tvm.truncdiv - tmod = tvm.truncmod + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod # truc div ck.verify(y * tmod(x, 8) 
+ 10 * tmod(x, 8), tmod(x, 8) * (y + 10)) ck.analyzer.update(x, tvm.arith.ConstIntBound(-1, 1000), override=True) ck.verify(tdiv(x, 8) * 8 + tmod(x, 8), x) # floor div - fld = tvm.floordiv - flm = tvm.floormod + fld = tvm.te.floordiv + flm = tvm.te.floormod ck.verify(y * flm(x, 8) + 10 * flm(x, 8), flm(x, 8) * (y + 10)) ck.verify(fld(x, 8) * 8 + flm(x, 8), x) @@ -206,22 +207,22 @@ def test_add_index_simplify(): def test_sub_index_simplify(): ck = RewriteChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") + x, y, z = te.var("x"), te.var("y"), te.var("z") ck.verify(x + y - y, x) ck.verify(x + y - x, y) ck.verify(x - (y + x), 0 - y) ck.verify(x - (x + y), 0 - y) - ck.verify(tvm.min(x, y) - x, tvm.min(0, y - x)) - ck.verify(tvm.min(x, y) - y, tvm.min(x - y, 0)) - ck.verify(tvm.max(x, y) - x, tvm.max(0, y - x)) - ck.verify(tvm.max(x, y) - y, tvm.max(x - y, 0)) + ck.verify(tvm.te.min(x, y) - x, tvm.te.min(0, y - x)) + ck.verify(tvm.te.min(x, y) - y, tvm.te.min(x - y, 0)) + ck.verify(tvm.te.max(x, y) - x, tvm.te.max(0, y - x)) + ck.verify(tvm.te.max(x, y) - y, tvm.te.max(x - y, 0)) - ck.verify(x - tvm.min(x, y), tvm.max(0, x - y)) - ck.verify(y - tvm.min(x, y), tvm.max(y - x, 0)) - ck.verify(x - tvm.max(x, y), tvm.min(0, x - y)) - ck.verify(y - tvm.max(x, y), tvm.min(y - x, 0)) + ck.verify(x - tvm.te.min(x, y), tvm.te.max(0, x - y)) + ck.verify(y - tvm.te.min(x, y), tvm.te.max(y - x, 0)) + ck.verify(x - tvm.te.max(x, y), tvm.te.min(0, x - y)) + ck.verify(y - tvm.te.max(x, y), tvm.te.min(y - x, 0)) # mul co-efficient foldng ck.verify(x - x, 0) @@ -238,30 +239,30 @@ def test_sub_index_simplify(): ck.verify((x + y) - (z + x), y - z) ck.verify((y + x) - (z + x), y - z) - ck.verify(tvm.min(x + y, z) - x, tvm.min(y, z - x)) - ck.verify(tvm.min(y + x, z) - x, tvm.min(y, z - x)) - ck.verify(tvm.min(z, x + y) - x, tvm.min(z - x, y)) - ck.verify(tvm.min(z, y + x) - x, tvm.min(z - x, y)) + ck.verify(tvm.te.min(x + y, z) - x, tvm.te.min(y, z - x)) + ck.verify(tvm.te.min(y 
+ x, z) - x, tvm.te.min(y, z - x)) + ck.verify(tvm.te.min(z, x + y) - x, tvm.te.min(z - x, y)) + ck.verify(tvm.te.min(z, y + x) - x, tvm.te.min(z - x, y)) - ck.verify(tvm.max(x + y, z) - x, tvm.max(y, z - x)) - ck.verify(tvm.max(y + x, z) - x, tvm.max(y, z - x)) - ck.verify(tvm.max(z, x + y) - x, tvm.max(z - x, y)) - ck.verify(tvm.max(z, y + x) - x, tvm.max(z - x, y)) + ck.verify(tvm.te.max(x + y, z) - x, tvm.te.max(y, z - x)) + ck.verify(tvm.te.max(y + x, z) - x, tvm.te.max(y, z - x)) + ck.verify(tvm.te.max(z, x + y) - x, tvm.te.max(z - x, y)) + ck.verify(tvm.te.max(z, y + x) - x, tvm.te.max(z - x, y)) - ck.verify(x - tvm.min(x + y, z), tvm.max(0 - y, x - z)) - ck.verify(x - tvm.min(y + x, z), tvm.max(0 - y, x - z)) - ck.verify(x - tvm.min(z, x + y), tvm.max(x - z, 0 - y)) - ck.verify(x - tvm.min(z, y + x), tvm.max(x - z, 0 - y)) + ck.verify(x - tvm.te.min(x + y, z), tvm.te.max(0 - y, x - z)) + ck.verify(x - tvm.te.min(y + x, z), tvm.te.max(0 - y, x - z)) + ck.verify(x - tvm.te.min(z, x + y), tvm.te.max(x - z, 0 - y)) + ck.verify(x - tvm.te.min(z, y + x), tvm.te.max(x - z, 0 - y)) - ck.verify(tvm.min(x, y) - tvm.min(y, x), 0) - ck.verify(tvm.max(x, y) - tvm.max(y, x), 0) - ck.verify(tvm.min(x, y) - tvm.min(x + 10, y + 10), -10) - ck.verify(tvm.min(x + 10, y + 1) - tvm.min(x, y - 9), 10) + ck.verify(tvm.te.min(x, y) - tvm.te.min(y, x), 0) + ck.verify(tvm.te.max(x, y) - tvm.te.max(y, x), 0) + ck.verify(tvm.te.min(x, y) - tvm.te.min(x + 10, y + 10), -10) + ck.verify(tvm.te.min(x + 10, y + 1) - tvm.te.min(x, y - 9), 10) # DivMod patterns # truc div - tdiv = tvm.truncdiv - tmod = tvm.truncmod + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod ck.analyzer.update(x, tvm.arith.ConstIntBound(0, 1000), override=True) ck.verify(x - tdiv(x, 3) * 3, tmod(x, 3)) @@ -289,8 +290,8 @@ def test_sub_index_simplify(): ck.verify(tdiv(y - z, 3) * 6 - 2 * y, (0 - tmod(y - z, 3) - z) * 2) # floor div - fld = tvm.floordiv - flm = tvm.floormod + fld = tvm.te.floordiv + flm = 
tvm.te.floormod ck.analyzer.update(x, tvm.arith.ConstIntBound(-1000, 1000), override=True) ck.analyzer.update(y, tvm.arith.ConstIntBound(-1000, 1000), override=True) ck.verify(x - fld(x, 3) * 3, flm(x, 3)) @@ -318,19 +319,19 @@ def test_sub_index_simplify(): def test_mul_index_simplify(): ck = RewriteChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") + x, y, z = te.var("x"), te.var("y"), te.var("z") ck.verify((x + 2) * 3, x * 3 + 6) ck.verify((x * 2) * 3, x * 6) - ck.verify(tvm.min(x, y) * tvm.max(x, y), x * y) - ck.verify(tvm.max(x, y) * tvm.min(x, y), x * y) + ck.verify(tvm.te.min(x, y) * tvm.te.max(x, y), x * y) + ck.verify(tvm.te.max(x, y) * tvm.te.min(x, y), x * y) ck.verify((x - y) * (-2), (y - x) * 2) def test_div_index_simplify(): ck = RewriteChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") - tdiv = tvm.truncdiv - tmod = tvm.truncmod + x, y, z = te.var("x"), te.var("y"), te.var("z") + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod ck.verify(tdiv(x, x), 1) ck.analyzer.update(x, tvm.arith.ConstIntBound(0, 1000), override=True) @@ -343,12 +344,12 @@ def test_div_index_simplify(): ck.verify(tdiv(x * 4, 2), x * 2) ck.verify(tdiv(x * 4 + y, 2), x * 2 + tdiv(y, 2)) - ck.verify(tdiv(tvm.min(x * 6, y), 2), tvm.min(x * 3, tdiv(y, 2))) - ck.verify(tdiv(tvm.max(x * 6, y), 2), tvm.max(x * 3, tdiv(y, 2))) + ck.verify(tdiv(tvm.te.min(x * 6, y), 2), tvm.te.min(x * 3, tdiv(y, 2))) + ck.verify(tdiv(tvm.te.max(x * 6, y), 2), tvm.te.max(x * 3, tdiv(y, 2))) ck.verify(tdiv(y + x * 4, 2), tdiv(y, 2) + x * 2) - ck.verify(tdiv(tvm.min(y, x * 6), 2), tvm.min(tdiv(y, 2), x * 3)) - ck.verify(tdiv(tvm.max(y, x * 6), 2), tvm.max(tdiv(y, 2), x * 3)) + ck.verify(tdiv(tvm.te.min(y, x * 6), 2), tvm.te.min(tdiv(y, 2), x * 3)) + ck.verify(tdiv(tvm.te.max(y, x * 6), 2), tvm.te.max(tdiv(y, 2), x * 3)) # 3-operands ck.verify(tdiv(x * 6 + y + z, 2), x * 3 + tdiv(y + z, 2)) @@ -375,9 +376,9 @@ def test_div_index_simplify(): def test_floordiv_index_simplify(): # short 
name for floordiv - fld = tvm.floordiv + fld = tvm.te.floordiv ck = RewriteChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") + x, y, z = te.var("x"), te.var("y"), te.var("z") ck.verify(fld(fld(x, 2), 3), fld(x, 6)) ck.verify(fld(fld(x, 2) + 1, 3), fld(x + 2, 6)) @@ -386,12 +387,12 @@ def test_floordiv_index_simplify(): ck.verify(fld(x * 4, 2), x * 2) ck.verify(fld(x * 4 + y, 2), x * 2 + fld(y, 2)) - ck.verify(fld(tvm.min(x * 6, y), 2), tvm.min(x * 3, fld(y, 2))) - ck.verify(fld(tvm.max(x * 6, y), 2), tvm.max(x * 3, fld(y, 2))) + ck.verify(fld(tvm.te.min(x * 6, y), 2), tvm.te.min(x * 3, fld(y, 2))) + ck.verify(fld(tvm.te.max(x * 6, y), 2), tvm.te.max(x * 3, fld(y, 2))) ck.verify(fld(y + x * 4, 2), fld(y, 2) + x * 2) - ck.verify(fld(tvm.min(y, x * 6), 2), tvm.min(fld(y, 2), x * 3)) - ck.verify(fld(tvm.max(y, x * 6), 2), tvm.max(fld(y, 2), x * 3)) + ck.verify(fld(tvm.te.min(y, x * 6), 2), tvm.te.min(fld(y, 2), x * 3)) + ck.verify(fld(tvm.te.max(y, x * 6), 2), tvm.te.max(fld(y, 2), x * 3)) # 3-operands ck.verify(fld(x * 6 + y + z, 2), x * 3 + fld(y + z, 2)) @@ -420,13 +421,13 @@ def test_floordiv_index_simplify(): def test_mod_index_simplify(): ck = RewriteChecker() - x, y, nx, ny, z = tvm.var("x"), tvm.var("y"), tvm.var("nx"), tvm.var("ny"), tvm.var("z") + x, y, nx, ny, z = te.var("x"), te.var("y"), te.var("nx"), te.var("ny"), te.var("z") ck.analyzer.update(x, tvm.arith.ConstIntBound(0, 1000), override=True) ck.analyzer.update(y, tvm.arith.ConstIntBound(0, 1000), override=True) ck.analyzer.update(nx, tvm.arith.ConstIntBound(-1000, 0), override=True) ck.analyzer.update(ny, tvm.arith.ConstIntBound(-1000, 0), override=True) - tdiv = tvm.truncdiv - tmod = tvm.truncmod + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod ck.verify(tmod(x * 10, 2), 0) ck.verify(tmod(x * 10 + y, 2), tmod(y, 2)) @@ -456,11 +457,11 @@ def test_mod_index_simplify(): def test_floormod_index_simplify(): # short name for floordiv - flm = tvm.floormod + flm = tvm.te.floormod ck = 
RewriteChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") + x, y, z = te.var("x"), te.var("y"), te.var("z") ck = RewriteChecker() - x, y, nx, ny, z = tvm.var("x"), tvm.var("y"), tvm.var("nx"), tvm.var("ny"), tvm.var("z") + x, y, nx, ny, z = te.var("x"), te.var("y"), te.var("nx"), te.var("ny"), te.var("z") ck.verify(flm(x * 10, 2), 0) ck.verify(flm(x * 10 + y, 2), flm(y, 2)) @@ -475,172 +476,172 @@ def test_floormod_index_simplify(): def test_min_index_simplify(): ck = RewriteChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") - fld = tvm.floordiv - flm = tvm.floormod - tdiv = tvm.truncdiv - tmod = tvm.truncmod + x, y, z = te.var("x"), te.var("y"), te.var("z") + fld = tvm.te.floordiv + flm = tvm.te.floormod + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod # const int bound - ck.verify(tvm.min(tmod(x, 2), tmod(y, 2) + 10), tmod(x, 2)) - ck.verify(tvm.min(flm(x, 2), flm(y, 2) + 10), flm(x, 2)) - - ck.verify(tvm.min(x + 1, x + 10), x + 1) - ck.verify(tvm.min(x + 111, x + 10), x + 10) - ck.verify(tvm.min(x + 1, x), x) - ck.verify(tvm.min(x, x + 2), x) - ck.verify(tvm.min(1 - x, 2 - x), 1 - x) - ck.verify(tvm.min(3 - x, 2 - x), 2 - x) - - ck.verify(tvm.min(tvm.max(x, y), tvm.min(x, y)), tvm.min(x, y)) - ck.verify(tvm.min(tvm.max(x, y), tvm.min(y, x)), tvm.min(x, y)) - - ck.verify(tvm.min(tvm.max(x, y), x), x) - ck.verify(tvm.min(tvm.max(y, x), x), x) - ck.verify(tvm.min(tvm.min(x, y), x), tvm.min(x, y)) - ck.verify(tvm.min(tvm.min(x, y), y), tvm.min(x, y)) - - ck.verify(tvm.min(x, tvm.max(x, y)), x) - ck.verify(tvm.min(x, tvm.max(y, x)), x) - ck.verify(tvm.min(x, tvm.min(x, y)), tvm.min(x, y)) - ck.verify(tvm.min(y, tvm.min(x, y)), tvm.min(x, y)) - - ck.verify(tvm.min(tvm.min(tvm.min(x, y), z), y), - tvm.min(tvm.min(x, y), z)) - ck.verify(tvm.min(tvm.min(tvm.min(tvm.min(x, y), z), x * 2), y), - tvm.min(tvm.min(tvm.min(x, y), z), x * 2)) - ck.verify(tvm.min(tvm.min(tvm.min(tvm.min(tvm.min(x, y), z), x * 2), z * 2), y), - 
tvm.min(tvm.min(tvm.min(tvm.min(x, y), z), x * 2), z * 2)) - - ck.verify(tvm.min(tvm.max(x, y), tvm.max(x, z)), tvm.max(tvm.min(y, z), x)) - ck.verify(tvm.min(tvm.max(x, y), tvm.max(z, x)), tvm.max(tvm.min(y, z), x)) - ck.verify(tvm.min(tvm.max(y, x), tvm.max(x, z)), tvm.max(tvm.min(y, z), x)) - ck.verify(tvm.min(tvm.max(y, x), tvm.max(z, x)), tvm.max(tvm.min(y, z), x)) - - ck.verify(tvm.min(y + x, z + x), tvm.min(y, z) + x) - ck.verify(tvm.min(y + x, x + z), tvm.min(y, z) + x) - ck.verify(tvm.min(x + y, z + x), tvm.min(y, z) + x) - ck.verify(tvm.min(x + y, x + z), tvm.min(y, z) + x) - - ck.verify(tvm.min(x - y, x - z), x - tvm.max(y, z)) - ck.verify(tvm.min(y - x, z - x), tvm.min(y, z) - x) - - ck.verify(tvm.min(tvm.min(x, 1), 10), tvm.min(x, 1)) - ck.verify(tvm.min(tvm.min(x, 11), 10), tvm.min(x, 10)) - - ck.verify(tvm.min(x * 3, 9), tvm.min(x, 3) * 3) - ck.verify(tvm.min(3 - x, 2), 3 - tvm.max(x, 1)) + ck.verify(tvm.te.min(tmod(x, 2), tmod(y, 2) + 10), tmod(x, 2)) + ck.verify(tvm.te.min(flm(x, 2), flm(y, 2) + 10), flm(x, 2)) + + ck.verify(tvm.te.min(x + 1, x + 10), x + 1) + ck.verify(tvm.te.min(x + 111, x + 10), x + 10) + ck.verify(tvm.te.min(x + 1, x), x) + ck.verify(tvm.te.min(x, x + 2), x) + ck.verify(tvm.te.min(1 - x, 2 - x), 1 - x) + ck.verify(tvm.te.min(3 - x, 2 - x), 2 - x) + + ck.verify(tvm.te.min(tvm.te.max(x, y), tvm.te.min(x, y)), tvm.te.min(x, y)) + ck.verify(tvm.te.min(tvm.te.max(x, y), tvm.te.min(y, x)), tvm.te.min(x, y)) + + ck.verify(tvm.te.min(tvm.te.max(x, y), x), x) + ck.verify(tvm.te.min(tvm.te.max(y, x), x), x) + ck.verify(tvm.te.min(tvm.te.min(x, y), x), tvm.te.min(x, y)) + ck.verify(tvm.te.min(tvm.te.min(x, y), y), tvm.te.min(x, y)) + + ck.verify(tvm.te.min(x, tvm.te.max(x, y)), x) + ck.verify(tvm.te.min(x, tvm.te.max(y, x)), x) + ck.verify(tvm.te.min(x, tvm.te.min(x, y)), tvm.te.min(x, y)) + ck.verify(tvm.te.min(y, tvm.te.min(x, y)), tvm.te.min(x, y)) + + ck.verify(tvm.te.min(tvm.te.min(tvm.te.min(x, y), z), y), + tvm.te.min(tvm.te.min(x, 
y), z)) + ck.verify(tvm.te.min(tvm.te.min(tvm.te.min(tvm.te.min(x, y), z), x * 2), y), + tvm.te.min(tvm.te.min(tvm.te.min(x, y), z), x * 2)) + ck.verify(tvm.te.min(tvm.te.min(tvm.te.min(tvm.te.min(tvm.te.min(x, y), z), x * 2), z * 2), y), + tvm.te.min(tvm.te.min(tvm.te.min(tvm.te.min(x, y), z), x * 2), z * 2)) + + ck.verify(tvm.te.min(tvm.te.max(x, y), tvm.te.max(x, z)), tvm.te.max(tvm.te.min(y, z), x)) + ck.verify(tvm.te.min(tvm.te.max(x, y), tvm.te.max(z, x)), tvm.te.max(tvm.te.min(y, z), x)) + ck.verify(tvm.te.min(tvm.te.max(y, x), tvm.te.max(x, z)), tvm.te.max(tvm.te.min(y, z), x)) + ck.verify(tvm.te.min(tvm.te.max(y, x), tvm.te.max(z, x)), tvm.te.max(tvm.te.min(y, z), x)) + + ck.verify(tvm.te.min(y + x, z + x), tvm.te.min(y, z) + x) + ck.verify(tvm.te.min(y + x, x + z), tvm.te.min(y, z) + x) + ck.verify(tvm.te.min(x + y, z + x), tvm.te.min(y, z) + x) + ck.verify(tvm.te.min(x + y, x + z), tvm.te.min(y, z) + x) + + ck.verify(tvm.te.min(x - y, x - z), x - tvm.te.max(y, z)) + ck.verify(tvm.te.min(y - x, z - x), tvm.te.min(y, z) - x) + + ck.verify(tvm.te.min(tvm.te.min(x, 1), 10), tvm.te.min(x, 1)) + ck.verify(tvm.te.min(tvm.te.min(x, 11), 10), tvm.te.min(x, 10)) + + ck.verify(tvm.te.min(x * 3, 9), tvm.te.min(x, 3) * 3) + ck.verify(tvm.te.min(3 - x, 2), 3 - tvm.te.max(x, 1)) # DivMod rules # truc div ck.analyzer.update(x, tvm.arith.ConstIntBound(0, 1000)) - ck.verify(tvm.min(tdiv(x + 3, 4) * 4, x), x) - ck.verify(tvm.min(tdiv(x + 3, 4) * 4, tvm.max(x, 4)), tvm.max(x, 4)) - ck.verify(tvm.min(x, tdiv(x + 3, 4) * 4), x) - ck.verify(tvm.min(tvm.max(x, 4), tdiv(x + 3, 4) * 4), tvm.max(x, 4)) + ck.verify(tvm.te.min(tdiv(x + 3, 4) * 4, x), x) + ck.verify(tvm.te.min(tdiv(x + 3, 4) * 4, tvm.te.max(x, 4)), tvm.te.max(x, 4)) + ck.verify(tvm.te.min(x, tdiv(x + 3, 4) * 4), x) + ck.verify(tvm.te.min(tvm.te.max(x, 4), tdiv(x + 3, 4) * 4), tvm.te.max(x, 4)) ck.analyzer.update(x, tvm.arith.ConstIntBound(-1000, 1000), True) - ck.verify(tvm.min(tdiv(x, 10), tdiv(y, 10)), 
tdiv(tvm.min(x, y), 10)) - ck.verify(tvm.min(tdiv(x, (-10)), tdiv(y, (-10))), - tdiv(tvm.max(x, y), (-10))) + ck.verify(tvm.te.min(tdiv(x, 10), tdiv(y, 10)), tdiv(tvm.te.min(x, y), 10)) + ck.verify(tvm.te.min(tdiv(x, (-10)), tdiv(y, (-10))), + tdiv(tvm.te.max(x, y), (-10))) # floor div ck.analyzer.update(x, tvm.arith.ConstIntBound(-1000, 1000), True) - ck.verify(tvm.min(fld(x + 3, 4) * 4, x), x) - ck.verify(tvm.min(fld(x + 3, 4) * 4, tvm.max(x, 4)), tvm.max(x, 4)) - ck.verify(tvm.min(x, fld(x + 3, 4) * 4), x) - ck.verify(tvm.min(x, fld(x, 4) * 4), fld(x, 4) * 4) - ck.verify(tvm.min(tvm.max(x, 4), fld(x + 3, 4) * 4), tvm.max(x, 4)) - ck.verify(tvm.min(fld(x, 10), fld(y, 10)), fld(tvm.min(x, y), 10)) - ck.verify(tvm.min(fld(x, (-10)), fld(y, (-10))), fld(tvm.max(x, y), (-10))) + ck.verify(tvm.te.min(fld(x + 3, 4) * 4, x), x) + ck.verify(tvm.te.min(fld(x + 3, 4) * 4, tvm.te.max(x, 4)), tvm.te.max(x, 4)) + ck.verify(tvm.te.min(x, fld(x + 3, 4) * 4), x) + ck.verify(tvm.te.min(x, fld(x, 4) * 4), fld(x, 4) * 4) + ck.verify(tvm.te.min(tvm.te.max(x, 4), fld(x + 3, 4) * 4), tvm.te.max(x, 4)) + ck.verify(tvm.te.min(fld(x, 10), fld(y, 10)), fld(tvm.te.min(x, y), 10)) + ck.verify(tvm.te.min(fld(x, (-10)), fld(y, (-10))), fld(tvm.te.max(x, y), (-10))) def test_max_index_simplify(): ck = RewriteChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") - flm = tvm.floormod - fld = tvm.floordiv - tdiv = tvm.truncdiv - tmod = tvm.truncmod + x, y, z = te.var("x"), te.var("y"), te.var("z") + flm = tvm.te.floormod + fld = tvm.te.floordiv + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod # const int bound - ck.verify(tvm.max(tmod(x, 2), tmod(y, 2) + 10), tmod(y, 2) + 10) - ck.verify(tvm.max(flm(x, 2), flm(y, 2) + 10), flm(y, 2) + 10) - - ck.verify(tvm.max(x + 1, x + 10), x + 10) - ck.verify(tvm.max(x + 111, x + 10), x + 111) - ck.verify(tvm.max(x + 1, x), x + 1) - ck.verify(tvm.max(x, x + 2), x + 2) - ck.verify(tvm.max(1 - x, 2 - x), 2 - x) - ck.verify(tvm.max(3 - x, 2 - x), 3 - 
x) - - ck.verify(tvm.max(tvm.min(x, y), tvm.max(x, y)), tvm.max(x, y)) - ck.verify(tvm.max(tvm.min(x, y), tvm.max(y, x)), tvm.max(x, y)) - - ck.verify(tvm.max(tvm.min(x, y), x), x) - ck.verify(tvm.max(tvm.min(y, x), x), x) - ck.verify(tvm.max(tvm.max(x, y), x), tvm.max(x, y)) - ck.verify(tvm.max(tvm.max(x, y), y), tvm.max(x, y)) - - ck.verify(tvm.max(x, tvm.min(x, y)), x) - ck.verify(tvm.max(x, tvm.min(y, x)), x) - ck.verify(tvm.max(x, tvm.max(x, y)), tvm.max(x, y)) - ck.verify(tvm.max(y, tvm.max(x, y)), tvm.max(x, y)) - - ck.verify(tvm.max(tvm.max(tvm.max(x, y), z), y), - tvm.max(tvm.max(x, y), z)) - ck.verify(tvm.max(tvm.max(tvm.max(tvm.max(x, y), z), x * 2), y), - tvm.max(tvm.max(tvm.max(x, y), z), x * 2)) - ck.verify(tvm.max(tvm.max(tvm.max(tvm.max(tvm.max(x, y), z), x * 2), z * 2), y), - tvm.max(tvm.max(tvm.max(tvm.max(x, y), z), x * 2), z * 2)) - - ck.verify(tvm.max(tvm.min(x, y), tvm.min(x, z)), tvm.min(tvm.max(y, z), x)) - ck.verify(tvm.max(tvm.min(x, y), tvm.min(z, x)), tvm.min(tvm.max(y, z), x)) - ck.verify(tvm.max(tvm.min(y, x), tvm.min(x, z)), tvm.min(tvm.max(y, z), x)) - ck.verify(tvm.max(tvm.min(y, x), tvm.min(z, x)), tvm.min(tvm.max(y, z), x)) - - ck.verify(tvm.max(y + x, z + x), tvm.max(y, z) + x) - ck.verify(tvm.max(y + x, x + z), tvm.max(y, z) + x) - ck.verify(tvm.max(x + y, z + x), tvm.max(y, z) + x) - ck.verify(tvm.max(x + y, x + z), tvm.max(y, z) + x) - - ck.verify(tvm.max(x - y, x - z), x - tvm.min(y, z)) - ck.verify(tvm.max(y - x, z - x), tvm.max(y, z) - x) - - ck.verify(tvm.max(tvm.max(x, 1), 10), tvm.max(x, 10)) - ck.verify(tvm.max(tvm.max(x, 11), 10), tvm.max(x, 11)) - - ck.verify(tvm.max(x * 3, 9), tvm.max(x, 3) * 3) - ck.verify(tvm.max(3 - x, 1), 3 - tvm.min(x, 2)) + ck.verify(tvm.te.max(tmod(x, 2), tmod(y, 2) + 10), tmod(y, 2) + 10) + ck.verify(tvm.te.max(flm(x, 2), flm(y, 2) + 10), flm(y, 2) + 10) + + ck.verify(tvm.te.max(x + 1, x + 10), x + 10) + ck.verify(tvm.te.max(x + 111, x + 10), x + 111) + ck.verify(tvm.te.max(x + 1, x), x + 1) 
+ ck.verify(tvm.te.max(x, x + 2), x + 2) + ck.verify(tvm.te.max(1 - x, 2 - x), 2 - x) + ck.verify(tvm.te.max(3 - x, 2 - x), 3 - x) + + ck.verify(tvm.te.max(tvm.te.min(x, y), tvm.te.max(x, y)), tvm.te.max(x, y)) + ck.verify(tvm.te.max(tvm.te.min(x, y), tvm.te.max(y, x)), tvm.te.max(x, y)) + + ck.verify(tvm.te.max(tvm.te.min(x, y), x), x) + ck.verify(tvm.te.max(tvm.te.min(y, x), x), x) + ck.verify(tvm.te.max(tvm.te.max(x, y), x), tvm.te.max(x, y)) + ck.verify(tvm.te.max(tvm.te.max(x, y), y), tvm.te.max(x, y)) + + ck.verify(tvm.te.max(x, tvm.te.min(x, y)), x) + ck.verify(tvm.te.max(x, tvm.te.min(y, x)), x) + ck.verify(tvm.te.max(x, tvm.te.max(x, y)), tvm.te.max(x, y)) + ck.verify(tvm.te.max(y, tvm.te.max(x, y)), tvm.te.max(x, y)) + + ck.verify(tvm.te.max(tvm.te.max(tvm.te.max(x, y), z), y), + tvm.te.max(tvm.te.max(x, y), z)) + ck.verify(tvm.te.max(tvm.te.max(tvm.te.max(tvm.te.max(x, y), z), x * 2), y), + tvm.te.max(tvm.te.max(tvm.te.max(x, y), z), x * 2)) + ck.verify(tvm.te.max(tvm.te.max(tvm.te.max(tvm.te.max(tvm.te.max(x, y), z), x * 2), z * 2), y), + tvm.te.max(tvm.te.max(tvm.te.max(tvm.te.max(x, y), z), x * 2), z * 2)) + + ck.verify(tvm.te.max(tvm.te.min(x, y), tvm.te.min(x, z)), tvm.te.min(tvm.te.max(y, z), x)) + ck.verify(tvm.te.max(tvm.te.min(x, y), tvm.te.min(z, x)), tvm.te.min(tvm.te.max(y, z), x)) + ck.verify(tvm.te.max(tvm.te.min(y, x), tvm.te.min(x, z)), tvm.te.min(tvm.te.max(y, z), x)) + ck.verify(tvm.te.max(tvm.te.min(y, x), tvm.te.min(z, x)), tvm.te.min(tvm.te.max(y, z), x)) + + ck.verify(tvm.te.max(y + x, z + x), tvm.te.max(y, z) + x) + ck.verify(tvm.te.max(y + x, x + z), tvm.te.max(y, z) + x) + ck.verify(tvm.te.max(x + y, z + x), tvm.te.max(y, z) + x) + ck.verify(tvm.te.max(x + y, x + z), tvm.te.max(y, z) + x) + + ck.verify(tvm.te.max(x - y, x - z), x - tvm.te.min(y, z)) + ck.verify(tvm.te.max(y - x, z - x), tvm.te.max(y, z) - x) + + ck.verify(tvm.te.max(tvm.te.max(x, 1), 10), tvm.te.max(x, 10)) + ck.verify(tvm.te.max(tvm.te.max(x, 11), 10), 
tvm.te.max(x, 11)) + + ck.verify(tvm.te.max(x * 3, 9), tvm.te.max(x, 3) * 3) + ck.verify(tvm.te.max(3 - x, 1), 3 - tvm.te.min(x, 2)) # DivMod rules # truc div - ck.verify(tvm.max(tdiv(x, 10), tdiv(y, 10)), tdiv(tvm.max(x, y), 10)) - ck.verify(tvm.max(tdiv(x, (-10)), tdiv(y, (-10))), tdiv(tvm.min(x, y), (-10))) - ck.verify(tvm.max(tdiv(x + 3, 4) * 4, x), tdiv(x + 3, 4) * 4) + ck.verify(tvm.te.max(tdiv(x, 10), tdiv(y, 10)), tdiv(tvm.te.max(x, y), 10)) + ck.verify(tvm.te.max(tdiv(x, (-10)), tdiv(y, (-10))), tdiv(tvm.te.min(x, y), (-10))) + ck.verify(tvm.te.max(tdiv(x + 3, 4) * 4, x), tdiv(x + 3, 4) * 4) # floordiv - ck.verify(tvm.max(fld(x, 10), fld(y, 10)), fld(tvm.max(x, y), 10)) - ck.verify(tvm.max(fld(x, (-10)), fld(y, (-10))), fld(tvm.min(x, y), (-10))) - ck.verify(tvm.max(fld(x + 3, 4) * 4, x), fld(x + 3, 4) * 4) - ck.verify(tvm.max(fld(x, 4) * 4, x), x) - ck.verify(tvm.max(x, fld(x, 4) * 4), x) + ck.verify(tvm.te.max(fld(x, 10), fld(y, 10)), fld(tvm.te.max(x, y), 10)) + ck.verify(tvm.te.max(fld(x, (-10)), fld(y, (-10))), fld(tvm.te.min(x, y), (-10))) + ck.verify(tvm.te.max(fld(x + 3, 4) * 4, x), fld(x + 3, 4) * 4) + ck.verify(tvm.te.max(fld(x, 4) * 4, x), x) + ck.verify(tvm.te.max(x, fld(x, 4) * 4), x) def test_cmp_simplify(): ck = RewriteChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") - flm = tvm.floormod - fld = tvm.floordiv - tdiv = tvm.truncdiv - tmod = tvm.truncmod + x, y, z = te.var("x"), te.var("y"), te.var("z") + flm = tvm.te.floormod + fld = tvm.te.floordiv + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod # const int bound - ck.verify((tmod(x, 2) + 10).equal(0), tvm.const(0, "bool")) - ck.verify(tvm.tir.NE(tmod(x, 2) + 10, 0), tvm.const(1, "bool")) - ck.verify(tmod(x, 2) + 10 > 1, tvm.const(1, "bool")) - ck.verify(tmod(x, 2) + 10 <= 1, tvm.const(0, "bool")) - ck.verify(flm(x, 2) + 2 > 1, tvm.const(1, "bool")) - ck.verify(flm(x, 2) + 10 <= 1, tvm.const(0, "bool")) + ck.verify((tmod(x, 2) + 10).equal(0), tvm.tir.const(0, "bool")) + 
ck.verify(tvm.tir.NE(tmod(x, 2) + 10, 0), tvm.tir.const(1, "bool")) + ck.verify(tmod(x, 2) + 10 > 1, tvm.tir.const(1, "bool")) + ck.verify(tmod(x, 2) + 10 <= 1, tvm.tir.const(0, "bool")) + ck.verify(flm(x, 2) + 2 > 1, tvm.tir.const(1, "bool")) + ck.verify(flm(x, 2) + 10 <= 1, tvm.tir.const(0, "bool")) - ck.verify(x * 3 + 10 == 0, tvm.const(0, "bool")) - ck.verify(x * 3 + 10 != 0, tvm.const(1, "bool")) + ck.verify(x * 3 + 10 == 0, tvm.tir.const(0, "bool")) + ck.verify(x * 3 + 10 != 0, tvm.tir.const(1, "bool")) # canonicalization ck.verify((x - 10).equal(0), x.equal(10)) @@ -750,88 +751,88 @@ def test_cmp_simplify(): ck.verify(fld(x + 2, 4) * 4 >= x - y, tvm.tir.LE(flm(x + 2, 4) + (-2), y)) # End DivMod Rules - ck.verify(tvm.min(x, 11) < 10, x < 10) - ck.verify(tvm.min(x, 8) < 10, tvm.const(1, "bool")) - ck.verify(tvm.max(8, x) > 10, tvm.tir.LT(10, x)) - ck.verify(x + 1 < tvm.max(8, x), x < 7) + ck.verify(tvm.te.min(x, 11) < 10, x < 10) + ck.verify(tvm.te.min(x, 8) < 10, tvm.tir.const(1, "bool")) + ck.verify(tvm.te.max(8, x) > 10, tvm.tir.LT(10, x)) + ck.verify(x + 1 < tvm.te.max(8, x), x < 7) ck.analyzer.update(x, tvm.arith.ConstIntBound(0, 10), override=True) ck.analyzer.update(y, tvm.arith.ConstIntBound(-10, 0), override=True) ck.analyzer.update(z, tvm.arith.ConstIntBound(-5, 5), override=True) - ck.verify(x < 11, tvm.const(1, "bool")) - ck.verify(x <= 10, tvm.const(1, "bool")) - ck.verify(z <= 5, tvm.const(1, "bool")) - ck.verify(x + y <= 10, tvm.const(1, "bool")) - ck.verify(x + y >= -10, tvm.const(1, "bool")) - ck.verify(z - 5 <= y + 10, tvm.const(1, "bool")) - ck.verify(tvm.all(x > -1, z <= x + 5), tvm.const(1, "bool")) - ck.verify(x*y <= 0, tvm.const(1, "bool")) - ck.verify((x + 1)*(y - 1) < 0, tvm.const(1, "bool")) - ck.verify(y*y >= 0, tvm.const(1, "bool")) - ck.verify(x*6 <= -3, tvm.const(0, "bool")) + ck.verify(x < 11, tvm.tir.const(1, "bool")) + ck.verify(x <= 10, tvm.tir.const(1, "bool")) + ck.verify(z <= 5, tvm.tir.const(1, "bool")) + ck.verify(x + y 
<= 10, tvm.tir.const(1, "bool")) + ck.verify(x + y >= -10, tvm.tir.const(1, "bool")) + ck.verify(z - 5 <= y + 10, tvm.tir.const(1, "bool")) + ck.verify(tvm.tir.all(x > -1, z <= x + 5), tvm.tir.const(1, "bool")) + ck.verify(x*y <= 0, tvm.tir.const(1, "bool")) + ck.verify((x + 1)*(y - 1) < 0, tvm.tir.const(1, "bool")) + ck.verify(y*y >= 0, tvm.tir.const(1, "bool")) + ck.verify(x*6 <= -3, tvm.tir.const(0, "bool")) ck.verify(tmod(y - 1, 3) == 0, tmod(y + (-1), 3) == 0) def test_logical_simplify(): ck = RewriteChecker() - x, y, z = tvm.var("x"), tvm.var("y"), tvm.var("z") + x, y, z = te.var("x"), te.var("y"), te.var("z") ck.verify(tvm.tir.And(tvm.tir.EQ(x, y), tvm.tir.NE(x, y)), - tvm.const(False, "bool")) + tvm.tir.const(False, "bool")) ck.verify(tvm.tir.And(tvm.tir.NE(x, y), tvm.tir.EQ(x, y)), - tvm.const(False, "bool")) - ck.verify(tvm.tir.And(x > 1, tvm.tir.Not(x > 1)), tvm.const(False, "bool")) - ck.verify(tvm.tir.And(x <= y, y < x), tvm.const(False, "bool")) - ck.verify(tvm.tir.And(y < x, x <= y), tvm.const(False, "bool")) - ck.verify(tvm.tir.And(x < 1, 0 < x), tvm.const(False, "bool")) - ck.verify(tvm.tir.And(x < 0, 1 < x), tvm.const(False, "bool")) - ck.verify(tvm.tir.And(x < 1, 1 <= x), tvm.const(False, "bool")) - ck.verify(tvm.tir.And(x <= 1, 1 < x), tvm.const(False, "bool")) - ck.verify(tvm.tir.And(1 <= x, x < 1), tvm.const(False, "bool")) - ck.verify(tvm.tir.And(1 < x, x <= 1), tvm.const(False, "bool")) - ck.verify(tvm.tir.And(x <= 1, 2 <= x), tvm.const(False, "bool")) - ck.verify(tvm.tir.And(2 <= x, x <= 1), tvm.const(False, "bool")) + tvm.tir.const(False, "bool")) + ck.verify(tvm.tir.And(x > 1, tvm.tir.Not(x > 1)), tvm.tir.const(False, "bool")) + ck.verify(tvm.tir.And(x <= y, y < x), tvm.tir.const(False, "bool")) + ck.verify(tvm.tir.And(y < x, x <= y), tvm.tir.const(False, "bool")) + ck.verify(tvm.tir.And(x < 1, 0 < x), tvm.tir.const(False, "bool")) + ck.verify(tvm.tir.And(x < 0, 1 < x), tvm.tir.const(False, "bool")) + ck.verify(tvm.tir.And(x < 1, 1 <= x), 
tvm.tir.const(False, "bool")) + ck.verify(tvm.tir.And(x <= 1, 1 < x), tvm.tir.const(False, "bool")) + ck.verify(tvm.tir.And(1 <= x, x < 1), tvm.tir.const(False, "bool")) + ck.verify(tvm.tir.And(1 < x, x <= 1), tvm.tir.const(False, "bool")) + ck.verify(tvm.tir.And(x <= 1, 2 <= x), tvm.tir.const(False, "bool")) + ck.verify(tvm.tir.And(2 <= x, x <= 1), tvm.tir.const(False, "bool")) ck.verify(tvm.tir.And(x == 1, x != 2), x == 1) ck.verify(tvm.tir.Or(tvm.tir.EQ(x, y), tvm.tir.NE(x, y)), - tvm.const(True, "bool")) + tvm.tir.const(True, "bool")) ck.verify(tvm.tir.Or(tvm.tir.NE(x, y), tvm.tir.EQ(x, y)), - tvm.const(True, "bool")) - ck.verify(tvm.tir.Or(x > y, tvm.tir.Not(x > y)), tvm.const(True, "bool")) + tvm.tir.const(True, "bool")) + ck.verify(tvm.tir.Or(x > y, tvm.tir.Not(x > y)), tvm.tir.const(True, "bool")) - ck.verify(tvm.tir.Or(x <= y, y < x), tvm.const(True, "bool")) - ck.verify(tvm.tir.Or(y < x, y >= x), tvm.const(True, "bool")) + ck.verify(tvm.tir.Or(x <= y, y < x), tvm.tir.const(True, "bool")) + ck.verify(tvm.tir.Or(y < x, y >= x), tvm.tir.const(True, "bool")) - ck.verify(tvm.tir.Or(x < 1, 0 < x), tvm.const(True, "bool")) - ck.verify(tvm.tir.Or(0 < x, x < 1), tvm.const(True, "bool")) + ck.verify(tvm.tir.Or(x < 1, 0 < x), tvm.tir.const(True, "bool")) + ck.verify(tvm.tir.Or(0 < x, x < 1), tvm.tir.const(True, "bool")) - ck.verify(tvm.tir.Or(x < 1, 1 <= x), tvm.const(True, "bool")) - ck.verify(tvm.tir.Or(x <= 1, 1 < x), tvm.const(True, "bool")) - ck.verify(tvm.tir.Or(1 <= x, x < 1), tvm.const(True, "bool")) - ck.verify(tvm.tir.Or(1 < x, x <= 1), tvm.const(True, "bool")) - ck.verify(tvm.tir.Or(x <= 1, 2 <= x), tvm.const(True, "bool")) - ck.verify(tvm.tir.Or(2 <= x, x <= 1), tvm.const(True, "bool")) + ck.verify(tvm.tir.Or(x < 1, 1 <= x), tvm.tir.const(True, "bool")) + ck.verify(tvm.tir.Or(x <= 1, 1 < x), tvm.tir.const(True, "bool")) + ck.verify(tvm.tir.Or(1 <= x, x < 1), tvm.tir.const(True, "bool")) + ck.verify(tvm.tir.Or(1 < x, x <= 1), tvm.tir.const(True, "bool")) 
+ ck.verify(tvm.tir.Or(x <= 1, 2 <= x), tvm.tir.const(True, "bool")) + ck.verify(tvm.tir.Or(2 <= x, x <= 1), tvm.tir.const(True, "bool")) ck.verify(tvm.tir.Or(x != 1, x == 2), x != 1) def test_let_simplify(): ck = RewriteChecker() - x, y = tvm.var("x"), tvm.var("y") + x, y = te.var("x"), te.var("y") z = tvm.tir.Let(x, 1, x + 1) ck.verify(z + z, 4) def test_cast_simplify(): ck = RewriteChecker() - x = tvm.var("x") + x = te.var("x") dtypes = ["float32", "float16", "int32", "int8", "bool"] for dtype1 in dtypes: - ck.verify(tvm.tir.Cast(dtype1, x - x), tvm.const(0, dtype1)) - ck.verify(tvm.tir.Cast(dtype1, x == x), tvm.const(1, dtype1)) + ck.verify(tvm.tir.Cast(dtype1, x - x), tvm.tir.const(0, dtype1)) + ck.verify(tvm.tir.Cast(dtype1, x == x), tvm.tir.const(1, dtype1)) for dtype2 in dtypes: for i in [0, 1, 2, 3]: - ck.verify(tvm.tir.Cast(dtype1, tvm.const(i, dtype2)), tvm.const(i, dtype1)) + ck.verify(tvm.tir.Cast(dtype1, tvm.tir.const(i, dtype2)), tvm.tir.const(i, dtype1)) if __name__ == "__main__": test_floordiv_index_simplify() diff --git a/tests/python/unittest/test_arith_stmt_simplify.py b/tests/python/unittest/test_arith_stmt_simplify.py index 58b6083..45f0833 100644 --- a/tests/python/unittest/test_arith_stmt_simplify.py +++ b/tests/python/unittest/test_arith_stmt_simplify.py @@ -15,50 +15,51 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te def test_stmt_simplify(): - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() A = ib.pointer("float32", name="A") C = ib.pointer("float32", name="C") - n = tvm.size_var("n") + n = te.size_var("n") with ib.for_range(0, n, name="i") as i: with ib.if_scope(i < 12): A[i] = C[i] body = tvm.tir.LetStmt(n, 10, ib.get()) - body = tvm.ir_pass.CanonicalSimplify(body) + body = tvm.tir.ir_pass.CanonicalSimplify(body) assert isinstance(body.body, tvm.tir.Store) def test_thread_extent_simplify(): - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() A = ib.pointer("float32", name="A") C = ib.pointer("float32", name="C") - n = tvm.size_var("n") - tx = tvm.thread_axis("threadIdx.x") - ty = tvm.thread_axis("threadIdx.y") + n = te.size_var("n") + tx = te.thread_axis("threadIdx.x") + ty = te.thread_axis("threadIdx.y") ib.scope_attr(tx, "thread_extent", n) ib.scope_attr(tx, "thread_extent", n) ib.scope_attr(ty, "thread_extent", 1) with ib.if_scope(tx + ty < 12): A[tx] = C[tx + ty] body = tvm.tir.LetStmt(n, 10, ib.get()) - body = tvm.ir_pass.CanonicalSimplify(body) + body = tvm.tir.ir_pass.CanonicalSimplify(body) assert isinstance(body.body.body.body, tvm.tir.Store) def test_basic_likely_elimination(): - n = tvm.size_var('n') - X = tvm.placeholder(shape=(n,), name="x") - W = tvm.placeholder(shape=(n + 1,), dtype="int32", name="w") + n = te.size_var('n') + X = te.placeholder(shape=(n,), name="x") + W = te.placeholder(shape=(n + 1,), dtype="int32", name="w") def f(i): start = W[i] extent = W[i+1] - W[i] - rv = tvm.reduce_axis((0, extent)) - return tvm.sum(X[rv + start], axis=rv) - Y = tvm.compute(X.shape, f, name="y") - s = tvm.create_schedule([Y.op]) + rv = te.reduce_axis((0, extent)) + return te.sum(X[rv + start], axis=rv) + Y = te.compute(X.shape, f, name="y") + s = te.create_schedule([Y.op]) stmt = tvm.lower(s, [X, W, Y], simple_mode=True) assert('if' not in str(stmt)) @@ -68,10 +69,10 @@ def test_complex_likely_elimination(): 
Y[i] = sum(X[:i]) """ (m, ) = X.shape - s_state = tvm.placeholder((m + 1, ), dtype="int32", name="state") - s_init = tvm.compute((1, ), lambda _: tvm.const(0, "int32")) - s_update = tvm.compute((m + 1, ), lambda l: s_state[l - 1] + X[l - 1]) - return tvm.scan(s_init, s_update, s_state, inputs=[X], name="cumsum") + s_state = te.placeholder((m + 1, ), dtype="int32", name="state") + s_init = te.compute((1, ), lambda _: tvm.tir.const(0, "int32")) + s_update = te.compute((m + 1, ), lambda l: s_state[l - 1] + X[l - 1]) + return tvm.te.scan(s_init, s_update, s_state, inputs=[X], name="cumsum") def sparse_lengths_sum(data, indices, lengths): oshape = list(data.shape) @@ -79,21 +80,21 @@ def test_complex_likely_elimination(): length_offsets = cumsum(lengths) def sls(n, d): - gg = tvm.reduce_axis((0, lengths[n])) + gg = te.reduce_axis((0, lengths[n])) indices_idx = length_offsets[n] + gg data_idx = indices[indices_idx] data_val = data[data_idx, d] - return tvm.sum(data_val, axis=gg) + return te.sum(data_val, axis=gg) - return tvm.compute(oshape, sls) + return te.compute(oshape, sls) - m, n, d, i, l = tvm.size_var('m'), tvm.size_var('n'), tvm.size_var('d'),\ - tvm.size_var('i'), tvm.size_var('l') - data_ph = tvm.placeholder((m, d * 32), name="data") - indices_ph = tvm.placeholder((i,), name="indices", dtype="int32") - lengths_ph = tvm.placeholder((n,), name="lengths", dtype="int32") + m, n, d, i, l = te.size_var('m'), te.size_var('n'), te.size_var('d'),\ + te.size_var('i'), te.size_var('l') + data_ph = te.placeholder((m, d * 32), name="data") + indices_ph = te.placeholder((i,), name="indices", dtype="int32") + lengths_ph = te.placeholder((n,), name="lengths", dtype="int32") Y = sparse_lengths_sum(data_ph, indices_ph, lengths_ph) - s = tvm.create_schedule([Y.op]) + s = te.create_schedule([Y.op]) (n, d) = s[Y].op.axis (do, di) = s[Y].split(d, factor=32) (gg,) = s[Y].op.reduce_axis diff --git a/tests/python/unittest/test_autotvm_common.py 
b/tests/python/unittest/test_autotvm_common.py index 83bbd54..a2f9b1d 100644 --- a/tests/python/unittest/test_autotvm_common.py +++ b/tests/python/unittest/test_autotvm_common.py @@ -20,6 +20,7 @@ import time import numpy as np import tvm +from tvm import te from tvm import autotvm from tvm.autotvm import MeasureInput, MeasureResult from tvm.autotvm.measure.measure import Runner @@ -38,12 +39,12 @@ class DummyRunner(Runner): @autotvm.register_customized_task("testing/matmul") def matmul(N, L, M, dtype): - A = tvm.placeholder((N, L), name='A', dtype=dtype) - B = tvm.placeholder((L, M), name='B', dtype=dtype) + A = te.placeholder((N, L), name='A', dtype=dtype) + B = te.placeholder((L, M), name='B', dtype=dtype) - k = tvm.reduce_axis((0, L), name='k') - C = tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k), name='C') - s = tvm.create_schedule(C.op) + k = te.reduce_axis((0, L), name='k') + C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name='C') + s = te.create_schedule(C.op) # schedule y, x = s[C].op.axis @@ -66,12 +67,12 @@ def matmul(N, L, M, dtype): @autotvm.register_customized_task("testing/bad_matmul") def bad_matmul(N, L, M, dtype): if 'bad_device' in tvm.target.Target.current().keys: - A = tvm.placeholder((N, L), name='A', dtype=dtype) - B = tvm.placeholder((L, M), name='B', dtype=dtype) + A = te.placeholder((N, L), name='A', dtype=dtype) + B = te.placeholder((L, M), name='B', dtype=dtype) - k = tvm.reduce_axis((0, L-1), name='k') - C = tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k), name='C') - s = tvm.create_schedule(C.op) + k = te.reduce_axis((0, L-1), name='k') + C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name='C') + s = te.create_schedule(C.op) # schedule y, x = s[C].op.axis diff --git a/tests/python/unittest/test_autotvm_feature.py b/tests/python/unittest/test_autotvm_feature.py index e0736c2..59ad464 100644 --- a/tests/python/unittest/test_autotvm_feature.py +++ 
b/tests/python/unittest/test_autotvm_feature.py @@ -19,20 +19,21 @@ import numpy as np import tvm +from tvm import te from tvm.autotvm import feature def test_iter_feature_gemm(): N = 128 - k = tvm.reduce_axis((0, N), 'k') - A = tvm.placeholder((N, N), name='A') - B = tvm.placeholder((N, N), name='B') - C = tvm.compute( + k = te.reduce_axis((0, N), 'k') + A = te.placeholder((N, N), name='A') + B = te.placeholder((N, N), name='B') + C = te.compute( A.shape, - lambda y, x: tvm.sum(A[y, k] * B[k, x], axis=k), + lambda y, x: te.sum(A[y, k] * B[k, x], axis=k), name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) feas = feature.get_itervar_feature(s, [A, B, C], take_log=False) @@ -64,15 +65,15 @@ def test_iter_feature_gemm(): def test_curve_feature_gemm(): N = 128 - k = tvm.reduce_axis((0, N), 'k') - A = tvm.placeholder((N, N), name='A') - B = tvm.placeholder((N, N), name='B') - C = tvm.compute( + k = te.reduce_axis((0, N), 'k') + A = te.placeholder((N, N), name='A') + B = te.placeholder((N, N), name='B') + C = te.compute( A.shape, - lambda y, x: tvm.sum(A[y, k] * B[k, x], axis=k), + lambda y, x: te.sum(A[y, k] * B[k, x], axis=k), name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) feas = feature.get_buffer_curve_sample_flatten(s, [A, B, C], sample_n=30) # sample_n * #buffers * #curves * 2 numbers per curve @@ -85,13 +86,13 @@ def test_feature_shape(): n_sample = 100 def get_gemm_feature(target): - k = tvm.reduce_axis((0, N), 'k') - A = tvm.placeholder((N, N), name='A') - B = tvm.placeholder((N, N), name='B') - C = tvm.compute(A.shape, lambda y, x: tvm.sum(A[y, k] * B[k, x], axis=k), + k = te.reduce_axis((0, N), 'k') + A = te.placeholder((N, N), name='A') + B = te.placeholder((N, N), name='B') + C = te.compute(A.shape, lambda y, x: te.sum(A[y, k] * B[k, x], axis=k), name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) y, x = s[C].op.axis axes = list(s[C].tile(y, x, 8, 8)) + [k] @@ -105,9 +106,9 @@ def 
test_feature_shape(): for i in range(len(perm)): if perm[i] != 4: pick.append(axes[i]) - s[C].bind(pick[0], tvm.thread_axis("blockIdx.x")) - s[C].bind(pick[1], tvm.thread_axis("vthread")) - s[C].bind(pick[2], tvm.thread_axis("threadIdx.y")) + s[C].bind(pick[0], te.thread_axis("blockIdx.x")) + s[C].bind(pick[1], te.thread_axis("vthread")) + s[C].bind(pick[2], te.thread_axis("threadIdx.y")) with target: feas = feature.get_itervar_feature(s, [A, B, C]) diff --git a/tests/python/unittest/test_autotvm_flop_calculator.py b/tests/python/unittest/test_autotvm_flop_calculator.py index 5cafd02..e06010b 100644 --- a/tests/python/unittest/test_autotvm_flop_calculator.py +++ b/tests/python/unittest/test_autotvm_flop_calculator.py @@ -17,6 +17,7 @@ """Test flop calculation""" import tvm +from tvm import te import numpy as np from tvm.autotvm.task.task import compute_flop @@ -30,24 +31,24 @@ def test_conv(): for i in range(5): N, H, W, CO, CI, KH, KW = [np.random.randint(10, 32) for _ in range(7)] (input_dtype, acc_dtype) = random_dtypes() - D = tvm.placeholder((N, CI, H, W), dtype=input_dtype) - K = tvm.placeholder((CO, CI, KH, KW), dtype=input_dtype) + D = te.placeholder((N, CI, H, W), dtype=input_dtype) + K = te.placeholder((CO, CI, KH, KW), dtype=input_dtype) KH = min(H, KH) KW = min(W, KW) - ci = tvm.reduce_axis((0, CI)) - kh = tvm.reduce_axis((0, KH)) - kw = tvm.reduce_axis((0, KW)) + ci = te.reduce_axis((0, CI)) + kh = te.reduce_axis((0, KH)) + kw = te.reduce_axis((0, KW)) OH = (H - KH) + 1 OW = (W - KW) + 1 - C = tvm.compute((N, CO, OH, OW), lambda n, co, h, w: - tvm.sum(D[n][ci][h][w].astype(acc_dtype) * K[co][ci][h][w].astype(acc_dtype), + C = te.compute((N, CO, OH, OW), lambda n, co, h, w: + te.sum(D[n][ci][h][w].astype(acc_dtype) * K[co][ci][h][w].astype(acc_dtype), axis=[ci, kh, kw])) - s = tvm.create_schedule([C.op]) + s = te.create_schedule([C.op]) assert compute_flop(s) == 2 * N * CO * OH * OW * CI * KH * KW @@ -55,55 +56,55 @@ def test_pack_gemm(): for i in 
range(5): N, L, M = [np.random.randint(10, 128) * 4 for _ in range(3)] (input_dtype, acc_dtype) = random_dtypes() - A = tvm.placeholder((N, L), dtype=input_dtype) - B = tvm.placeholder((M, L), dtype=input_dtype) - k = tvm.reduce_axis((0, L)) + A = te.placeholder((N, L), dtype=input_dtype) + B = te.placeholder((M, L), dtype=input_dtype) + k = te.reduce_axis((0, L)) bn = 4 - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod - A_pack = tvm.compute((N // bn, L, bn), lambda i, j, k: A[i * bn + k][j]) - B_pack = tvm.compute((M // bn, L, bn), lambda i, j, k: B[i * bn + k][j]) - C_pack = tvm.compute((N // bn, M // bn, bn, bn), lambda i, j, ii, jj: - tvm.sum(A_pack[i, k, ii].astype(acc_dtype) * B_pack[j, k, jj].astype(acc_dtype), axis=[k])) - C = tvm.compute((N, M), lambda i, j: C_pack[idxd(i, bn)][idxd(j, bn)][idxm(i, bn)][idxm(j, bn)]) + A_pack = te.compute((N // bn, L, bn), lambda i, j, k: A[i * bn + k][j]) + B_pack = te.compute((M // bn, L, bn), lambda i, j, k: B[i * bn + k][j]) + C_pack = te.compute((N // bn, M // bn, bn, bn), lambda i, j, ii, jj: + te.sum(A_pack[i, k, ii].astype(acc_dtype) * B_pack[j, k, jj].astype(acc_dtype), axis=[k])) + C = te.compute((N, M), lambda i, j: C_pack[idxd(i, bn)][idxd(j, bn)][idxm(i, bn)][idxm(j, bn)]) - s = tvm.create_schedule([C.op]) + s = te.create_schedule([C.op]) assert compute_flop(s) == 2 * N * L * M def test_outer_dot(): for i in range(5): N, M = [np.random.randint(10, 128) * 4 for _ in range(2)] (input_dtype, acc_dtype) = random_dtypes() - A = tvm.placeholder((N,), dtype=input_dtype) - B = tvm.placeholder((M,), dtype=input_dtype) + A = te.placeholder((N,), dtype=input_dtype) + B = te.placeholder((M,), dtype=input_dtype) - C = tvm.compute((N, M), lambda i, j: A[i].astype(acc_dtype) * B[j].astype(acc_dtype)) + C = te.compute((N, M), lambda i, j: A[i].astype(acc_dtype) * B[j].astype(acc_dtype)) - s = tvm.create_schedule([C.op]) + s = te.create_schedule([C.op]) assert compute_flop(s) == N 
* M def test_max_pool(): for i in range(5): N, H, W, CO, CI, KH, KW = [np.random.randint(10, 32) for _ in range(7)] (input_dtype, _) = random_dtypes() - D = tvm.placeholder((N, CI, H, W), dtype=input_dtype) + D = te.placeholder((N, CI, H, W), dtype=input_dtype) KH = min(H, KH) KW = min(W, KW) - kh = tvm.reduce_axis((0, KH)) - kw = tvm.reduce_axis((0, KW)) + kh = te.reduce_axis((0, KH)) + kw = te.reduce_axis((0, KW)) OH = (H - KH) + 1 OW = (W - KW) + 1 - C = tvm.compute( + C = te.compute( (N, CO, OH, OW), - lambda n, co, h, w: tvm.max(D[n][co][h + kh][w + kw], axis=[kh, kw])) + lambda n, co, h, w: tvm.te.max(D[n][co][h + kh][w + kw], axis=[kh, kw])) - s = tvm.create_schedule([C.op]) + s = te.create_schedule([C.op]) assert compute_flop(s) == N * CO * OH * OW * KH * KW @@ -111,24 +112,24 @@ def test_average_pool(): for i in range(5): N, H, W, CO, CI, KH, KW = [np.random.randint(10, 32) for _ in range(7)] (input_dtype, acc_dtype) = random_dtypes() - D = tvm.placeholder((N, CI, H, W), dtype=input_dtype) + D = te.placeholder((N, CI, H, W), dtype=input_dtype) KH = min(H, KH) KW = min(W, KW) - kh = tvm.reduce_axis((0, KH)) - kw = tvm.reduce_axis((0, KW)) + kh = te.reduce_axis((0, KH)) + kw = te.reduce_axis((0, KW)) OH = (H - KH) + 1 OW = (W - KW) + 1 - C = tvm.compute( + C = te.compute( (N, CO, OH, OW), - lambda n, co, h, w: tvm.sum( - tvm.div(D[n][co][h + kh][w + kw].astype(acc_dtype), (KW * KH)), axis=[kh, kw])) + lambda n, co, h, w: te.sum( + te.div(D[n][co][h + kh][w + kw].astype(acc_dtype), (KW * KH)), axis=[kh, kw])) - s = tvm.create_schedule([C.op]) + s = te.create_schedule([C.op]) assert compute_flop(s) == 2 * N * CO * OH * OW * KH * KW @@ -136,9 +137,9 @@ def test_move(): """No float number operation in simple move. 
So the estimator should raise an error """ N = 1024 - A = tvm.placeholder((N,)) - C = tvm.compute((N,), lambda i: A[i]) - s = tvm.create_schedule([C.op]) + A = te.placeholder((N,)) + C = te.compute((N,), lambda i: A[i]) + s = te.create_schedule([C.op]) try: compute_flop(s) diff --git a/tests/python/unittest/test_autotvm_measure.py b/tests/python/unittest/test_autotvm_measure.py index 0899f6f..f96d333 100644 --- a/tests/python/unittest/test_autotvm_measure.py +++ b/tests/python/unittest/test_autotvm_measure.py @@ -21,6 +21,7 @@ import time import numpy as np import tvm +from tvm import te from test_autotvm_common import DummyRunner, bad_matmul, get_sample_task from tvm import autotvm from tvm.autotvm.measure.measure import MeasureErrorNo, MeasureResult diff --git a/tests/python/unittest/test_autotvm_record.py b/tests/python/unittest/test_autotvm_record.py index 0839ad9..bcc9a93 100644 --- a/tests/python/unittest/test_autotvm_record.py +++ b/tests/python/unittest/test_autotvm_record.py @@ -18,6 +18,7 @@ import time import tvm +from tvm import te from tvm.contrib import util from tvm import autotvm diff --git a/tests/python/unittest/test_autotvm_space.py b/tests/python/unittest/test_autotvm_space.py index 95f3201..2694c49 100644 --- a/tests/python/unittest/test_autotvm_space.py +++ b/tests/python/unittest/test_autotvm_space.py @@ -17,16 +17,17 @@ """Test space definition primitives""" import tvm +from tvm import te from tvm.autotvm.task.space import ConfigSpace, FallbackConfigEntity def gemm_func(cfg, N): - A = tvm.placeholder((N, N), name='A') - B = tvm.placeholder((N, N), name='B') + A = te.placeholder((N, N), name='A') + B = te.placeholder((N, N), name='B') - k = tvm.reduce_axis((0, N), name='k') - C = tvm.compute((N, N), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=[k]), name='C') + k = te.reduce_axis((0, N), name='k') + C = te.compute((N, N), lambda i, j: te.sum(A[i, k] * B[k, j], axis=[k]), name='C') - s = tvm.create_schedule([C.op]) + s = 
te.create_schedule([C.op]) y, x = s[C].op.axis diff --git a/tests/python/unittest/test_autotvm_xgboost_model.py b/tests/python/unittest/test_autotvm_xgboost_model.py index 24677c5..214a600 100644 --- a/tests/python/unittest/test_autotvm_xgboost_model.py +++ b/tests/python/unittest/test_autotvm_xgboost_model.py @@ -19,6 +19,7 @@ import time import numpy as np import tvm +from tvm import te from tvm import autotvm from tvm.autotvm import MeasureInput, MeasureResult from tvm.autotvm.tuner.xgboost_cost_model import XGBoostCostModel diff --git a/tests/python/unittest/test_build_lower.py b/tests/python/unittest/test_build_lower.py index 58312dc..736030b 100644 --- a/tests/python/unittest/test_build_lower.py +++ b/tests/python/unittest/test_build_lower.py @@ -15,28 +15,29 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_lower_rfactor(): - n = tvm.size_var("n") - m = tvm.size_var("m") - A = tvm.placeholder((n, m), name='A') - k = tvm.reduce_axis((0, m), "k") - B = tvm.compute((n,), lambda i: tvm.sum(A[i, k], axis=k), name="B") - s = tvm.create_schedule(B.op) + n = te.size_var("n") + m = te.size_var("m") + A = te.placeholder((n, m), name='A') + k = te.reduce_axis((0, m), "k") + B = te.compute((n,), lambda i: te.sum(A[i, k], axis=k), name="B") + s = te.create_schedule(B.op) ko, ki = s[B].split(B.op.reduce_axis[0], factor=16) BF = s.rfactor(B, ki) xo, xi = s[B].split(s[B].op.axis[0], factor=32) - s[B.op].bind(xo, tvm.thread_axis("blockIdx.x")) - s[B.op].bind(xi, tvm.thread_axis("threadIdx.y")) - s[B].bind(s[B].op.reduce_axis[0], tvm.thread_axis("threadIdx.x")) + s[B.op].bind(xo, te.thread_axis("blockIdx.x")) + s[B.op].bind(xi, te.thread_axis("threadIdx.y")) + s[B].bind(s[B].op.reduce_axis[0], te.thread_axis("threadIdx.x")) s[BF].compute_at(s[B], s[B].op.reduce_axis[0]) fapi = tvm.lower(s, [A, B]) def test_dependent_output_shape(): - n, m, x = tvm.size_var('n'), tvm.size_var('m'), tvm.size_var('x') - A = 
tvm.placeholder((n, m)) - B = tvm.compute((m, n//x), lambda i, j: A[i,j] , name='B') - s = tvm.create_schedule(B.op) + n, m, x = te.size_var('n'), te.size_var('m'), te.size_var('x') + A = te.placeholder((n, m)) + B = te.compute((m, n//x), lambda i, j: A[i,j] , name='B') + s = te.create_schedule(B.op) mod = tvm.build(s, [A, B, x]) if __name__ == "__main__": diff --git a/tests/python/unittest/test_codegen_arm.py b/tests/python/unittest/test_codegen_arm.py index 8e2ad7a..65d82b0 100644 --- a/tests/python/unittest/test_codegen_arm.py +++ b/tests/python/unittest/test_codegen_arm.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import re import os import ctypes @@ -23,10 +24,10 @@ def test_popcount(): target = 'llvm -target=armv7l-none-linux-gnueabihf -mcpu=cortex-a53 -mattr=+neon' def check_correct_assembly(type, elements, counts): - n = tvm.convert(elements) - A = tvm.placeholder(n, dtype=type, name='A') - B = tvm.compute(A.shape, lambda i: tvm.popcount(A[i]), name='B') - s = tvm.create_schedule(B.op) + n = tvm.runtime.convert(elements) + A = te.placeholder(n, dtype=type, name='A') + B = te.compute(A.shape, lambda i: tvm.tir.popcount(A[i]), name='B') + s = te.create_schedule(B.op) s[B].vectorize(s[B].op.axis[0]) f = tvm.build(s, [A, B], target) @@ -47,13 +48,13 @@ def test_vmlal_s16(): target = 'llvm -target=armv7l-none-linux-gnueabihf -mcpu=cortex-a53 -mattr=+neon' def check_correct_assembly(N): - K = tvm.size_var("K") - A = tvm.placeholder((K, N), dtype="int8", name='A') - B = tvm.placeholder((K, N), dtype="int8", name='B') - k = tvm.reduce_axis((0, K)) - C = tvm.compute((N, ), lambda n: tvm.sum( + K = te.size_var("K") + A = te.placeholder((K, N), dtype="int8", name='A') + B = te.placeholder((K, N), dtype="int8", name='B') + k = te.reduce_axis((0, K)) + C = te.compute((N, ), lambda n: te.sum( A[k, n].astype("int32") * B[k, n].astype("int32"), axis=[k]), name='C') - s = 
tvm.create_schedule(C.op) + s = te.create_schedule(C.op) s[C].vectorize(s[C].op.axis[0]) f = tvm.build(s, [A, B, C], target) @@ -67,14 +68,14 @@ def test_vmlal_s16(): check_correct_assembly(64) def check_broadcast_correct_assembly(N): - K = tvm.size_var("K") - A = tvm.placeholder((K, N), dtype="int8", name='A') - B = tvm.placeholder((K,), dtype="int8", name='B') - k = tvm.reduce_axis((0, K)) - C = tvm.compute((N, ), lambda n: tvm.sum( + K = te.size_var("K") + A = te.placeholder((K, N), dtype="int8", name='A') + B = te.placeholder((K,), dtype="int8", name='B') + k = te.reduce_axis((0, K)) + C = te.compute((N, ), lambda n: te.sum( A[k, n].astype("int32") * B[k].astype("int32"), axis=[k]), name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) s[C].vectorize(s[C].op.axis[0]) f = tvm.build(s, [A, B, C], target) diff --git a/tests/python/unittest/test_codegen_blob.py b/tests/python/unittest/test_codegen_blob.py index c14607d..62043e3 100644 --- a/tests/python/unittest/test_codegen_blob.py +++ b/tests/python/unittest/test_codegen_blob.py @@ -20,6 +20,7 @@ from tvm import relay from tvm.relay import testing from tvm.contrib import graph_runtime import tvm +from tvm import te import ctypes def test_resnet18(): @@ -74,13 +75,13 @@ def test_system_lib(): print("skip because %s is not enabled..." 
% device) return nn = 12 - n = tvm.convert(nn) - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') - s = tvm.create_schedule(B.op) + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + s = te.create_schedule(B.op) bx, tx = s[B].split(B.op.axis[0], factor=4) - s[B].bind(bx, tvm.thread_axis("blockIdx.x")) - s[B].bind(tx, tvm.thread_axis("threadIdx.x")) + s[B].bind(bx, te.thread_axis("blockIdx.x")) + s[B].bind(tx, te.thread_axis("threadIdx.x")) from tvm.contrib import util temp = util.tempdir() diff --git a/tests/python/unittest/test_codegen_bool.py b/tests/python/unittest/test_codegen_bool.py index 33711cb..cdb343f 100644 --- a/tests/python/unittest/test_codegen_bool.py +++ b/tests/python/unittest/test_codegen_bool.py @@ -17,21 +17,22 @@ """codegen related to bool types""" import tvm +from tvm import te import numpy as np def test_cmp_load_store(): n = 32 - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda *i: A(*i) > B(*i), name='C') - D = tvm.compute(C.shape, lambda *i: tvm.all(C(*i), + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda *i: A(*i) > B(*i), name='C') + D = te.compute(C.shape, lambda *i: tvm.tir.all(C(*i), A(*i) > 1).astype('float32'), name="D") def check_llvm(): if not tvm.runtime.enabled("llvm"): return - s = tvm.create_schedule(D.op) + s = te.create_schedule(D.op) xo, xi = s[C].split(C.op.axis[0], factor=4) xo1, xo2 = s[C].split(xo, factor=13) s[C].parallel(xo2) @@ -50,11 +51,11 @@ def test_cmp_load_store(): ctx = tvm.context(device, 0) if not ctx.exist: return - s = tvm.create_schedule(D.op) + s = te.create_schedule(D.op) for stage in [C, D]: xo, xi = s[stage].split(stage.op.axis[0], factor=4) - s[stage].bind(xo, tvm.thread_axis("blockIdx.x")) - s[stage].bind(xi, tvm.thread_axis("threadIdx.x")) + s[stage].bind(xo, 
te.thread_axis("blockIdx.x")) + s[stage].bind(xi, te.thread_axis("threadIdx.x")) f = tvm.build(s, [A, B, D], device) a_np = np.random.uniform(size=n).astype(A.dtype) a = tvm.nd.array(a_np, ctx) diff --git a/tests/python/unittest/test_codegen_c_host.py b/tests/python/unittest/test_codegen_c_host.py index a126c07..1604ffb 100644 --- a/tests/python/unittest/test_codegen_c_host.py +++ b/tests/python/unittest/test_codegen_c_host.py @@ -15,16 +15,17 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np from tvm.contrib import util def test_add(): nn = 1024 - n = tvm.convert(nn) - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') - s = tvm.create_schedule(C.op) + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') + s = te.create_schedule(C.op) def check_c(): mhost = tvm.build(s, [A, B, C], "c", name="fadd") @@ -47,14 +48,14 @@ def test_add(): def test_add_pipeline(): nn = 1024 - n = tvm.convert(nn) - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - AA = tvm.compute((n,), lambda *i: A(*i), name='A') - BB = tvm.compute((n,), lambda *i: B(*i), name='B') - T = tvm.compute(A.shape, lambda *i: AA(*i) + BB(*i), name='T') - C = tvm.compute(A.shape, lambda *i: T(*i), name='C') - s = tvm.create_schedule(C.op) + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + AA = te.compute((n,), lambda *i: A(*i), name='A') + BB = te.compute((n,), lambda *i: B(*i), name='B') + T = te.compute(A.shape, lambda *i: AA(*i) + BB(*i), name='T') + C = te.compute(A.shape, lambda *i: T(*i), name='C') + s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], factor=4) xo1, xo2 = s[C].split(xo, factor=13) s[C].parallel(xo2) @@ -65,16 +66,16 @@ def 
test_add_pipeline(): def check_c(): # Specifically allow offset to test codepath when offset is available - Ab = tvm.decl_buffer( + Ab = tvm.tir.decl_buffer( A.shape, A.dtype, - elem_offset=tvm.size_var('Aoffset'), + elem_offset=te.size_var('Aoffset'), offset_factor=8, name='A') binds = {A : Ab} # BUILD and invoke the kernel. f1 = tvm.lower(s, [A,B,C], name="fadd_pipeline") - fsplits = [x for x in tvm.ir_pass.SplitHostDevice(f1)] - fsplits[0] = tvm.ir_pass.LowerTVMBuiltin(fsplits[0]) + fsplits = [x for x in tvm.tir.ir_pass.SplitHostDevice(f1)] + fsplits[0] = tvm.tir.ir_pass.LowerTVMBuiltin(fsplits[0]) mhost = tvm.target.codegen.build_module(fsplits[0], "c") temp = util.tempdir() path_dso = temp.relpath("temp.so") @@ -91,16 +92,16 @@ def test_add_pipeline(): tvm.testing.assert_allclose( c.asnumpy(), a.asnumpy() + b.asnumpy()) - with tvm.build_config(offset_factor=4): + with tvm.target.build_config(offset_factor=4): check_c() def test_reinterpret(): nn = 1024 - n = tvm.convert(nn) - A = tvm.placeholder((n,), name='A', dtype="int32") - B = tvm.compute(A.shape, lambda *i: tvm.call_pure_intrin("float32", "reinterpret", A(*i)), name='B') - s = tvm.create_schedule(B.op) + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A', dtype="int32") + B = te.compute(A.shape, lambda *i: tvm.tir.call_pure_intrin("float32", "reinterpret", A(*i)), name='B') + s = te.create_schedule(B.op) def check_c(): mhost = tvm.build(s, [A, B], "c", name="reinterpret") diff --git a/tests/python/unittest/test_codegen_cross_llvm.py b/tests/python/unittest/test_codegen_cross_llvm.py index 1827ccf..cb3986e 100644 --- a/tests/python/unittest/test_codegen_cross_llvm.py +++ b/tests/python/unittest/test_codegen_cross_llvm.py @@ -16,6 +16,7 @@ # under the License. 
"""Test cross compilation""" import tvm +from tvm import te import os import struct from tvm import rpc @@ -24,11 +25,11 @@ import numpy as np def test_llvm_add_pipeline(): nn = 1024 - n = tvm.convert(nn) - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') - s = tvm.create_schedule(C.op) + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') + s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], factor=4) s[C].parallel(xo) s[C].vectorize(xi) diff --git a/tests/python/unittest/test_codegen_cuda.py b/tests/python/unittest/test_codegen_cuda.py index 8652817..f94d8c3 100644 --- a/tests/python/unittest/test_codegen_cuda.py +++ b/tests/python/unittest/test_codegen_cuda.py @@ -16,14 +16,15 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np import topi import unittest from tvm.contrib.nvcc import have_fp16, have_int8 from tvm.contrib import nvcc -tx = tvm.thread_axis("threadIdx.x") -bx = tvm.thread_axis("blockIdx.x") +tx = te.thread_axis("threadIdx.x") +bx = te.thread_axis("blockIdx.x") def test_cuda_vectorize_add(): num_thread = 8 @@ -37,9 +38,9 @@ def test_cuda_vectorize_add(): if dtype == "int8" and not have_int8(tvm.gpu(0).compute_version): print("skip because gpu does not support int8") return - A = tvm.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes)) - B = tvm.compute((n,), lambda i: A[i] + tvm.const(1, A.dtype), name='B') - s = tvm.create_schedule(B.op) + A = te.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes)) + B = te.compute((n,), lambda i: A[i] + tvm.tir.const(1, A.dtype), name='B') + s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=num_thread) s[B].bind(xo, bx) s[B].bind(xi, tx) @@ -69,12 +70,12 @@ def test_cuda_multiply_add(): if dtype 
== "int8" and not have_int8(tvm.gpu(0).compute_version): print("skip because gpu does not support int8") return - A = tvm.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes)) - B = tvm.placeholder((n,), name='B', dtype="%sx%d" % (dtype, lanes)) - C = tvm.placeholder((n,), name='C', dtype="int32") - D = tvm.compute((n,), - lambda i: tvm.call_pure_extern("int32", "__dp4a", A[i], B[i], C[i]), name='D') - s = tvm.create_schedule(D.op) + A = te.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes)) + B = te.placeholder((n,), name='B', dtype="%sx%d" % (dtype, lanes)) + C = te.placeholder((n,), name='C', dtype="int32") + D = te.compute((n,), + lambda i: tvm.tir.call_pure_extern("int32", "__dp4a", A[i], B[i], C[i]), name='D') + s = te.create_schedule(D.op) xo, xi = s[D].split(D.op.axis[0], factor=num_thread) s[D].bind(xo, bx) s[D].bind(xi, tx) @@ -99,9 +100,9 @@ def test_cuda_vectorize_load(): print("skip because cuda is not enabled..") return ctx = tvm.gpu(0) - A = tvm.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes)) - B = tvm.compute((n,), lambda i: A[i], name='B') - s = tvm.create_schedule(B.op) + A = te.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes)) + B = te.compute((n,), lambda i: A[i], name='B') + s = te.create_schedule(B.op) block, thread = s[B].split(B.op.axis[0], factor=num_thread) s[B].bind(block, bx) s[B].bind(thread, tx) @@ -122,8 +123,8 @@ def test_cuda_make_int8x4(): lanes = 4 dtype = 'int8' ctx = tvm.gpu(0) - A = tvm.compute((n, lanes), lambda i,j: tvm.const(value, dtype=dtype)) - s = tvm.create_schedule(A.op) + A = te.compute((n, lanes), lambda i,j: tvm.tir.const(value, dtype=dtype)) + s = te.create_schedule(A.op) y, x = s[A].op.axis s[A].vectorize(x) s[A].bind(y, bx) @@ -140,10 +141,10 @@ def test_cuda_make_int8x4(): def test_cuda_inf_nan(): target = 'cuda' def check_inf_nan(ctx, n, value, dtype): - A = tvm.placeholder((n,), name='A', dtype=dtype) - inf_value = tvm.const(value, dtype=dtype) - C = tvm.compute((n,), lambda 
i: inf_value, name='C') - s = tvm.create_schedule(C.op) + A = te.placeholder((n,), name='A', dtype=dtype) + inf_value = tvm.tir.const(value, dtype=dtype) + C = te.compute((n,), lambda i: inf_value, name='C') + s = te.create_schedule(C.op) s[C].bind(s[C].op.axis[0], tx) fun = tvm.build(s, [A, C], target) a = tvm.nd.empty((n,), A.dtype, ctx) @@ -170,36 +171,36 @@ def test_cuda_shuffle(): print("skip because cuda is not enabled..") return - idxm = tvm.indexmod - a = tvm.placeholder((64, ), 'int32') - b = tvm.placeholder((64, ), 'int32') - c = tvm.compute((64, ), lambda x: a[x] + b[x - idxm(x, 4) + (3 - idxm(x, 4))]) - sch = tvm.create_schedule(c.op) + idxm = tvm.tir.indexmod + a = te.placeholder((64, ), 'int32') + b = te.placeholder((64, ), 'int32') + c = te.compute((64, ), lambda x: a[x] + b[x - idxm(x, 4) + (3 - idxm(x, 4))]) + sch = te.create_schedule(c.op) x = c.op.axis[0] xo, xi = sch[c].split(x, 4) - thrx = tvm.thread_axis("threadIdx.x") + thrx = te.thread_axis("threadIdx.x") sch[c].bind(xo, thrx) sch[c].vectorize(xi) def my_vectorize(stmt): def vectorizer(op): if op.for_type == tvm.tir.For.Vectorized: - four = tvm.const(4, 'int32') - idx = tvm.tir.Ramp(thrx.var * four, tvm.const(1, 'int32'), 4) - all_ones = tvm.const(1, 'int32x4') + four = tvm.tir.const(4, 'int32') + idx = tvm.tir.Ramp(thrx.var * four, tvm.tir.const(1, 'int32'), 4) + all_ones = tvm.tir.const(1, 'int32x4') store = op.body value = store.value new_a = tvm.tir.Load('int32x4', value.a.buffer_var, idx, all_ones) bs, ids = [], [] for i in range(4): - bs.append(tvm.tir.Load('int32', value.b.buffer_var, thrx.var * four + tvm.const(i, 'int32'))) - ids.append(tvm.const(3 - i, 'int32')) + bs.append(tvm.tir.Load('int32', value.b.buffer_var, thrx.var * four + tvm.tir.const(i, 'int32'))) + ids.append(tvm.tir.const(3 - i, 'int32')) new_b = tvm.tir.Shuffle(bs, ids) return tvm.tir.Store(store.buffer_var, new_a + new_b, idx, all_ones) return None - return tvm.ir_pass.IRTransform(stmt, None, vectorizer, ['For']) + 
return tvm.tir.ir_pass.IRTransform(stmt, None, vectorizer, ['For']) - with tvm.build_config(add_lower_pass=[(1, my_vectorize)]): + with tvm.target.build_config(add_lower_pass=[(1, my_vectorize)]): module = tvm.build(sch, [a, b, c], target='cuda') a_ = np.array(list(range(64)), dtype='int32') b_ = np.array((list(range(4))[::-1]) * 16, dtype='int32') @@ -215,17 +216,17 @@ def test_cuda_reducition_binding(): print("skip because cuda is not enabled..") return - k = tvm.reduce_axis((0, 32), 'k') - A = tvm.placeholder((96, 32), name='A') - B = tvm.compute( (96,), lambda m: - tvm.sum(A[m, k], axis=k), + k = te.reduce_axis((0, 32), 'k') + A = te.placeholder((96, 32), name='A') + B = te.compute( (96,), lambda m: + te.sum(A[m, k], axis=k), name='B') - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) s[B].reorder(B.op.reduce_axis[0], B.op.axis[0]) mo, _ = s[B].split(B.op.axis[0], 32) - s[B].bind(mo, tvm.thread_axis("blockIdx.x")) + s[B].bind(mo, te.thread_axis("blockIdx.x")) fcuda = tvm.build(s, [A, B], "cuda") @@ -234,15 +235,15 @@ def test_rfactor_predicates(): print("skip because cuda is not enabled..") return - n = tvm.reduce_axis((0, 129), 'n') - A = tvm.placeholder((129,), name='A') - B = tvm.compute( (1, ), lambda b: - tvm.sum(A[n], + n = te.reduce_axis((0, 129), 'n') + A = te.placeholder((129,), name='A') + B = te.compute( (1, ), lambda b: + te.sum(A[n], axis=n), name='B' ) - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) _, ni = s[B].split(s[B].op.reduce_axis[0], factor=8) @@ -270,15 +271,15 @@ def test_cuda_const_float_to_half(): # otherwise it is found that the code gen is done by nvrtc. 
from tvm import autotvm shape = (2, 3, 4) - a = tvm.placeholder(shape, dtype='float16', name='a') - b = tvm.const(0.5, dtype='float16') - c = tvm.compute(shape, lambda i, j, k: a[i, j, k] > b, name='c') - s = tvm.create_schedule(c.op) + a = te.placeholder(shape, dtype='float16', name='a') + b = tvm.tir.const(0.5, dtype='float16') + c = te.compute(shape, lambda i, j, k: a[i, j, k] > b, name='c') + s = te.create_schedule(c.op) axes = [axis for axis in c.op.axis] fused = s[c].fuse(*axes) bx, tx = s[c].split(fused, factor=64) - s[c].bind(bx, tvm.thread_axis('blockIdx.x')) - s[c].bind(tx, tvm.thread_axis('threadIdx.x')) + s[c].bind(bx, te.thread_axis('blockIdx.x')) + s[c].bind(tx, te.thread_axis('threadIdx.x')) func = tvm.build(s, [a, c], 'cuda') ctx = tvm.gpu(0) @@ -298,8 +299,8 @@ def test_cuda_reduction(): print("Skip because gpu does not have fp16 support") return - a = tvm.placeholder((m, n), name="a", dtype=dtype) - b = tvm.placeholder((m, n), name="b", dtype=dtype) + a = te.placeholder((m, n), name="a", dtype=dtype) + b = te.placeholder((m, n), name="b", dtype=dtype) c = a + b d = a * b e = topi.elemwise_sum([c, d]) diff --git a/tests/python/unittest/test_codegen_device.py b/tests/python/unittest/test_codegen_device.py index 63ee030..88abca8 100644 --- a/tests/python/unittest/test_codegen_device.py +++ b/tests/python/unittest/test_codegen_device.py @@ -15,20 +15,21 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te from tvm.contrib import util import numpy as np def test_large_uint_imm(): value = (1 << 63) + 123 - other = tvm.const(3, "uint64") + other = tvm.tir.const(3, "uint64") n = 12 num_thread = 2 - A = tvm.compute((n,), lambda *i: tvm.const(value, "uint64") + other, name='A') - s = tvm.create_schedule(A.op) + A = te.compute((n,), lambda *i: tvm.tir.const(value, "uint64") + other, name='A') + s = te.create_schedule(A.op) xo, xi = s[A].split(A.op.axis[0], factor=num_thread) - s[A].bind(xi, tvm.thread_axis("threadIdx.x")) - s[A].bind(xo, tvm.thread_axis("blockIdx.x")) + s[A].bind(xi, te.thread_axis("threadIdx.x")) + s[A].bind(xo, te.thread_axis("blockIdx.x")) def check_target(device): ctx = tvm.context(device, 0) @@ -45,38 +46,38 @@ def test_large_uint_imm(): def test_add_pipeline(): - n = tvm.size_var('n') - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((), name='B') - C = tvm.compute(A.shape, lambda *i: A(*i) + B(), name='C') - D = tvm.compute(A.shape, lambda *i: C(*i) + 1, name='D') - s = tvm.create_schedule(D.op) + n = te.size_var('n') + A = te.placeholder((n,), name='A') + B = te.placeholder((), name='B') + C = te.compute(A.shape, lambda *i: A(*i) + B(), name='C') + D = te.compute(A.shape, lambda *i: C(*i) + 1, name='D') + s = te.create_schedule(D.op) # GPU schedule have to split by gridIdx and threadIdx num_thread = 256 xo, xi = s[C].split(C.op.axis[0], factor=num_thread) - s[C].bind(xi, tvm.thread_axis("threadIdx.x")) - s[C].bind(xo, tvm.thread_axis("blockIdx.x")) + s[C].bind(xi, te.thread_axis("threadIdx.x")) + s[C].bind(xo, te.thread_axis("blockIdx.x")) xo, xi = s[D].split(D.op.axis[0], factor=num_thread) - s[D].bind(xi, tvm.thread_axis("threadIdx.x")) - s[D].bind(xo, tvm.thread_axis("blockIdx.x")) + s[D].bind(xi, te.thread_axis("threadIdx.x")) + s[D].bind(xo, te.thread_axis("blockIdx.x")) # compile to IR s = s.normalize() - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) - Ab = 
tvm.decl_buffer(A.shape, A.dtype, name='A') - Bb = tvm.decl_buffer(B.shape, B.dtype, name='B') - Db = tvm.decl_buffer(D.shape, D.dtype, name='D') - stmt = tvm.ir_pass.LoopPartition(stmt, False) - stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B:Bb, D:Db}, 64) - stmt = tvm.ir_pass.Simplify(stmt) - fapi = tvm.ir_pass.MakeAPI(stmt, "myadd", [Ab, Bb, Db], 0, True) - fsplits = [x for x in tvm.ir_pass.SplitHostDevice(fapi)] + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B') + Db = tvm.tir.decl_buffer(D.shape, D.dtype, name='D') + stmt = tvm.tir.ir_pass.LoopPartition(stmt, False) + stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B:Bb, D:Db}, 64) + stmt = tvm.tir.ir_pass.Simplify(stmt) + fapi = tvm.tir.ir_pass.MakeAPI(stmt, "myadd", [Ab, Bb, Db], 0, True) + fsplits = [x for x in tvm.tir.ir_pass.SplitHostDevice(fapi)] # lower the floordiv(use stackvm rules so it works for all targets) - fsplits = [tvm.ir_pass.LowerIntrin(x, "stackvm") for x in fsplits] - fsplits[0] = tvm.ir_pass.LowerTVMBuiltin(fsplits[0]) + fsplits = [tvm.tir.ir_pass.LowerIntrin(x, "stackvm") for x in fsplits] + fsplits[0] = tvm.tir.ir_pass.LowerTVMBuiltin(fsplits[0]) def check_target(device, host="stackvm"): ctx = tvm.context(device, 0) diff --git a/tests/python/unittest/test_codegen_extern.py b/tests/python/unittest/test_codegen_extern.py index 03efee5..4104af8 100644 --- a/tests/python/unittest/test_codegen_extern.py +++ b/tests/python/unittest/test_codegen_extern.py @@ -15,37 +15,38 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te import numpy as np def test_add_pipeline(): nn = 64 max_threads = 4 - n = tvm.convert(nn) - A = tvm.placeholder((n,), name='A') + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A') def extern_generator(ins, outs): """Manually write the IR for the extern function, add pipeline""" - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() with ib.for_range(0, (n+1) // 2) as i: - ib.emit(outs[0].vstore(i*2, ins[0].vload(i*2, "float32x2") + tvm.const(1, "float32x2"))) + ib.emit(outs[0].vstore(i*2, ins[0].vload(i*2, "float32x2") + tvm.tir.const(1, "float32x2"))) return ib.get() def extern_generator_gpu(ins, outs): """Manually write the IR for the extern function, add pipeline""" - ib = tvm.ir_builder.create() - bx = tvm.thread_axis("blockIdx.x") - tx = tvm.thread_axis("threadIdx.x") + ib = tvm.tir.ir_builder.create() + bx = te.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") ib.scope_attr(bx, "thread_extent", (nn+max_threads-1) // max_threads) ib.scope_attr(tx, "thread_extent", max_threads) idx = bx.var * max_threads + tx.var with ib.if_scope(ib.likely(idx < n)): - ib.emit(outs[0].vstore(idx*2, ins[0].vload(idx*2, "float32x2") + tvm.const(1, "float32x2"))) + ib.emit(outs[0].vstore(idx*2, ins[0].vload(idx*2, "float32x2") + tvm.tir.const(1, "float32x2"))) return ib.get() - C_cpu = tvm.extern(A.shape, [A], extern_generator, name='C') - C_gpu = tvm.extern(A.shape, [A], extern_generator_gpu, name='C') - s_cpu = tvm.create_schedule(C_cpu.op) - s_gpu = tvm.create_schedule(C_gpu.op) + C_cpu = te.extern(A.shape, [A], extern_generator, name='C') + C_gpu = te.extern(A.shape, [A], extern_generator_gpu, name='C') + s_cpu = te.create_schedule(C_cpu.op) + s_gpu = te.create_schedule(C_gpu.op) print(tvm.lower(s_cpu, [A, C_cpu], simple_mode=True)) print(tvm.lower(s_gpu, [A, C_gpu], simple_mode=True)) @@ -70,14 +71,14 @@ def test_add_pipeline(): def test_pack_buffer_simple(): nn = 1024 - n = tvm.convert(nn) - A = 
tvm.placeholder((n,), name='A') + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A') def extern_generator(ins, outs): """Manually write the IR for the extern function, add pipeline.""" - return tvm.call_packed("my_extern_array_func1", ins[0], outs[0]) + return tvm.tir.call_packed("my_extern_array_func1", ins[0], outs[0]) - C = tvm.extern(A.shape, [A], extern_generator, name='C') - s = tvm.create_schedule(C.op) + C = te.extern(A.shape, [A], extern_generator, name='C') + s = te.create_schedule(C.op) @tvm.register_func def my_extern_array_func1(aa, bb): @@ -104,15 +105,15 @@ def test_pack_buffer_simple(): def test_pack_buffer_intermediate(): nn = 1024 - n = tvm.convert(nn) - A = tvm.placeholder((n,), name='A') - B = tvm.compute((n,), lambda i: A[i] + 1, name="B") + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A') + B = te.compute((n,), lambda i: A[i] + 1, name="B") def extern_generator(ins, outs): """Manually write the IR for the extern function, add pipeline.""" - return tvm.call_packed("my_extern_array_func2", ins[0], outs[0]) + return tvm.tir.call_packed("my_extern_array_func2", ins[0], outs[0]) - C = tvm.extern(B.shape, [B], extern_generator, name='C') - s = tvm.create_schedule(C.op) + C = te.extern(B.shape, [B], extern_generator, name='C') + s = te.create_schedule(C.op) def check_target(target): if not tvm.runtime.enabled(target): diff --git a/tests/python/unittest/test_codegen_llvm.py b/tests/python/unittest/test_codegen_llvm.py index ca32293..45554c5 100644 --- a/tests/python/unittest/test_codegen_llvm.py +++ b/tests/python/unittest/test_codegen_llvm.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te import topi from tvm.contrib import util, clang import numpy as np @@ -22,18 +23,18 @@ import ctypes import math def test_llvm_intrin(): - ib = tvm.ir_builder.create() - n = tvm.convert(4) + ib = tvm.tir.ir_builder.create() + n = tvm.runtime.convert(4) A = ib.pointer("float32", name="A") args = [ - tvm.call_pure_intrin("handle", "tvm_address_of", A[0]), + tvm.tir.call_pure_intrin("handle", "tvm_address_of", A[0]), 0, 3, 1 ] ib.emit(tvm.tir.Evaluate( tvm.tir.Call( "int32", "prefetch", args, tvm.tir.Call.Intrinsic, None, 0))) body = ib.get() - func = tvm.ir_pass.MakeAPI(body, "prefetch", [A], 0, True) + func = tvm.tir.ir_pass.MakeAPI(body, "prefetch", [A], 0, True) fcode = tvm.build(func, None, "llvm") @@ -45,9 +46,9 @@ def test_llvm_import(): } """ n = 10 - A = tvm.placeholder((n,), name='A') - B = tvm.compute((n,), lambda *i: - tvm.call_pure_extern("float32", "my_add", A(*i), 1.0), + A = te.placeholder((n,), name='A') + B = te.compute((n,), lambda *i: + tvm.tir.call_pure_extern("float32", "my_add", A(*i), 1.0), name='B') def check_llvm(use_file): if not tvm.runtime.enabled("llvm"): @@ -58,7 +59,7 @@ def test_llvm_import(): temp = util.tempdir() ll_path = temp.relpath("temp.ll") ll_code = clang.create_llvm(cc_code, output=ll_path) - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) if use_file: s[B].pragma(s[B].op.axis[0], "import_llvm", ll_path) else: @@ -78,21 +79,21 @@ def test_llvm_import(): def test_llvm_lookup_intrin(): - ib = tvm.ir_builder.create() - m = tvm.size_var("m") + ib = tvm.tir.ir_builder.create() + m = te.size_var("m") A = ib.pointer("uint8x8", name="A") - x = tvm.call_llvm_intrin("uint8x8", "llvm.ctpop.i8", tvm.const(1, 'uint32'), A) + x = tvm.tir.call_llvm_intrin("uint8x8", "llvm.ctpop.i8", tvm.tir.const(1, 'uint32'), A) ib.emit(x) body = ib.get() - func = tvm.ir_pass.MakeAPI(body, "ctpop", [A], 1, True) + func = tvm.tir.ir_pass.MakeAPI(body, "ctpop", [A], 1, True) fcode = tvm.build(func, None, "llvm") 
def test_llvm_large_uintimm(): value = (1 << 63) + 123 - other = tvm.const(3, "uint64") - A = tvm.compute((), lambda : tvm.const(value, "uint64") + other, name='A') - s = tvm.create_schedule(A.op) + other = tvm.tir.const(3, "uint64") + A = te.compute((), lambda : tvm.tir.const(value, "uint64") + other, name='A') + s = te.create_schedule(A.op) def check_llvm(): if not tvm.runtime.enabled("llvm"): @@ -109,14 +110,14 @@ def test_llvm_large_uintimm(): def test_llvm_add_pipeline(): nn = 1024 - n = tvm.convert(nn) - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - AA = tvm.compute((n,), lambda *i: A(*i), name='A') - BB = tvm.compute((n,), lambda *i: B(*i), name='B') - T = tvm.compute(A.shape, lambda *i: AA(*i) + BB(*i), name='T') - C = tvm.compute(A.shape, lambda *i: T(*i), name='C') - s = tvm.create_schedule(C.op) + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + AA = te.compute((n,), lambda *i: A(*i), name='A') + BB = te.compute((n,), lambda *i: B(*i), name='B') + T = te.compute(A.shape, lambda *i: AA(*i) + BB(*i), name='T') + C = te.compute(A.shape, lambda *i: T(*i), name='C') + s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], factor=4) xo1, xo2 = s[C].split(xo, factor=13) s[C].parallel(xo2) @@ -129,9 +130,9 @@ def test_llvm_add_pipeline(): if not tvm.runtime.enabled("llvm"): return # Specifically allow offset to test codepath when offset is available - Ab = tvm.decl_buffer( + Ab = tvm.tir.decl_buffer( A.shape, A.dtype, - elem_offset=tvm.size_var('Aoffset'), + elem_offset=te.size_var('Aoffset'), offset_factor=8, name='A') binds = {A : Ab} @@ -147,16 +148,16 @@ def test_llvm_add_pipeline(): tvm.testing.assert_allclose( c.asnumpy(), a.asnumpy() + b.asnumpy()) - with tvm.build_config(offset_factor=4): + with tvm.target.build_config(offset_factor=4): check_llvm() def test_llvm_persist_parallel(): n = 128 - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda 
*i: A(*i) + 1, name='B') - C = tvm.compute(A.shape, lambda *i: tvm.sqrt(B(*i)) * 2 + 2, name='C') - s = tvm.create_schedule(C.op) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1, name='B') + C = te.compute(A.shape, lambda *i: te.sqrt(B(*i)) * 2 + 2, name='C') + s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], factor=8) xo1, xo2 = s[C].split(xo, nparts=1) s[B].compute_at(s[C], xo1) @@ -187,10 +188,10 @@ def test_llvm_flip_pipeline(): def check_llvm(nn, base): if not tvm.runtime.enabled("llvm"): return - n = tvm.convert(nn) - A = tvm.placeholder((n + base), name='A') - C = tvm.compute((n,), lambda i: A(nn + base- i - 1), name='C') - s = tvm.create_schedule(C.op) + n = tvm.runtime.convert(nn) + A = te.placeholder((n + base), name='A') + C = te.compute((n,), lambda i: A(nn + base- i - 1), name='C') + s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], factor=4) s[C].parallel(xo) s[C].vectorize(xi) @@ -214,10 +215,10 @@ def test_llvm_vadd_pipeline(): def check_llvm(n, lanes): if not tvm.runtime.enabled("llvm"): return - A = tvm.placeholder((n,), name='A', dtype="float32x%d" % lanes) - B = tvm.compute((n,), lambda i: A[i], name='B') - C = tvm.compute((n,), lambda i: B[i] + tvm.const(1, A.dtype), name='C') - s = tvm.create_schedule(C.op) + A = te.placeholder((n,), name='A', dtype="float32x%d" % lanes) + B = te.compute((n,), lambda i: A[i], name='B') + C = te.compute((n,), lambda i: B[i] + tvm.tir.const(1, A.dtype), name='C') + s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], nparts=2) _, xi = s[C].split(xi, factor=2) s[C].parallel(xo) @@ -243,10 +244,10 @@ def test_llvm_madd_pipeline(): def check_llvm(nn, base, stride): if not tvm.runtime.enabled("llvm"): return - n = tvm.convert(nn) - A = tvm.placeholder((n + base, stride), name='A') - C = tvm.compute((n, stride), lambda i, j: A(base + i, j) + 1, name='C') - s = tvm.create_schedule(C.op) + n = tvm.runtime.convert(nn) + A = te.placeholder((n + 
base, stride), name='A') + C = te.compute((n, stride), lambda i, j: A(base + i, j) + 1, name='C') + s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], factor=4) s[C].parallel(xo) s[C].vectorize(xi) @@ -262,17 +263,17 @@ def test_llvm_madd_pipeline(): c.asnumpy(), a.asnumpy()[base:] + 1) check_llvm(64, 0, 2) check_llvm(4, 0, 1) - with tvm.build_config(restricted_func=False): + with tvm.target.build_config(restricted_func=False): check_llvm(4, 0, 3) def test_llvm_temp_space(): nn = 1024 - n = tvm.convert(nn) - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda i: A(i) + 1, name='B') - C = tvm.compute(A.shape, lambda i: B(i) + 1, name='C') - s = tvm.create_schedule(C.op) + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda i: A(i) + 1, name='B') + C = te.compute(A.shape, lambda i: B(i) + 1, name='C') + s = te.create_schedule(C.op) def check_llvm(): if not tvm.runtime.enabled("llvm"): @@ -291,11 +292,11 @@ def test_llvm_temp_space(): def test_multiple_func(): nn = 1024 - n = tvm.convert(nn) - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') - s = tvm.create_schedule(C.op) + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') + s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], factor=4) s[C].parallel(xo) s[C].vectorize(xi) @@ -328,9 +329,9 @@ def test_llvm_condition(): def check_llvm(n, offset): if not tvm.runtime.enabled("llvm"): return - A = tvm.placeholder((n, ), name='A') - C = tvm.compute((n,), lambda i: tvm.if_then_else(i >= offset, A[i], 0.0), name='C') - s = tvm.create_schedule(C.op) + A = te.placeholder((n, ), name='A') + C = te.compute((n,), lambda i: tvm.tir.if_then_else(i >= offset, A[i], 0.0), name='C') + s = te.create_schedule(C.op) # build and invoke the kernel. 
f = tvm.build(s, [A, C], "llvm") ctx = tvm.cpu(0) @@ -348,9 +349,9 @@ def test_llvm_bool(): def check_llvm(n): if not tvm.runtime.enabled("llvm"): return - A = tvm.placeholder((n, ), name='A', dtype="int32") - C = tvm.compute((n,), lambda i: A[i].equal(1).astype("float"), name='C') - s = tvm.create_schedule(C.op) + A = te.placeholder((n, ), name='A', dtype="int32") + C = te.compute((n,), lambda i: A[i].equal(1).astype("float"), name='C') + s = te.create_schedule(C.op) # build and invoke the kernel. f = tvm.build(s, [A, C], "llvm") ctx = tvm.cpu(0) @@ -367,12 +368,12 @@ def test_rank_zero(): def check_llvm(n): if not tvm.runtime.enabled("llvm"): return - A = tvm.placeholder((n, ), name='A') - scale = tvm.placeholder((), name='scale') - k = tvm.reduce_axis((0, n), name="k") - C = tvm.compute((), lambda : tvm.sum(A[k] * scale(), axis=k), name="C") - D = tvm.compute((), lambda : C() + 1) - s = tvm.create_schedule(D.op) + A = te.placeholder((n, ), name='A') + scale = te.placeholder((), name='scale') + k = te.reduce_axis((0, n), name="k") + C = te.compute((), lambda : te.sum(A[k] * scale(), axis=k), name="C") + D = te.compute((), lambda : C() + 1) + s = te.create_schedule(D.op) # build and invoke the kernel. 
f = tvm.build(s, [A, scale, D], "llvm") ctx = tvm.cpu(0) @@ -390,13 +391,13 @@ def test_rank_zero_bound_checkers(): def check_llvm(n): if not tvm.runtime.enabled("llvm"): return - with tvm.build_config(instrument_bound_checkers=True): - A = tvm.placeholder((n, ), name='A') - scale = tvm.placeholder((), name='scale') - k = tvm.reduce_axis((0, n), name="k") - C = tvm.compute((), lambda : tvm.sum(A[k] * scale(), axis=k), name="C") - D = tvm.compute((), lambda : C() + 1) - s = tvm.create_schedule(D.op) + with tvm.target.build_config(instrument_bound_checkers=True): + A = te.placeholder((n, ), name='A') + scale = te.placeholder((), name='scale') + k = te.reduce_axis((0, n), name="k") + C = te.compute((), lambda : te.sum(A[k] * scale(), axis=k), name="C") + D = te.compute((), lambda : C() + 1) + s = te.create_schedule(D.op) # build and invoke the kernel. f = tvm.build(s, [A, scale, D], "llvm") ctx = tvm.cpu(0) @@ -412,10 +413,10 @@ def test_rank_zero_bound_checkers(): def test_alignment(): - n = tvm.convert(1024) - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda i: A[i] * 3, name='B') - s = tvm.create_schedule(B.op) + n = tvm.runtime.convert(1024) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda i: A[i] * 3, name='B') + s = te.create_schedule(B.op) bx, tx = s[B].split(B.op.axis[0], factor=8) s[B].vectorize(tx) f = tvm.build(s, [A, B], "llvm") @@ -427,26 +428,26 @@ def test_alignment(): def test_llvm_div(): """Check that the semantics of div and mod is correct""" def check(start, end, dstart, dend, dtype, floor_div=False): - div = tvm.floordiv if floor_div else tvm.truncdiv - mod = tvm.floormod if floor_div else tvm.truncmod + div = tvm.te.floordiv if floor_div else tvm.tir.truncdiv + mod = tvm.te.floormod if floor_div else tvm.tir.truncmod # A are dividends, B are divisors. Note that we add 1 to make include end in the range. 
- A = tvm.placeholder((end - start + 1,), name="A", dtype=dtype) - B = tvm.placeholder((dend - dstart + 1,), name="B", dtype=dtype) + A = te.placeholder((end - start + 1,), name="A", dtype=dtype) + B = te.placeholder((dend - dstart + 1,), name="B", dtype=dtype) # We clip values with min and max so that simplifiers know the ranges of values - clipa = lambda x: tvm.min(tvm.const(end, dtype), tvm.max(tvm.const(start, dtype), x)) - clipb = lambda x: tvm.min(tvm.const(dend, dtype), tvm.max(tvm.const(dstart, dtype), x)) + clipa = lambda x: tvm.te.min(tvm.tir.const(end, dtype), tvm.te.max(tvm.tir.const(start, dtype), x)) + clipb = lambda x: tvm.te.min(tvm.tir.const(dend, dtype), tvm.te.max(tvm.tir.const(dstart, dtype), x)) # If the range is just a single point, use the constant itself if start == end: - clipa = lambda x: tvm.const(start, dtype) + clipa = lambda x: tvm.tir.const(start, dtype) if dstart == dend: - clipb = lambda x: tvm.const(dstart, dtype) + clipb = lambda x: tvm.tir.const(dstart, dtype) # D are division results and M are modulo results - [D, M] = tvm.compute((end - start + 1, dend - dstart + 1), + [D, M] = te.compute((end - start + 1, dend - dstart + 1), lambda i, j: (div(clipa(A[i]), clipb(B[j])), mod(clipa(A[i]), clipb(B[j])))) - s = tvm.create_schedule([D.op, M.op]) + s = te.create_schedule([D.op, M.op]) f = tvm.build(s, [A, B, D, M], "llvm") # Fill input arrays with values @@ -525,10 +526,10 @@ def test_llvm_div(): def test_llvm_fp_math(): def check_llvm_reciprocal(n): - A = tvm.placeholder((n,), name='A') - B = tvm.compute((n,), lambda i: tvm.div(1.0,(1e+37*A[i])), name='B') + A = te.placeholder((n,), name='A') + B = te.compute((n,), lambda i: te.div(1.0,(1e+37*A[i])), name='B') - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) f = tvm.build(s, [A, B], "llvm") a = tvm.nd.array(np.full((n,), 100, 'float32')) @@ -541,10 +542,10 @@ def test_llvm_fp_math(): check_llvm_reciprocal(16) def check_llvm_sigmoid(n): - A = tvm.placeholder((n,), 
name='A') - B = tvm.compute((n,), lambda i: tvm.sigmoid(A[i]), name='B') + A = te.placeholder((n,), name='A') + B = te.compute((n,), lambda i: te.sigmoid(A[i]), name='B') - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) f = tvm.build(s, [A, B], "llvm") a = tvm.nd.array(np.full((n,), -1000, 'float32')) @@ -559,11 +560,11 @@ def test_llvm_fp_math(): def test_dwarf_debug_information(): nn = 1024 - n = tvm.convert(nn) - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') - s = tvm.create_schedule(C.op) + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') + s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], factor=4) s[C].parallel(xo) s[C].vectorize(xi) @@ -634,27 +635,27 @@ def test_dwarf_debug_information(): def test_llvm_shuffle(): - a = tvm.placeholder((8, ), 'int32') - b = tvm.placeholder((8, ), 'int32') - c = tvm.compute((8, ), lambda x: a[x] + b[7-x]) - sch = tvm.create_schedule(c.op) + a = te.placeholder((8, ), 'int32') + b = te.placeholder((8, ), 'int32') + c = te.compute((8, ), lambda x: a[x] + b[7-x]) + sch = te.create_schedule(c.op) def my_vectorize(stmt): def vectorizer(op): store = op.body - idx = tvm.tir.Ramp(tvm.const(0, 'int32'), tvm.const(1, 'int32'), 8) - all_ones = tvm.const(1, 'int32x8') + idx = tvm.tir.Ramp(tvm.tir.const(0, 'int32'), tvm.tir.const(1, 'int32'), 8) + all_ones = tvm.tir.const(1, 'int32x8') value = store.value - b_idx = tvm.tir.Shuffle([idx], [tvm.const(i, 'int32') for i in range(7, -1, -1)]) + b_idx = tvm.tir.Shuffle([idx], [tvm.tir.const(i, 'int32') for i in range(7, -1, -1)]) new_a = tvm.tir.Load('int32x8', value.a.buffer_var, idx, all_ones) new_b = tvm.tir.Load('int32x8', value.b.buffer_var, b_idx, all_ones) value = new_a + new_b return tvm.tir.Store(store.buffer_var, new_a + new_b, idx, all_ones) - return 
tvm.ir_pass.IRTransform(stmt, None, vectorizer, ['For']) + return tvm.tir.ir_pass.IRTransform(stmt, None, vectorizer, ['For']) - with tvm.build_config(add_lower_pass=[(1, my_vectorize)]): + with tvm.target.build_config(add_lower_pass=[(1, my_vectorize)]): ir = tvm.lower(sch, [a, b, c], simple_mode=True) module = tvm.build(sch, [a, b, c]) a_ = tvm.nd.array(np.arange(1, 9, dtype='int32')) diff --git a/tests/python/unittest/test_codegen_opencl.py b/tests/python/unittest/test_codegen_opencl.py index 3b9b4a7..140e1f6 100644 --- a/tests/python/unittest/test_codegen_opencl.py +++ b/tests/python/unittest/test_codegen_opencl.py @@ -15,19 +15,20 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te target = 'opencl' def test_opencl_ternary_expression(): def check_if_then_else(ctx, n, dtype): - A = tvm.placeholder((n,), name='A', dtype=dtype) - true_value = tvm.const(1, dtype=dtype) - false_value = tvm.const(3, dtype=dtype) - max_lhs = tvm.const(2, dtype=dtype) - max_rhs = tvm.if_then_else(A[0] > 0, true_value, false_value) - C = tvm.compute((n,), lambda i: tvm.max(max_lhs, max_rhs), name='C') - s = tvm.create_schedule(C.op) - s[C].bind(s[C].op.axis[0], tvm.thread_axis("threadIdx.x")) + A = te.placeholder((n,), name='A', dtype=dtype) + true_value = tvm.tir.const(1, dtype=dtype) + false_value = tvm.tir.const(3, dtype=dtype) + max_lhs = tvm.tir.const(2, dtype=dtype) + max_rhs = tvm.tir.if_then_else(A[0] > 0, true_value, false_value) + C = te.compute((n,), lambda i: tvm.te.max(max_lhs, max_rhs), name='C') + s = te.create_schedule(C.op) + s[C].bind(s[C].op.axis[0], te.thread_axis("threadIdx.x")) fun = tvm.build(s, [A, C], target) a = tvm.nd.empty((n,), A.dtype, ctx) @@ -36,14 +37,14 @@ def test_opencl_ternary_expression(): fun(a, c) def check_select(ctx, n, dtype): - A = tvm.placeholder((n,), name='A', dtype=dtype) - true_value = tvm.const(1, dtype=dtype) - false_value = tvm.const(3, dtype=dtype) - max_lhs = tvm.const(2, 
dtype=dtype) + A = te.placeholder((n,), name='A', dtype=dtype) + true_value = tvm.tir.const(1, dtype=dtype) + false_value = tvm.tir.const(3, dtype=dtype) + max_lhs = tvm.tir.const(2, dtype=dtype) max_rhs = tvm.tir.Select(A[0] > 0, true_value, false_value) - C = tvm.compute((n,), lambda i: tvm.max(max_lhs, max_rhs), name='C') - s = tvm.create_schedule(C.op) - s[C].bind(s[C].op.axis[0], tvm.thread_axis("threadIdx.x")) + C = te.compute((n,), lambda i: tvm.te.max(max_lhs, max_rhs), name='C') + s = te.create_schedule(C.op) + s[C].bind(s[C].op.axis[0], te.thread_axis("threadIdx.x")) fun = tvm.build(s, [A, C], target) a = tvm.nd.empty((n,), A.dtype, ctx) @@ -68,11 +69,11 @@ def test_opencl_ternary_expression(): def test_opencl_inf_nan(): def check_inf_nan(ctx, n, value, dtype): - A = tvm.placeholder((n,), name='A', dtype=dtype) - inf_value = tvm.const(value, dtype=dtype) - C = tvm.compute((n,), lambda i: inf_value, name='C') - s = tvm.create_schedule(C.op) - s[C].bind(s[C].op.axis[0], tvm.thread_axis("threadIdx.x")) + A = te.placeholder((n,), name='A', dtype=dtype) + inf_value = tvm.tir.const(value, dtype=dtype) + C = te.compute((n,), lambda i: inf_value, name='C') + s = te.create_schedule(C.op) + s[C].bind(s[C].op.axis[0], te.thread_axis("threadIdx.x")) fun = tvm.build(s, [A, C], target) a = tvm.nd.empty((n,), A.dtype, ctx) c = tvm.nd.empty((n,), A.dtype, ctx) diff --git a/tests/python/unittest/test_codegen_rocm.py b/tests/python/unittest/test_codegen_rocm.py index 73f7646..f107e59 100644 --- a/tests/python/unittest/test_codegen_rocm.py +++ b/tests/python/unittest/test_codegen_rocm.py @@ -15,23 +15,24 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te import numpy as np import unittest -tx = tvm.thread_axis("threadIdx.x") -ty = tvm.thread_axis("threadIdx.y") -bx = tvm.thread_axis("blockIdx.x") -by = tvm.thread_axis("blockIdx.y") +tx = te.thread_axis("threadIdx.x") +ty = te.thread_axis("threadIdx.y") +bx = te.thread_axis("blockIdx.x") +by = te.thread_axis("blockIdx.y") @unittest.skipIf(not tvm.rocm(0).exist or not tvm.runtime.enabled("rocm"), "skip because rocm is not enabled..") def test_rocm_cross_thread_reduction(): # based on the reduction tutorial - n = tvm.size_var("n") - m = tvm.size_var("m") - A = tvm.placeholder((n, m), name='A') - k = tvm.reduce_axis((0, m), "k") - B = tvm.compute((n,), lambda i: tvm.sum(A[i, k], axis=k), name="B") - s = tvm.create_schedule(B.op) + n = te.size_var("n") + m = te.size_var("m") + A = te.placeholder((n, m), name='A') + k = te.reduce_axis((0, m), "k") + B = te.compute((n,), lambda i: te.sum(A[i, k], axis=k), name="B") + s = te.create_schedule(B.op) ko, ki = s[B].split(B.op.reduce_axis[0], factor=16) BF = s.rfactor(B, ki) xo, xi = s[B].split(s[B].op.axis[0], factor=32) @@ -54,10 +55,10 @@ def test_rocm_cross_thread_reduction(): @unittest.skipIf(not tvm.rocm(0).exist or not tvm.runtime.enabled("rocm"), "skip because rocm is not enabled..") def test_rocm_inf_nan(): def check_inf_nan(ctx, n, value, dtype): - A = tvm.placeholder((n,), name='A', dtype=dtype) - inf_value = tvm.const(value, dtype=dtype) - C = tvm.compute((n,), lambda i: inf_value, name='C') - s = tvm.create_schedule(C.op) + A = te.placeholder((n,), name='A', dtype=dtype) + inf_value = tvm.tir.const(value, dtype=dtype) + C = te.compute((n,), lambda i: inf_value, name='C') + s = te.create_schedule(C.op) s[C].bind(s[C].op.axis[0], tx) fun = tvm.build(s, [A, C], "rocm") a = tvm.nd.empty((n,), A.dtype, ctx) @@ -76,12 +77,12 @@ def test_rocm_inf_nan(): @unittest.skipIf(not tvm.rocm(0).exist or not tvm.runtime.enabled("rocm"), "skip because rocm is not enabled..") def 
test_rocm_reducition_binding(): - k = tvm.reduce_axis((0, 32), 'k') - A = tvm.placeholder((96, 32), name='A') - B = tvm.compute( (96,), lambda m: - tvm.sum(A[m, k], axis=k), + k = te.reduce_axis((0, 32), 'k') + A = te.placeholder((96, 32), name='A') + B = te.compute( (96,), lambda m: + te.sum(A[m, k], axis=k), name='B') - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) s[B].reorder(B.op.reduce_axis[0], B.op.axis[0]) @@ -92,7 +93,7 @@ def test_rocm_reducition_binding(): def test_rocm_copy(): def check_rocm(dtype, n): - A = tvm.placeholder((n,), name='A', dtype=dtype) + A = te.placeholder((n,), name='A', dtype=dtype) ctx = tvm.rocm(0) a_np = np.random.uniform(size=(n,)).astype(A.dtype) a = tvm.nd.empty((n,), A.dtype, ctx).copyfrom(a_np) @@ -111,9 +112,9 @@ def test_rocm_vectorize_add(): num_thread = 8 def check_rocm(dtype, n, lanes): - A = tvm.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes)) - B = tvm.compute((n,), lambda i: A[i]+tvm.const(1, A.dtype), name='B') - s = tvm.create_schedule(B.op) + A = te.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes)) + B = te.compute((n,), lambda i: A[i]+tvm.tir.const(1, A.dtype), name='B') + s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=num_thread) s[B].bind(xo, bx) s[B].bind(xi, tx) diff --git a/tests/python/unittest/test_codegen_static_init.py b/tests/python/unittest/test_codegen_static_init.py index 4d71cb3..3b5f17a 100644 --- a/tests/python/unittest/test_codegen_static_init.py +++ b/tests/python/unittest/test_codegen_static_init.py @@ -15,24 +15,25 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te import ctypes import numpy as np def test_static_callback(): dtype = 'int64' - n = tvm.size_var('n') - Ab = tvm.decl_buffer((n, ), dtype) - i = tvm.size_var('i') - ib = tvm.ir_builder.create() + n = te.size_var('n') + Ab = tvm.tir.decl_buffer((n, ), dtype) + i = te.size_var('i') + ib = tvm.tir.ir_builder.create() A = ib.buffer_ptr(Ab) - cp = tvm.thread_axis((0, 1), "cop") + cp = te.thread_axis((0, 1), "cop") finit = tvm.tir.StringImm("TVMBackendRunOnce") ib.scope_attr(cp, "coproc_uop_scope", finit) with ib.for_range(0, n, "i", for_type="parallel") as i: A[i] = A[i] + 1 stmt = ib.get() - fapi = tvm.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True) - fapi = tvm.ir_pass.LowerTVMBuiltin(fapi) + fapi = tvm.tir.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True) + fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi) f = tvm.target.codegen.build_module(fapi, "llvm") a = tvm.nd.array(np.zeros(10, dtype=dtype)) f(a) @@ -41,13 +42,13 @@ def test_static_callback(): def test_static_init(): dtype = 'int64' - n = tvm.size_var('n') - Ab = tvm.decl_buffer((n, ), dtype) - i = tvm.size_var('i') - ib = tvm.ir_builder.create() - handle = tvm.call_intrin("handle", "tvm_static_handle") + n = te.size_var('n') + Ab = tvm.tir.decl_buffer((n, ), dtype) + i = te.size_var('i') + ib = tvm.tir.ir_builder.create() + handle = tvm.tir.call_intrin("handle", "tvm_static_handle") ib.emit( - tvm.call_packed("test_static_callback", handle, Ab)) + tvm.tir.call_packed("test_static_callback", handle, Ab)) @tvm.register_func("test_static_callback") def test_cb(sh, A): @@ -55,8 +56,8 @@ def test_static_init(): return sh stmt = ib.get() - fapi = tvm.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True) - fapi = tvm.ir_pass.LowerTVMBuiltin(fapi) + fapi = tvm.tir.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True) + fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi) f = tvm.target.codegen.build_module(fapi, "llvm") a = tvm.nd.array(np.zeros(10, dtype=dtype)) f(a) diff --git 
a/tests/python/unittest/test_codegen_vm_basic.py b/tests/python/unittest/test_codegen_vm_basic.py index 7f08c75..e2ff487 100644 --- a/tests/python/unittest/test_codegen_vm_basic.py +++ b/tests/python/unittest/test_codegen_vm_basic.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np def run_jit(fapi, check): @@ -32,12 +33,12 @@ def test_stack_vm_basic(): print(shape0) assert shape0 == a.shape[0] - n = tvm.size_var('n') - Ab = tvm.decl_buffer((n, ), tvm.float32) - stmt = tvm.tir.Evaluate(tvm.call_packed("tvm_call_back_get_shape", Ab.shape[0])) - fapi = tvm.ir_pass.MakeAPI(stmt, "print_shape", [Ab], 0, True) - fapi = tvm.ir_pass.LowerTVMBuiltin(fapi) - fapi = tvm.ir_pass.LowerIntrin(fapi, "stackvm") + n = te.size_var('n') + Ab = tvm.tir.decl_buffer((n, ), "float32") + stmt = tvm.tir.Evaluate(tvm.tir.call_packed("tvm_call_back_get_shape", Ab.shape[0])) + fapi = tvm.tir.ir_pass.MakeAPI(stmt, "print_shape", [Ab], 0, True) + fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi) + fapi = tvm.tir.ir_pass.LowerIntrin(fapi, "stackvm") run_jit(fapi, lambda f: f(a)) @@ -47,19 +48,19 @@ def tvm_stack_vm_print(*x): def test_stack_vm_loop(): dtype = 'int64' - n = tvm.size_var('n') - Ab = tvm.decl_buffer((n, ), dtype) - i = tvm.size_var('i') + n = te.size_var('n') + Ab = tvm.tir.decl_buffer((n, ), dtype) + i = te.size_var('i') - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() A = ib.buffer_ptr(Ab) with ib.for_range(0, n - 1, "i") as i: A[i + 1] = A[i] + 1 - ib.emit(tvm.call_packed("tvm_stack_vm_print", i)) + ib.emit(tvm.tir.call_packed("tvm_stack_vm_print", i)) stmt = ib.get() - fapi = tvm.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True) - fapi = tvm.ir_pass.LowerTVMBuiltin(fapi) + fapi = tvm.tir.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True) + fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi) a = tvm.nd.array(np.zeros(10, dtype=dtype)) def check(f): f(a) @@ -69,10 +70,10 @@ def 
test_stack_vm_loop(): def test_stack_vm_cond(): dtype = 'int64' - n = tvm.size_var('n') - Ab = tvm.decl_buffer((n, ), dtype) + n = te.size_var('n') + Ab = tvm.tir.decl_buffer((n, ), dtype) - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() A = ib.buffer_ptr(Ab) with ib.for_range(0, n - 1, "i") as i: with ib.if_scope(tvm.tir.EQ(i, 4)): @@ -81,8 +82,8 @@ def test_stack_vm_cond(): A[i + 1] = A[i] + 2 stmt = ib.get() - fapi = tvm.ir_pass.MakeAPI(stmt, "test", [Ab], 0, True) - fapi = tvm.ir_pass.LowerTVMBuiltin(fapi) + fapi = tvm.tir.ir_pass.MakeAPI(stmt, "test", [Ab], 0, True) + fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi) def check(f): a = tvm.nd.array(np.zeros(10, dtype=dtype)) f(a) @@ -93,16 +94,16 @@ def test_stack_vm_cond(): def test_vm_parallel(): dtype = 'int64' - n = tvm.size_var('n') - Ab = tvm.decl_buffer((n, ), dtype) - i = tvm.size_var('i') - ib = tvm.ir_builder.create() + n = te.size_var('n') + Ab = tvm.tir.decl_buffer((n, ), dtype) + i = te.size_var('i') + ib = tvm.tir.ir_builder.create() A = ib.buffer_ptr(Ab) with ib.for_range(0, n, "i", for_type="parallel") as i: A[i] = A[i] + 1 stmt = ib.get() - fapi = tvm.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True) - fapi = tvm.ir_pass.LowerTVMBuiltin(fapi) + fapi = tvm.tir.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True) + fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi) def check(f): a = tvm.nd.array(np.zeros(10, dtype=dtype)) f(a) diff --git a/tests/python/unittest/test_codegen_vulkan.py b/tests/python/unittest/test_codegen_vulkan.py index d480a0f..722a9ec 100644 --- a/tests/python/unittest/test_codegen_vulkan.py +++ b/tests/python/unittest/test_codegen_vulkan.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te import re import numpy as np @@ -28,18 +29,18 @@ def test_vector_comparison(): def check_correct_assembly(dtype): n = (1024,) - A = tvm.placeholder(n, dtype=dtype, name='A') - B = tvm.compute( + A = te.placeholder(n, dtype=dtype, name='A') + B = te.compute( A.shape, lambda i: tvm.tir.Select( - A[i] >= 0, A[i] + tvm.const(1, dtype), - tvm.const(0, dtype)), name='B') - s = tvm.create_schedule(B.op) + A[i] >= 0, A[i] + tvm.tir.const(1, dtype), + tvm.tir.const(0, dtype)), name='B') + s = te.create_schedule(B.op) (bx, tx) = s[B].split(s[B].op.axis[0], factor=128) (tx, vx) = s[B].split(tx, factor=4) - s[B].bind(bx, tvm.thread_axis("blockIdx.x")) - s[B].bind(tx, tvm.thread_axis("threadIdx.x")) + s[B].bind(bx, te.thread_axis("blockIdx.x")) + s[B].bind(tx, te.thread_axis("threadIdx.x")) s[B].vectorize(vx) f = tvm.build(s, [A, B], target) @@ -55,8 +56,8 @@ def test_vector_comparison(): check_correct_assembly('float16') -tx = tvm.thread_axis("threadIdx.x") -bx = tvm.thread_axis("blockIdx.x") +tx = te.thread_axis("threadIdx.x") +bx = te.thread_axis("blockIdx.x") def test_vulkan_copy(): @@ -65,7 +66,7 @@ def test_vulkan_copy(): if not tvm.vulkan(0).exist or not tvm.runtime.enabled("vulkan"): print("skip because vulkan is not enabled..") return - A = tvm.placeholder((n,), name='A', dtype=dtype) + A = te.placeholder((n,), name='A', dtype=dtype) ctx = tvm.vulkan(0) a_np = np.random.uniform(size=(n,)).astype(A.dtype) a = tvm.nd.empty((n,), A.dtype, ctx).copyfrom(a_np) @@ -87,9 +88,9 @@ def test_vulkan_vectorize_add(): if not tvm.vulkan(0).exist or not tvm.runtime.enabled("vulkan"): print("skip because vulkan is not enabled..") return - A = tvm.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes)) - B = tvm.compute((n,), lambda i: A[i]+tvm.const(1, A.dtype), name='B') - s = tvm.create_schedule(B.op) + A = te.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes)) + B = te.compute((n,), lambda i: A[i]+tvm.tir.const(1, A.dtype), name='B') + s = 
te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=num_thread) s[B].bind(xo, bx) s[B].bind(xi, tx) @@ -120,21 +121,21 @@ def test_vulkan_stress(): if not tvm.vulkan(0).exist or not tvm.runtime.enabled("vulkan"): print("skip because vulkan is not enabled..") return - A = tvm.placeholder((n,), name='A', dtype="float32") - B = tvm.placeholder((n,), name='B', dtype="float32") + A = te.placeholder((n,), name='A', dtype="float32") + B = te.placeholder((n,), name='B', dtype="float32") functions = [ - (lambda: tvm.compute((n,), lambda i: 2 * A[i] + 3 * B[i]), + (lambda: te.compute((n,), lambda i: 2 * A[i] + 3 * B[i]), lambda a, b: 2 * a + 3 * b), - (lambda: tvm.compute((n,), lambda i: A[i]+B[i]), + (lambda: te.compute((n,), lambda i: A[i]+B[i]), lambda a, b: a + b), - (lambda: tvm.compute((n,), lambda i: A[i]+2 * B[i]), + (lambda: te.compute((n,), lambda i: A[i]+2 * B[i]), lambda a, b: a + 2 * b), ] def build_f(f_ref): (C_f, ref) = f_ref C = C_f() - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], factor=num_thread) s[C].bind(xo, bx) s[C].bind(xi, tx) diff --git a/tests/python/unittest/test_codegen_x86.py b/tests/python/unittest/test_codegen_x86.py index e17c6cf..cdba774 100644 --- a/tests/python/unittest/test_codegen_x86.py +++ b/tests/python/unittest/test_codegen_x86.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te import re @@ -26,10 +27,10 @@ def test_fp16_to_fp32(): def fp16_to_fp32(target, width, match=None, not_match=None): elements = 64 - n = tvm.convert(elements) - A = tvm.placeholder((n, width), dtype="float16", name='A') - B = tvm.compute(A.shape, lambda *i: A(*i).astype("float32"), name='B') - s = tvm.create_schedule(B.op) + n = tvm.runtime.convert(elements) + A = te.placeholder((n, width), dtype="float16", name='A') + B = te.compute(A.shape, lambda *i: A(*i).astype("float32"), name='B') + s = te.create_schedule(B.op) s[B].vectorize(s[B].op.axis[1]) f = tvm.build(s, [A, B], target) diff --git a/tests/python/unittest/test_container.py b/tests/python/unittest/test_container.py index f7ffd02..84b26be 100644 --- a/tests/python/unittest/test_container.py +++ b/tests/python/unittest/test_container.py @@ -17,6 +17,7 @@ import numpy as np import tvm +from tvm import te from tvm import nd, relay from tvm.runtime import container as _container diff --git a/tests/python/unittest/test_custom_datatypes_mybfloat16.py b/tests/python/unittest/test_custom_datatypes_mybfloat16.py index cae4813..32f6e18 100644 --- a/tests/python/unittest/test_custom_datatypes_mybfloat16.py +++ b/tests/python/unittest/test_custom_datatypes_mybfloat16.py @@ -16,6 +16,7 @@ # under the License. 
import tvm +from tvm import te from ctypes import * import topi import tvm.tir.ir_pass as ir_pass @@ -56,14 +57,14 @@ def lower_datatypes_and_build(schedule, args): return tvm.build(flist[0], target=tgt) def test_bfloat_add_and_cast_1(): - X = tvm.placeholder((3, ), name="X") - Y = tvm.placeholder((3, ), name="Y") + X = te.placeholder((3, ), name="X") + Y = te.placeholder((3, ), name="Y") Z = topi.cast( topi.cast(X, dtype="custom[bfloat]16") + topi.cast(Y, dtype="custom[bfloat]16"), dtype="float") - s = tvm.create_schedule([Z.op]) + s = te.create_schedule([Z.op]) built_cast = lower_datatypes_and_build(s, [X,Y,Z]) ctx = tvm.context(tgt, 0) @@ -87,14 +88,14 @@ def test_bfloat_add_and_cast_1(): def test_bfloat_add_and_cast_2(): - X = tvm.placeholder((3, ), name="X") - Y = tvm.placeholder((3, ), name="Y") + X = te.placeholder((3, ), name="X") + Y = te.placeholder((3, ), name="Y") Z = topi.cast( topi.cast(X, dtype="custom[bfloat]16") + topi.cast(Y, dtype="custom[bfloat]16"), dtype="float") - s = tvm.create_schedule([Z.op]) + s = te.create_schedule([Z.op]) built_cast = lower_datatypes_and_build(s, [X,Y,Z]) ctx = tvm.context(tgt, 0) @@ -122,14 +123,14 @@ def test_bfloat_add_and_cast_2(): def test_bfloat_add_and_cast_FloatImm(): - X = tvm.placeholder((3, ), name="X") + X = te.placeholder((3, ), name="X") Z = topi.cast( topi.add( topi.cast(X, dtype="custom[bfloat]16"), tvm.tir.FloatImm("custom[bfloat]16", 1.5)), dtype="float") - s = tvm.create_schedule([Z.op]) + s = te.create_schedule([Z.op]) built_cast = lower_datatypes_and_build(s, [X,Z]) ctx = tvm.context(tgt, 0) diff --git a/tests/python/unittest/test_graph_tuner_core.py b/tests/python/unittest/test_graph_tuner_core.py index 27e077f..a7be18a 100644 --- a/tests/python/unittest/test_graph_tuner_core.py +++ b/tests/python/unittest/test_graph_tuner_core.py @@ -24,6 +24,7 @@ import os import copy import numpy as np import tvm +from tvm import te import tvm.relay.testing from tvm import autotvm @@ -79,20 +80,20 @@ def 
_create_data(target, dshape, dtype, layout): records.append((ms_input, ms_output)) ltf_records = [] - ltf_arg = [tvm.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"] + ltf_arg = [te.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"] ltf_task = autotvm.task.create('layout_transform', ltf_arg, target) ms_input = MeasureInput(target=target, task=ltf_task, config=None) ms_output = MeasureResult(costs=(1.91224744e-05,), error_no=0, all_cost=-1, timestamp=-1) ltf_records.append((ms_input, ms_output)) ltf_keys = [] - ltf_arg = [tvm.placeholder((1, 4, 8, 8, 4), dtype=dtype), "NCHW4c", "NCHW8c"] + ltf_arg = [te.placeholder((1, 4, 8, 8, 4), dtype=dtype), "NCHW4c", "NCHW8c"] ltf_wkl = autotvm.task.args_to_workload(ltf_arg, 'layout_transform') ltf_keys.append(ltf_wkl) - ltf_arg = [tvm.placeholder((1, 1, 8, 8, 32), dtype=dtype), "NCHW32c", "NCHW4c"] + ltf_arg = [te.placeholder((1, 1, 8, 8, 32), dtype=dtype), "NCHW32c", "NCHW4c"] ltf_wkl = autotvm.task.args_to_workload(ltf_arg, 'layout_transform') ltf_keys.append(ltf_wkl) - ltf_arg = [tvm.placeholder((1, 4, 8, 8, 8), dtype=dtype), "NCHW8c", "NCHW32c"] + ltf_arg = [te.placeholder((1, 4, 8, 8, 8), dtype=dtype), "NCHW8c", "NCHW32c"] ltf_wkl = autotvm.task.args_to_workload(ltf_arg, 'layout_transform') ltf_keys.append(ltf_wkl) @@ -314,7 +315,7 @@ def test_many_sub_graphs(): records.append((ms_input, ms_output)) ltf_records = [] - ltf_arg = [tvm.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"] + ltf_arg = [te.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"] ltf_task = autotvm.task.create('layout_transform', ltf_arg, target) ms_input = MeasureInput(target=target, task=ltf_task, config=None) ms_output = MeasureResult(costs=(1.91224744e-05,), error_no=0, all_cost=-1, timestamp=-1) @@ -397,7 +398,7 @@ def test_tuple(): records.append((ms_input, ms_output)) ltf_records = [] - ltf_arg = [tvm.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"] + ltf_arg 
= [te.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"] ltf_task = autotvm.task.create('layout_transform', ltf_arg, target) ms_input = MeasureInput(target=target, task=ltf_task, config=None) ms_output = MeasureResult(costs=(1.91224744e-05,), error_no=0, all_cost=-1, timestamp=-1) @@ -497,7 +498,7 @@ def test_triangle_block(): records.append((ms_input, ms_output)) ltf_records = [] - ltf_arg = [tvm.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"] + ltf_arg = [te.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"] ltf_task = autotvm.task.create('layout_transform', ltf_arg, target) ms_input = MeasureInput(target=target, task=ltf_task, config=None) ms_output = MeasureResult(costs=(1.91224744e-05,), error_no=0, all_cost=-1, timestamp=-1) diff --git a/tests/python/unittest/test_graph_tuner_utils.py b/tests/python/unittest/test_graph_tuner_utils.py index 112c5b8..f620acc 100644 --- a/tests/python/unittest/test_graph_tuner_utils.py +++ b/tests/python/unittest/test_graph_tuner_utils.py @@ -21,6 +21,7 @@ # https://github.com/apache/incubator-tvm/issues/3240 # TODO: restore the file name after this issue is resolved. import tvm +from tvm import te from tvm import autotvm, relay from tvm.relay.testing import resnet diff --git a/tests/python/unittest/test_hybrid_script.py b/tests/python/unittest/test_hybrid_script.py index 311dae8..3e90442 100644 --- a/tests/python/unittest/test_hybrid_script.py +++ b/tests/python/unittest/test_hybrid_script.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. 
import tvm, inspect, sys, traceback, numpy, pytest, types, os + +from tvm import te from tvm.contrib import util from tvm.hybrid import script from tvm.hybrid.runtime import HYBRID_GLOBALS @@ -22,8 +24,8 @@ from tvm.hybrid.runtime import HYBRID_GLOBALS @pytest.mark.skip def run_and_check(func, args, var_dict={}, target='llvm', sch=None, outs=None): def tvm_val_2_py_val(val): - val = tvm.ir_pass.Substitute(val, var_dict) - val = tvm.ir_pass.Simplify(val) + val = tvm.tir.ir_pass.Substitute(val, var_dict) + val = tvm.tir.ir_pass.Simplify(val) assert isinstance(val, (tvm.tir.IntImm,)) return val.value @@ -31,9 +33,9 @@ def run_and_check(func, args, var_dict={}, target='llvm', sch=None, outs=None): op = None if sch is None: - outs = func(*tuple(tvm.convert(i) if isinstance(i, list) else i for i in args)) + outs = func(*tuple(tvm.runtime.convert(i) if isinstance(i, list) else i for i in args)) op = outs[0].op if isinstance(outs, list) else outs.op - sch = tvm.create_schedule(op) + sch = te.create_schedule(op) else: assert outs is not None assert isinstance(outs, list) @@ -42,7 +44,7 @@ def run_and_check(func, args, var_dict={}, target='llvm', sch=None, outs=None): emu_args = [] nd_args = [] for i in args: - if isinstance(i, tvm.tensor.Tensor): + if isinstance(i, te.tensor.Tensor): shape = [tvm_val_2_py_val(j) for j in i.shape] emu_args.append(numpy.random.randn(*shape).astype(i.dtype)) nd_args.append(tvm.nd.array(emu_args[-1], ctx)) @@ -53,7 +55,7 @@ def run_and_check(func, args, var_dict={}, target='llvm', sch=None, outs=None): assert isinstance(i, list) emu_args.append(numpy.array(i)) - compile_args = [i for i in args if isinstance(i, (tvm.tensor.Tensor, tvm.tir.Var))] + \ + compile_args = [i for i in args if isinstance(i, (te.tensor.Tensor, tvm.tir.Var))] + \ (outs if isinstance(outs, list) else [outs]) module = tvm.build(sch, compile_args, @@ -76,8 +78,8 @@ def run_and_check(func, args, var_dict={}, target='llvm', sch=None, outs=None): for nd, np in zip(out_tensors, 
ref_data): tvm.testing.assert_allclose(nd.asnumpy(), np, rtol=1e-5, atol=1e-5) - module_args = [i for i in args if isinstance(i, (tvm.tensor.Tensor, tvm.tir.Var))] - module_outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs + module_args = [i for i in args if isinstance(i, (te.tensor.Tensor, tvm.tir.Var))] + module_outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs h_module = tvm.hybrid.build(sch, module_args, module_outs) return h_module, module_args, module_outs @@ -98,10 +100,10 @@ def outer_product(n, m, a, b): #Test global function #Test bridge between frontend and backend def test_outer_product(): - n = tvm.size_var('n') - m = tvm.size_var('m') - a = tvm.placeholder((n, ), name='a') - b = tvm.placeholder((m, ), name='b') + n = te.size_var('n') + m = te.size_var('m') + a = te.placeholder((n, ), name='a') + b = te.placeholder((m, ), name='b') try: c = outer_product(n, m, a, b) @@ -167,8 +169,8 @@ def test_fanout(): b[i] = sigma return b - n = tvm.size_var('n') - a = tvm.placeholder((n, ), 'float32', name='a') + n = te.size_var('n') + a = te.placeholder((n, ), 'float32', name='a') try: b = fanout(n, a) ir = b.op.body @@ -180,7 +182,7 @@ def test_fanout(): assert isinstance(ir, tvm.tir.For) assert ir.loop_var.name == 'i' assert ir.min.value == 0 - assert tvm.ir_pass.Equal(ir.extent, n - 3) + assert tvm.tir.ir_pass.Equal(ir.extent, n - 3) #Check loopbody ibody = ir.body assert isinstance(ibody, tvm.tir.AttrStmt) @@ -213,7 +215,7 @@ def test_fanout(): assert value.a.args[0].value == 0 assert value.b.name == 'a' assert len(value.b.args) == 1 - assert tvm.ir_pass.Equal(value.b.args[0], ir.loop_var + jloop.loop_var) + assert tvm.tir.ir_pass.Equal(value.b.args[0], ir.loop_var + jloop.loop_var) divide= rbody[2] assert isinstance(divide, tvm.tir.Provide) assert len(divide.args) == 1 @@ -249,9 +251,9 @@ def test_looptype(): f[k] = c[k] return d, e, f - a = tvm.placeholder((16, ), name='a', dtype='int32') - b = tvm.placeholder((16, ), name='b', 
dtype='int32') - c = tvm.placeholder((16, ), name='c', dtype='int32') + a = te.placeholder((16, ), name='a', dtype='int32') + b = te.placeholder((16, ), name='b', dtype='int32') + c = te.placeholder((16, ), name='c', dtype='int32') try: d, e, f = looptype(a, b, c) ir = d.op.body @@ -282,7 +284,7 @@ def test_if(): b[i] = -1 if i % 2 == 0 else 1 return b, c - a = tvm.placeholder((10, ), dtype='int32', name='a') + a = te.placeholder((10, ), dtype='int32', name='a') func, ins, outs = run_and_check(if_then_else, [a]) run_and_check(func, ins, outs=outs) @@ -326,8 +328,8 @@ def test_bind(): c[tx] = a[tx] + b[tx] return c - a = tvm.placeholder((1000, ), dtype='float32', name='a') - b = tvm.placeholder((1000, ), dtype='float32', name='b') + a = te.placeholder((1000, ), dtype='float32', name='a') + b = te.placeholder((1000, ), dtype='float32', name='b') func, ins, outs = run_and_check(vec_add, [a, b], target='cuda') run_and_check(func, ins, outs=outs, target='cuda') @@ -339,8 +341,8 @@ def test_bind(): return c c = raw(a, b) - sch = tvm.create_schedule(c.op) - x = tvm.thread_axis('threadIdx.x') + sch = te.create_schedule(c.op) + x = te.thread_axis('threadIdx.x') sch[c].bind(c.op.axis[0], x) func, ins, outs = run_and_check(raw, [a, b], sch=sch, outs=[c], target='cuda') run_and_check(func, ins, outs=outs, target='cuda') @@ -360,9 +362,9 @@ def test_bind(): return c - a = tvm.placeholder((8, 4), 'float32') + a = te.placeholder((8, 4), 'float32') c = foo(a) - s = tvm.create_schedule(c.op) + s = te.create_schedule(c.op) ir = tvm.lower(s, [a, c], simple_mode=True) assert not isinstance(ir, tvm.tir.AttrStmt) func, ins, outs = run_and_check(foo, [a], target='cuda') @@ -379,7 +381,7 @@ def test_bind(): b[i * m + j] = a[i * m + j] + a[i * m + j] return b - a = tvm.placeholder((10000, ), 'float32') + a = te.placeholder((10000, ), 'float32') with tvm.target.create('cuda'): func, ins, outs = run_and_check(max_threads, [a], target='cuda') run_and_check(func, ins, outs=outs, target='cuda') 
@@ -399,9 +401,9 @@ def test_math_intrin(): b[7] = max(a[5], a[6]) return b - a8 = tvm.placeholder((8, ), dtype='float32', name='a') + a8 = te.placeholder((8, ), dtype='float32', name='a') b8 = intrin_real(a8) - sch = tvm.create_schedule(b8.op) + sch = te.create_schedule(b8.op) func = tvm.build(sch, [a8, b8]) assert func a = numpy.arange(2, 10).astype('float32') @@ -417,9 +419,9 @@ def test_math_intrin(): b[0] = popcount(a[0]) return b - a1 = tvm.placeholder((1, ), dtype='int32') + a1 = te.placeholder((1, ), dtype='int32') b1 = intrin_int(a1) - sch = tvm.create_schedule(b1.op) + sch = te.create_schedule(b1.op) func = tvm.build(sch, [a1, b1]) assert func a = numpy.array([114514]).astype('int32') @@ -443,7 +445,7 @@ def test_non_zero(): b[i-2, j-2] = s / 9.0 return b - a = tvm.placeholder((32, 32), 'float32', 'a') + a = te.placeholder((32, 32), 'float32', 'a') func, ins, outs = run_and_check(blur, [a]) run_and_check(func, ins, outs=outs) @@ -455,8 +457,8 @@ def test_non_zero(): c[i, j] = a[i] * b[j] return c - a = tvm.placeholder((10, ), dtype='float32', name='a') - b = tvm.placeholder((10, ), dtype='float32', name='b') + a = te.placeholder((10, ), dtype='float32', name='a') + b = te.placeholder((10, ), dtype='float32', name='b') func, ins, outs = run_and_check(triangle, [a, b]) run_and_check(func, ins, outs=outs) @@ -474,9 +476,9 @@ def test_allocate(): b[i, j] = (ha[0, j] + ha[1, j] + ha[2, j]) / 9.0 return b - a = tvm.placeholder((32, 32), 'float32', 'a') + a = te.placeholder((32, 32), 'float32', 'a') b = blur2d(a) - sch = tvm.create_schedule(b.op) + sch = te.create_schedule(b.op) func, ins, outs = run_and_check(blur2d, [a]) run_and_check(func, ins, outs=outs) @@ -494,8 +496,8 @@ def test_allocate(): c[i] = shared[i] + local[i] return c - a = tvm.placeholder((256, ), dtype='float32', name='a') - b = tvm.placeholder((256, ), dtype='float32', name='b') + a = te.placeholder((256, ), dtype='float32', name='a') + b = te.placeholder((256, ), dtype='float32', name='b') c 
= share_vec_add(a, b) func, ins, outs = run_and_check(share_vec_add, [a, b], target='cuda') run_and_check(func, ins, outs=outs, target='cuda') @@ -510,11 +512,11 @@ def test_upstream(): b[i] = a[i] * i return b - a = tvm.placeholder((20, ), 'float32') - b = tvm.placeholder((20, ), 'float32') - c = tvm.compute((20, ), lambda x: a[x] + b[x]) + a = te.placeholder((20, ), 'float32') + b = te.placeholder((20, ), 'float32') + c = te.compute((20, ), lambda x: a[x] + b[x]) d = upstream(c) - sch = tvm.create_schedule([c.op, d.op]) + sch = te.create_schedule([c.op, d.op]) ir = tvm.lower(sch, [a, b, d], simple_mode=True) func = tvm.build(sch, [a, b, d]) assert(func) @@ -541,11 +543,11 @@ def test_downstream(): return b - a = tvm.placeholder((20, ), 'float32') + a = te.placeholder((20, ), 'float32') b = downstream(a) - c = tvm.compute((20, ), lambda x: b[x] + 1.0) + c = te.compute((20, ), lambda x: b[x] + 1.0) - sch = tvm.create_schedule(c.op) + sch = te.create_schedule(c.op) module = tvm.build(sch, [a, c]) assert module @@ -567,10 +569,10 @@ def test_const_param(): c[i] = a[i] + b return c - a = tvm.placeholder((11, ), dtype='int32', name='a') - b = tvm.const(11, 'int32') + a = te.placeholder((11, ), dtype='int32', name='a') + b = tvm.tir.const(11, 'int32') c = add_something(a, b) - sch = tvm.create_schedule(c.op) + sch = te.create_schedule(c.op) module = tvm.build(sch, [a, c], 'llvm') assert(module) @@ -603,10 +605,10 @@ def test_value_index(): c[i, j] = a[i * 4 + j] * b[i, j] return c - a = tvm.placeholder((16, ), 'int32') + a = te.placeholder((16, ), 'int32') b, c = kernel_a(a) d = kernel_b(c, b) - sch = tvm.create_schedule(d.op) + sch = te.create_schedule(d.op) module = tvm.build(sch, [a, d]) assert module @@ -632,8 +634,8 @@ def test_func_call(): d[i, j] = c[i, j] + i * j return d - a = tvm.placeholder((10, ), name='a') - b = tvm.placeholder((10, ), name='b') + a = te.placeholder((10, ), name='a') + b = te.placeholder((10, ), name='b') func, ins, outs = 
run_and_check(foo, [a, b]) run_and_check(func, ins, outs=outs) @@ -648,7 +650,7 @@ def test_bool(): else: b[i] = 0.0 return b - a = tvm.placeholder((10, ), name='a') + a = te.placeholder((10, ), name='a') func, ins, outs = run_and_check(foo, [a]) run_and_check(func, ins, outs=outs) @@ -668,7 +670,7 @@ def test_const_range(): return c, d - a = tvm.placeholder((2, 5), name='a', dtype='float32') + a = te.placeholder((2, 5), name='a', dtype='float32') b = [[1, 2, 3, 4, 5], [5, 4, 3, 2, 1]] func, ins, outs = run_and_check(foo, [a, b]) run_and_check(func, ins, outs=outs) @@ -683,10 +685,10 @@ def test_const_range(): else: c[i - len_b] = a[i - len_b] + b[i - len_b] return c - a = tvm.placeholder((5, ), name='a', dtype='int32') + a = te.placeholder((5, ), name='a', dtype='int32') b = [1, 2, 3, 4, 5] - c = goo(a, tvm.convert(b)) - sch = tvm.create_schedule(c.op) + c = goo(a, tvm.runtime.convert(b)) + sch = te.create_schedule(c.op) func, ins, outs = run_and_check(goo, [a, b]) run_and_check(func, ins, outs=outs) @@ -700,7 +702,7 @@ def test_const_range(): d += a[i] + b[j] c[i] = d return c - a = tvm.placeholder((5, ), name='a', dtype='int32') + a = te.placeholder((5, ), name='a', dtype='int32') b = [1, 2, 3, 4, 5] func, ins, outs = run_and_check(hoo, [a, b]) run_and_check(func, ins, outs=outs) @@ -713,14 +715,14 @@ def test_schedule(): for j in range(64): c[i, j] = a[i] * b[j] return c - a = tvm.placeholder((64,), name='a', dtype='float32') - b = tvm.placeholder((64,), name='b', dtype='float32') + a = te.placeholder((64,), name='a', dtype='float32') + b = te.placeholder((64,), name='b', dtype='float32') c = outer_product(a, b) # Test perfect loop split # Test loop reorder # Test loop annotation - sch = tvm.create_schedule(c.op) + sch = te.create_schedule(c.op) i, j = c.op.axis io, ii = sch[c].split(i, 4) sch[c].parallel(ii) @@ -749,7 +751,7 @@ def test_schedule(): run_and_check(func, ins, outs=outs) # Test fuse - sch = tvm.create_schedule(c.op) + sch = 
te.create_schedule(c.op) sch[c].fuse(c.op.axis[0], c.op.axis[1]) ir = tvm.lower(sch, [a, b, c], simple_mode=True) assert isinstance(ir, tvm.tir.ProducerConsumer) @@ -762,7 +764,7 @@ def test_schedule(): run_and_check(func, ins, outs=outs) # Test imperfect loop split - sch = tvm.create_schedule(c.op) + sch = te.create_schedule(c.op) sch[c].split(c.op.axis[0], 3) ir = tvm.lower(sch, [a, b, c], simple_mode=True) func, ins, outs = run_and_check(outer_product, [a, b], sch=sch, outs=[c]) @@ -784,7 +786,7 @@ def test_capture(): c[i] = a[i] + constant_list[1][const_value] return c - a = tvm.placeholder((n, ), dtype='int32', name='a') + a = te.placeholder((n, ), dtype='int32', name='a') func, ins, outs = run_and_check(add_something, [a]) run_and_check(func, ins, outs=outs) @@ -801,12 +803,12 @@ def test_array_inputs(): n = 5 inputs = [] for i in range(n): - inputs.append(tvm.placeholder((10,), name='t%s' % i, dtype='float32')) + inputs.append(te.placeholder((10,), name='t%s' % i, dtype='float32')) - out = sum_array(tvm.convert(inputs)) + out = sum_array(tvm.runtime.convert(inputs)) assert len(out.op.inputs) == n - sch = tvm.create_schedule(out.op) + sch = te.create_schedule(out.op) mod = tvm.build(sch, inputs + [out], target='llvm') assert mod diff --git a/tests/python/unittest/test_ir_builder.py b/tests/python/unittest/test_ir_builder.py index 5679625..9106be8 100644 --- a/tests/python/unittest/test_ir_builder.py +++ b/tests/python/unittest/test_ir_builder.py @@ -15,11 +15,12 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te import numpy as np def test_for(): - ib = tvm.ir_builder.create() - n = tvm.size_var("n") + ib = tvm.tir.ir_builder.create() + n = te.size_var("n") A = ib.allocate("float32", n, name="A", scope="global") with ib.for_range(0, n, name="i") as i: A[i] = A[i] + 1 @@ -38,10 +39,10 @@ def test_for(): assert isinstance(body[1], tvm.tir.For) def test_if(): - ib = tvm.ir_builder.create() - n = tvm.size_var("n") + ib = tvm.tir.ir_builder.create() + n = te.size_var("n") A = ib.pointer("float32", name="A") - tmod = tvm.truncmod + tmod = tvm.tir.truncmod with ib.for_range(0, n, name="i") as i: with ib.if_scope(tmod(i, 2) == 0): A[i] = A[i] + 1 @@ -58,9 +59,9 @@ def test_if(): assert body.else_case.index.value == 0 def test_prefetch(): - A = tvm.placeholder((10, 20), name="A") - ib = tvm.ir_builder.create() - n = tvm.size_var("n") + A = te.placeholder((10, 20), name="A") + ib = tvm.tir.ir_builder.create() + n = te.size_var("n") with ib.for_range(0, n, name="i") as i: ib.emit( @@ -74,12 +75,12 @@ def test_prefetch(): def test_cpu(): n = 1024 dtype = "float32" - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') def test_device_ir(A, B, C): n = A.shape[0] max_threads = 8 - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() Aptr = ib.buffer_ptr(A) Bptr = ib.buffer_ptr(B) Cptr = ib.buffer_ptr(C) @@ -87,9 +88,9 @@ def test_cpu(): Cptr[i] = Aptr[i] + Bptr[i] body = ib.get() return body - C = tvm.extern(A.shape, [A, B], lambda ins, outs: test_device_ir(ins[0], ins[1], outs[0]), + C = te.extern(A.shape, [A, B], lambda ins, outs: test_device_ir(ins[0], ins[1], outs[0]), name="vector_add", dtype=dtype) - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) def check_target(target): if not tvm.runtime.enabled(target): return @@ -105,18 +106,18 @@ def test_cpu(): check_target("llvm") def test_gpu(): - n = tvm.size_var('n') + n = 
te.size_var('n') dtype = "float32" - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - idxd = tvm.indexdiv + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + idxd = tvm.tir.indexdiv def test_device_ir(A, B, C): n = A.shape[0] max_threads = 32 - ib = tvm.ir_builder.create() - bx = tvm.thread_axis("blockIdx.x") - tx = tvm.thread_axis("threadIdx.x") + ib = tvm.tir.ir_builder.create() + bx = te.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") ib.scope_attr(bx, "thread_extent", idxd(n+max_threads-1, max_threads)) ib.scope_attr(tx, "thread_extent", max_threads) idx = bx.var * max_threads + tx.var @@ -127,11 +128,11 @@ def test_gpu(): Cptr[idx] = Aptr[idx] + Bptr[idx] body = ib.get() return body - C = tvm.extern(A.shape, [A, B], lambda ins, outs: test_device_ir(ins[0], ins[1], outs[0]), + C = te.extern(A.shape, [A, B], lambda ins, outs: test_device_ir(ins[0], ins[1], outs[0]), name="vector_add", dtype=dtype) - s = tvm.create_schedule(C.op) - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + s = te.create_schedule(C.op) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def check_target(target): n = 1024 if not tvm.runtime.enabled(target): diff --git a/tests/python/unittest/test_lang_basic.py b/tests/python/unittest/test_lang_basic.py index 3b1431a..cd532a0 100644 --- a/tests/python/unittest/test_lang_basic.py +++ b/tests/python/unittest/test_lang_basic.py @@ -15,13 +15,14 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te import numpy as np def test_const(): - x = tvm.const(1, "int32") + x = tvm.tir.const(1, "int32") print(x.dtype) - assert x.dtype == tvm.int32 + assert x.dtype == "int32" assert isinstance(x, tvm.tir.IntImm) @@ -29,28 +30,28 @@ def test_scalar_dtype_inference(): for data in [True, np.bool(1), np.uint8(1), np.uint16(1), np.uint32(1), np.uint64(1), np.int8(1), np.int16(1), np.int32(1), np.int64(1), np.float16(1), np.float32(1), np.float64(1)]: - assert tvm.const(data).dtype == str(np.array(data).dtype) - assert tvm.const(1).dtype == 'int32' - assert tvm.const(1.0).dtype == 'float32' + assert tvm.tir.const(data).dtype == str(np.array(data).dtype) + assert tvm.tir.const(1).dtype == 'int32' + assert tvm.tir.const(1.0).dtype == 'float32' for data in [True, np.bool(1), np.uint8(1), np.uint16(1), np.uint32(1), np.uint64(1), np.int8(1), np.int16(1), np.int32(1), np.int64(1), np.float16(1), np.float32(1), np.float64(1)]: - assert tvm.convert(data).dtype == str(np.array(data).dtype) - assert tvm.convert(1).dtype == 'int32' - assert tvm.convert(1.0).dtype == 'float32' + assert tvm.runtime.convert(data).dtype == str(np.array(data).dtype) + assert tvm.runtime.convert(1).dtype == 'int32' + assert tvm.runtime.convert(1.0).dtype == 'float32' def test_make(): - x = tvm.const(1, "int32") - y = tvm.var("x") + x = tvm.tir.const(1, "int32") + y = te.var("x") z = x + y - assert isinstance(tvm.max(x, y), tvm.tir.Max) - assert isinstance(tvm.min(x, y), tvm.tir.Min) + assert isinstance(tvm.te.max(x, y), tvm.tir.Max) + assert isinstance(tvm.te.min(x, y), tvm.tir.Min) def test_ir(): - x = tvm.const(1, "int32") + x = tvm.tir.const(1, "int32") y = tvm.tir.IntImm('int32', 1) z = x + y stmt = tvm.tir.Evaluate(z) @@ -58,22 +59,22 @@ def test_ir(): def test_ir2(): - x = tvm.var("n") - a = tvm.var("array", tvm.handle) + x = te.var("n") + a = te.var("array", "handle") st = tvm.tir.Store(a, x + 1, 1) assert isinstance(st, tvm.tir.Store) assert(st.buffer_var == a) def 
test_let(): - x = tvm.var('x') - y = tvm.var('y') + x = te.var('x') + y = te.var('y') stmt = tvm.tir.LetStmt( x, 10, tvm.tir.Evaluate(x + 1)); def test_cast(): - x = tvm.var('x', dtype="float32") + x = te.var('x', dtype="float32") y = x.astype("int32") z = x.astype("float32x4") assert isinstance(y, tvm.tir.Cast) @@ -82,13 +83,13 @@ def test_cast(): def test_attr(): - x = tvm.var('x') - y = tvm.var('y') + x = te.var('x') + y = te.var('y') stmt = tvm.tir.AttrStmt( y, "stride", 10, tvm.tir.Evaluate(x + 1)); assert stmt.node == y - a = tvm.convert(1) + a = tvm.runtime.convert(1) assert a.value == 1 try: a.no_field @@ -98,78 +99,78 @@ def test_attr(): def test_basic(): - a = tvm.var('a') - b = tvm.var('b') + a = te.var('a') + b = te.var('b') c = a + b assert str(c) == '(%s + %s)' % (a.name, b.name) def test_stmt(): x = tvm.tir.Evaluate(0) - tvm.tir.For(tvm.var('i'), 0, 1, + tvm.tir.For(te.var('i'), 0, 1, tvm.tir.For.Serial, 0, x) def test_dir(): - x = tvm.var('x') + x = te.var('x') dir(x) def test_dtype(): - x = tvm.var('x') + x = te.var('x') assert x.dtype == 'int32' - y = tvm.var('y') + y = te.var('y') assert (x > y).dtype == 'bool' def test_any(): - x = tvm.var('x') - y = tvm.var('y') - z = tvm.var('z') + x = te.var('x') + y = te.var('y') + z = te.var('z') try: t = x or x assert False except ValueError: pass try: - tvm.any() + tvm.tir.any() assert False except ValueError: pass - assert str(tvm.any(x < y)) == '(%s < %s)' % (x.name, y.name) - assert str(tvm.any(x < y, x > z)) == '((%s < %s) || (%s > %s))' % ( + assert str(tvm.tir.any(x < y)) == '(%s < %s)' % (x.name, y.name) + assert str(tvm.tir.any(x < y, x > z)) == '((%s < %s) || (%s > %s))' % ( x.name, y.name, x.name, z.name) - assert str(tvm.any(x < y, y > z + 1, x < z * 2)) == \ + assert str(tvm.tir.any(x < y, y > z + 1, x < z * 2)) == \ '(((%s < %s) || (%s > (%s + 1))) || (%s < (%s*2)))' % ( x.name, y.name, y.name, z.name, x.name, z.name) def test_all(): - x = tvm.var('x') - y = tvm.var('y') - z = tvm.var('z') + 
x = te.var('x') + y = te.var('y') + z = te.var('z') try: t = x and x assert False except ValueError: pass try: - tvm.all() + tvm.tir.all() assert False except ValueError: pass - assert str(tvm.all(x < y)) == '(%s < %s)' % (x.name, y.name) - assert str(tvm.all(x < y, x > z)) == '((%s < %s) && (%s > %s))' % ( + assert str(tvm.tir.all(x < y)) == '(%s < %s)' % (x.name, y.name) + assert str(tvm.tir.all(x < y, x > z)) == '((%s < %s) && (%s > %s))' % ( x.name, y.name, x.name, z.name) - assert str(tvm.all(x < y, y > z + 1, x < z * 2)) == \ + assert str(tvm.tir.all(x < y, y > z + 1, x < z * 2)) == \ '(((%s < %s) && (%s > (%s + 1))) && (%s < (%s*2)))' % ( x.name, y.name, y.name, z.name, x.name, z.name) def test_bitwise(): - x = tvm.var('x') - y = tvm.var('y') + x = te.var('x') + y = te.var('y') assert str(x << y) == 'shift_left(x, y)' assert str(x >> y) == 'shift_right(x, y)' assert str(x & y) == 'bitwise_and(x, y)' @@ -182,12 +183,12 @@ def test_bitwise(): assert str(10 << x) == 'shift_left(10, x)' assert str(10 % x) == 'floormod(10, x)' assert str(~x) == 'bitwise_not(x)' - assert(tvm.const(1, "int8x2") >> 1).dtype == "int8x2" - assert(x >> tvm.const(1, "int32x2")).dtype == "int32x2" - assert(tvm.var("z", "int8x2") << tvm.const(1, "int8x2")).dtype == "int8x2" + assert(tvm.tir.const(1, "int8x2") >> 1).dtype == "int8x2" + assert(x >> tvm.tir.const(1, "int32x2")).dtype == "int32x2" + assert(te.var("z", "int8x2") << tvm.tir.const(1, "int8x2")).dtype == "int8x2" def test_float_bitwise(): - t = tvm.const(1.5,dtype='float32') + t = tvm.tir.const(1.5,dtype='float32') for test in [lambda lhs, rhs : lhs << rhs, lambda lhs, rhs : lhs >> rhs, lambda lhs, rhs : lhs | rhs, @@ -206,20 +207,20 @@ def test_float_bitwise(): pass def test_isnan(): - x = tvm.var('x', 'float32') - assert str(tvm.isnan(x)) == 'isnan(x)' - assert str(tvm.isnan(x).dtype) == 'bool' - y = tvm.var('y', 'float16') - assert str(tvm.isnan(y)) == 'isnan(float32(y))' - z = tvm.var('z', 'int32') - assert str(tvm.isnan(z)) 
== '(bool)0' - k = tvm.var('k', 'int8x2') - assert str(tvm.isnan(k).dtype) == 'uint1x2' + x = te.var('x', 'float32') + assert str(tvm.tir.isnan(x)) == 'isnan(x)' + assert str(tvm.tir.isnan(x).dtype) == 'bool' + y = te.var('y', 'float16') + assert str(tvm.tir.isnan(y)) == 'isnan(float32(y))' + z = te.var('z', 'int32') + assert str(tvm.tir.isnan(z)) == '(bool)0' + k = te.var('k', 'int8x2') + assert str(tvm.tir.isnan(k).dtype) == 'uint1x2' def test_equality(): - a = tvm.var('a') - b = tvm.var('b') + a = te.var('a') + b = te.var('b') c = (a == b) assert not c d = (c != c) diff --git a/tests/python/unittest/test_lang_buffer.py b/tests/python/unittest/test_lang_buffer.py index 7568814..9203fb1 100644 --- a/tests/python/unittest/test_lang_buffer.py +++ b/tests/python/unittest/test_lang_buffer.py @@ -15,27 +15,28 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te from tvm.tir import Buffer import numpy as np def test_buffer(): - m = tvm.size_var('m') - n = tvm.size_var('n') - l = tvm.size_var('l') - Ab = tvm.decl_buffer((m, n), tvm.float32) - Bb = tvm.decl_buffer((n, l), tvm.float32) + m = te.size_var('m') + n = te.size_var('n') + l = te.size_var('l') + Ab = tvm.tir.decl_buffer((m, n), "float32") + Bb = tvm.tir.decl_buffer((n, l), "float32") assert isinstance(Ab, tvm.tir.Buffer) - assert Ab.dtype == tvm.float32 + assert Ab.dtype == "float32" assert tuple(Ab.shape) == (m, n) def test_buffer_access_ptr(): - m = tvm.size_var('m') - n = tvm.size_var('n') - Ab = tvm.decl_buffer((m, n), tvm.float32, strides=[n + 1 , 1]) + m = te.size_var('m') + n = te.size_var('n') + Ab = tvm.tir.decl_buffer((m, n), "float32", strides=[n + 1 , 1]) aptr = Ab.access_ptr("rw") - assert tvm.ir_pass.Equal(aptr.args[3], Ab.strides[0] * m) + assert tvm.tir.ir_pass.Equal(aptr.args[3], Ab.strides[0] * m) assert aptr.args[0].dtype == Ab.dtype assert aptr.args[4].value == Buffer.READ | Buffer.WRITE aptr = Ab.access_ptr("w") @@ -43,59 +44,59 @@ 
def test_buffer_access_ptr(): def test_buffer_access_ptr_offset(): - m = tvm.size_var('m') - n = tvm.size_var('n') - Ab = tvm.decl_buffer((m, n), tvm.float32) + m = te.size_var('m') + n = te.size_var('n') + Ab = tvm.tir.decl_buffer((m, n), "float32") aptr = Ab.access_ptr("rw", offset=100) - offset = tvm.ir_pass.Simplify(aptr.args[2]) - assert tvm.ir_pass.Equal(offset, 100) + offset = tvm.tir.ir_pass.Simplify(aptr.args[2]) + assert tvm.tir.ir_pass.Equal(offset, 100) assert aptr.args[4].value == Buffer.READ | Buffer.WRITE - v = tvm.size_var('int32') + v = te.size_var('int32') aptr = Ab.access_ptr("rw", offset=100 + 100 + v) - offset = tvm.ir_pass.Simplify(aptr.args[2]) - assert tvm.ir_pass.Equal(offset, 200 + v) + offset = tvm.tir.ir_pass.Simplify(aptr.args[2]) + assert tvm.tir.ir_pass.Equal(offset, 200 + v) assert aptr.args[4].value == Buffer.READ | Buffer.WRITE - aptr = Ab.access_ptr("rw", offset=tvm.call_extern('int32', "test_call", 100 + 100 + v)) - offset = tvm.ir_pass.Simplify(aptr.args[2]) - assert tvm.ir_pass.Equal(offset, tvm.call_extern('int32', "test_call", 200 + v)) + aptr = Ab.access_ptr("rw", offset=tvm.tir.call_extern('int32', "test_call", 100 + 100 + v)) + offset = tvm.tir.ir_pass.Simplify(aptr.args[2]) + assert tvm.tir.ir_pass.Equal(offset, tvm.tir.call_extern('int32', "test_call", 200 + v)) assert aptr.args[4].value == Buffer.READ | Buffer.WRITE def test_buffer_access_ptr_extent(): - m = tvm.size_var('m') - n = tvm.size_var('n') - Ab = tvm.decl_buffer((m, n), tvm.float32) + m = te.size_var('m') + n = te.size_var('n') + Ab = tvm.tir.decl_buffer((m, n), "float32") aptr = Ab.access_ptr("rw") - assert tvm.ir_pass.Equal(aptr.args[3], m * n) + assert tvm.tir.ir_pass.Equal(aptr.args[3], m * n) aptr = Ab.access_ptr("rw", offset=100) - assert tvm.ir_pass.Equal(aptr.args[3], m * n - 100) - Ab = tvm.decl_buffer((m, n), tvm.float32, strides=[n + 1 , 1]) + assert tvm.tir.ir_pass.Equal(aptr.args[3], m * n - 100) + Ab = tvm.tir.decl_buffer((m, n), "float32", 
strides=[n + 1 , 1]) aptr = Ab.access_ptr("rw", offset=100) - assert tvm.ir_pass.Equal(aptr.args[3], Ab.strides[0] * m - 100) + assert tvm.tir.ir_pass.Equal(aptr.args[3], Ab.strides[0] * m - 100) def test_buffer_vload(): - m = tvm.size_var('m') - n = tvm.size_var('n') - Ab = tvm.decl_buffer((m, n), tvm.float32, elem_offset=100) + m = te.size_var('m') + n = te.size_var('n') + Ab = tvm.tir.decl_buffer((m, n), "float32", elem_offset=100) load = Ab.vload([2, 3]) - offset = tvm.ir_pass.Simplify(load.index) - assert tvm.ir_pass.Equal(offset, n * 2 + 103) + offset = tvm.tir.ir_pass.Simplify(load.index) + assert tvm.tir.ir_pass.Equal(offset, n * 2 + 103) def test_buffer_index_merge_mult_mod(): - m = tvm.size_var('m') - n = tvm.size_var('n') - s = tvm.size_var('s') - k0 = tvm.size_var('k0') - k1 = tvm.size_var('k1') - A = tvm.decl_buffer((m, n), tvm.float32) - A_stride = tvm.decl_buffer((m, n), tvm.float32, strides=(s, 1)) + m = te.size_var('m') + n = te.size_var('n') + s = te.size_var('s') + k0 = te.size_var('k0') + k1 = te.size_var('k1') + A = tvm.tir.decl_buffer((m, n), "float32") + A_stride = tvm.tir.decl_buffer((m, n), "float32", strides=(s, 1)) def assert_simplified_equal(index_simplified, index_direct): - assert tvm.ir_pass.Equal(index_simplified, index_direct),\ + assert tvm.tir.ir_pass.Equal(index_simplified, index_direct),\ "index_simplified=%s, index_direct=%s" %(index_simplified, index_direct) - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod # Test Case1 index_simplified = A_stride.vload( (idxd(idxm(k0, k1), s), idxm(idxm(k0, k1), s) + idxd(k0, k1) * k1)) @@ -123,18 +124,18 @@ def test_buffer_index_merge_mult_mod(): def test_buffer_broadcast(): - m0, m1, m2 = tvm.size_var("m0"), tvm.size_var("m1"), tvm.size_var("m2") - n0, n1, n2 = tvm.size_var("n0"), tvm.size_var("n1"), tvm.size_var("n2") - o0, o1, o2 = tvm.size_var("o0"), tvm.size_var("o1"), tvm.size_var("o2") + m0, m1, m2 = te.size_var("m0"), te.size_var("m1"), 
te.size_var("m2") + n0, n1, n2 = te.size_var("n0"), te.size_var("n1"), te.size_var("n2") + o0, o1, o2 = te.size_var("o0"), te.size_var("o1"), te.size_var("o2") - A = tvm.placeholder((m0, m1, m2), name='A') - B = tvm.placeholder((n0, n1, n2), name='B') + A = te.placeholder((m0, m1, m2), name='A') + B = te.placeholder((n0, n1, n2), name='B') - C = tvm.compute((o0, o1, o2), lambda i, j, k: A[i, j, k] + B[i, j, k], name='C') + C = te.compute((o0, o1, o2), lambda i, j, k: A[i, j, k] + B[i, j, k], name='C') - Ab = tvm.decl_buffer(A.shape, A.dtype, name="Ab", buffer_type="auto_broadcast") - Bb = tvm.decl_buffer(B.shape, B.dtype, name="Bb", buffer_type="auto_broadcast") - s = tvm.create_schedule(C.op) + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name="Ab", buffer_type="auto_broadcast") + Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name="Bb", buffer_type="auto_broadcast") + s = te.create_schedule(C.op) def check(): if not tvm.runtime.enabled("llvm"): @@ -151,18 +152,18 @@ def test_buffer_broadcast(): def test_buffer_broadcast_expr(): - n0, m0, x = tvm.size_var('n0'), tvm.size_var('m0'), tvm.size_var('x') - n1, m1 = tvm.size_var('n1'), tvm.size_var('m1') - o0, o1 = tvm.size_var('o0'), tvm.size_var('o1') - - A = tvm.placeholder((m0, n0), name='A') - B = tvm.placeholder((m1, n1), name='B') - C = tvm.compute((o0, o1//x), lambda i, j: A[i, j] + B[i, j], name='C') - - Ab = tvm.decl_buffer(A.shape, A.dtype, name="Ab", buffer_type="auto_broadcast") - Bb = tvm.decl_buffer(B.shape, B.dtype, name="Bb", buffer_type="auto_broadcast") - Cc = tvm.decl_buffer(C.shape, C.dtype, name="Cc", buffer_type="auto_broadcast") - s = tvm.create_schedule(C.op) + n0, m0, x = te.size_var('n0'), te.size_var('m0'), te.size_var('x') + n1, m1 = te.size_var('n1'), te.size_var('m1') + o0, o1 = te.size_var('o0'), te.size_var('o1') + + A = te.placeholder((m0, n0), name='A') + B = te.placeholder((m1, n1), name='B') + C = te.compute((o0, o1//x), lambda i, j: A[i, j] + B[i, j], name='C') + + Ab = 
tvm.tir.decl_buffer(A.shape, A.dtype, name="Ab", buffer_type="auto_broadcast") + Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name="Bb", buffer_type="auto_broadcast") + Cc = tvm.tir.decl_buffer(C.shape, C.dtype, name="Cc", buffer_type="auto_broadcast") + s = te.create_schedule(C.op) def check_stride(): if not tvm.runtime.enabled("llvm"): diff --git a/tests/python/unittest/test_lang_constructor.py b/tests/python/unittest/test_lang_constructor.py index 797a04f..9edaf92 100644 --- a/tests/python/unittest/test_lang_constructor.py +++ b/tests/python/unittest/test_lang_constructor.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_expr_constructor(): x = tvm.tir.Var("xx", "float32") @@ -47,8 +48,8 @@ def test_expr_constructor(): assert x.dtype == "float32" assert x.value.value == 1 - a = tvm.const(1.0, dtype="float32") - b = tvm.var("x", dtype="float32") + a = tvm.tir.const(1.0, dtype="float32") + b = te.var("x", dtype="float32") for cls in [tvm.tir.Add, tvm.tir.Sub, @@ -67,8 +68,8 @@ def test_expr_constructor(): assert x.b.same_as(b) - a = tvm.convert(tvm.var("x") > 1) - b = tvm.convert(tvm.var("x") == 1) + a = tvm.runtime.convert(te.var("x") > 1) + b = tvm.runtime.convert(te.var("x") == 1) for cls in [tvm.tir.And, tvm.tir.Or]: @@ -87,7 +88,7 @@ def test_expr_constructor(): assert x.false_value == b assert x.condition == a - buffer_var = tvm.var("x", dtype="handle") + buffer_var = te.var("x", dtype="handle") x = tvm.tir.Load("float32", buffer_var, 1, a) assert isinstance(x, tvm.tir.Load) assert x.dtype == "float32" @@ -120,7 +121,7 @@ def test_expr_constructor(): assert x.func == None assert x.value_index == 0 - v = tvm.var("aa") + v = te.var("aa") x = tvm.tir.Let(v, 1, v) assert x.var == v assert x.value.value == 1 @@ -128,8 +129,8 @@ def test_expr_constructor(): def test_stmt_constructor(): - v = tvm.var("aa") - buffer_var = tvm.var("buf", dtype="handle") + v = te.var("aa") + 
buffer_var = te.var("buf", dtype="handle") nop = tvm.tir.Evaluate(1) x = tvm.tir.LetStmt(v, 1, tvm.tir.Evaluate(1)) assert isinstance(x, tvm.tir.LetStmt) @@ -141,8 +142,8 @@ def test_stmt_constructor(): assert isinstance(x, tvm.tir.AttrStmt) assert x.value.value == 1 - x = tvm.tir.AssertStmt(tvm.const(1, "uint1"), - tvm.convert("hellow"), + x = tvm.tir.AssertStmt(tvm.tir.const(1, "uint1"), + tvm.runtime.convert("hellow"), nop) assert isinstance(x, tvm.tir.AssertStmt) assert x.body == nop @@ -151,26 +152,26 @@ def test_stmt_constructor(): assert isinstance(x, tvm.tir.ProducerConsumer) assert x.body == nop - x = tvm.tir.For(tvm.var("x"), 0, 10, 0, 0, nop) + x = tvm.tir.For(te.var("x"), 0, 10, 0, 0, nop) assert isinstance(x, tvm.tir.For) assert x.min.value == 0 assert x.extent.value == 10 assert x.body == nop - x = tvm.tir.Store(buffer_var, 1, 10, tvm.const(1, "uint1")) + x = tvm.tir.Store(buffer_var, 1, 10, tvm.tir.const(1, "uint1")) assert isinstance(x, tvm.tir.Store) assert x.buffer_var == buffer_var assert x.index.value == 10 assert x.value.value == 1 - tensor = tvm.placeholder((), dtype="float32") + tensor = te.placeholder((), dtype="float32") x = tvm.tir.Provide(tensor.op, 0, 10, []) assert isinstance(x, tvm.tir.Provide) assert x.value_index == 0 assert x.value.value == 10 x = tvm.tir.Allocate(buffer_var, "float32", [10], - tvm.const(1, "uint1"), nop) + tvm.tir.const(1, "uint1"), nop) assert isinstance(x, tvm.tir.Allocate) assert x.dtype == "float32" assert x.buffer_var == buffer_var @@ -186,11 +187,11 @@ def test_stmt_constructor(): assert isinstance(x, tvm.tir.Free) assert x.buffer_var == buffer_var - x = tvm.tir.Realize(None, 0, "float", [], tvm.const(1, "uint1"), nop) + x = tvm.tir.Realize(None, 0, "float", [], tvm.tir.const(1, "uint1"), nop) assert isinstance(x, tvm.tir.Realize) assert x.body == nop - x = tvm.tir.IfThenElse(tvm.const(1, "uint1"), + x = tvm.tir.IfThenElse(tvm.tir.const(1, "uint1"), tvm.tir.Evaluate(11), nop) assert isinstance(x, 
tvm.tir.IfThenElse) diff --git a/tests/python/unittest/test_lang_container.py b/tests/python/unittest/test_lang_container.py index 0b9fad9..c2d3aba 100644 --- a/tests/python/unittest/test_lang_container.py +++ b/tests/python/unittest/test_lang_container.py @@ -15,26 +15,27 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np def test_array(): - a = tvm.convert([1,2,3]) + a = tvm.runtime.convert([1,2,3]) assert len(a) == 3 assert a[-1].value == 3 a_slice = a[-3:-1] assert (a_slice[0].value, a_slice[1].value) == (1, 2) def test_array_save_load_json(): - a = tvm.convert([1,2,3]) + a = tvm.runtime.convert([1,2,3]) json_str = tvm.ir.save_json(a) a_loaded = tvm.ir.load_json(json_str) assert(a_loaded[1].value == 2) def test_map(): - a = tvm.var('a') - b = tvm.var('b') - amap = tvm.convert({a: 2, + a = te.var('a') + b = te.var('b') + amap = tvm.runtime.convert({a: 2, b: 3}) assert a in amap assert len(amap) == 2 @@ -45,7 +46,7 @@ def test_map(): def test_str_map(): - amap = tvm.convert({'a': 2, 'b': 3}) + amap = tvm.runtime.convert({'a': 2, 'b': 3}) assert 'a' in amap assert len(amap) == 2 dd = dict(amap.items()) @@ -55,9 +56,9 @@ def test_str_map(): def test_map_save_load_json(): - a = tvm.var('a') - b = tvm.var('b') - amap = tvm.convert({a: 2, + a = te.var('a') + b = te.var('b') + amap = tvm.runtime.convert({a: 2, b: 3}) json_str = tvm.ir.save_json(amap) amap = tvm.ir.load_json(json_str) @@ -67,14 +68,14 @@ def test_map_save_load_json(): def test_in_container(): - arr = tvm.convert(['a', 'b', 'c']) + arr = tvm.runtime.convert(['a', 'b', 'c']) assert 'a' in arr assert tvm.tir.StringImm('a') in arr assert 'd' not in arr def test_ndarray_container(): x = tvm.nd.array([1,2,3]) - arr = tvm.convert([x, x]) + arr = tvm.runtime.convert([x, x]) assert arr[0].same_as(x) assert arr[1].same_as(x) assert isinstance(arr[0], tvm.nd.NDArray) diff --git a/tests/python/unittest/test_lang_data_layout.py 
b/tests/python/unittest/test_lang_data_layout.py index 4c1cafc..86a71da 100644 --- a/tests/python/unittest/test_lang_data_layout.py +++ b/tests/python/unittest/test_lang_data_layout.py @@ -17,6 +17,7 @@ """Test layout and bijective-layout node""" import tvm +from tvm import te from topi.util import get_const_tuple def test_layout(): @@ -51,18 +52,18 @@ def test_layout(): def test_bilayout_convertible(): # not convertible - assert tvm.bijective_layout("NCHW", "ABCD") is None - assert tvm.bijective_layout("__undef__", "NCHW") is None - assert tvm.bijective_layout("NCHW", "__undef__") is None - assert tvm.bijective_layout("__undef__", "__undef__") is None - assert tvm.bijective_layout("", "NCHW") is None - assert tvm.bijective_layout("NCHW", "") is None - assert tvm.bijective_layout("", "") is None + assert tvm.tir.bijective_layout("NCHW", "ABCD") is None + assert tvm.tir.bijective_layout("__undef__", "NCHW") is None + assert tvm.tir.bijective_layout("NCHW", "__undef__") is None + assert tvm.tir.bijective_layout("__undef__", "__undef__") is None + assert tvm.tir.bijective_layout("", "NCHW") is None + assert tvm.tir.bijective_layout("NCHW", "") is None + assert tvm.tir.bijective_layout("", "") is None # convertible - assert tvm.bijective_layout("NCHW", "NCHW16c") is not None + assert tvm.tir.bijective_layout("NCHW", "NCHW16c") is not None def test_bilayout_shape(): - bilayout = tvm.bijective_layout("NCHW", "NCHW16c") + bilayout = tvm.tir.bijective_layout("NCHW", "NCHW16c") assert isinstance(bilayout, tvm.tir.BijectiveLayout) dst_shape = bilayout.forward_shape((1, 32, 7, 7)) @@ -72,7 +73,7 @@ def test_bilayout_shape(): assert get_const_tuple(src_shape) == (1, 32, 7, 7) def test_bilayout_index(): - bilayout = tvm.bijective_layout("NCHW", "NCHW16c") + bilayout = tvm.tir.bijective_layout("NCHW", "NCHW16c") dst_index = bilayout.forward_index([0, 18, 6, 6]) assert get_const_tuple(dst_index) == (0, 1, 6, 6, 2) diff --git a/tests/python/unittest/test_lang_group.py 
b/tests/python/unittest/test_lang_group.py index e78ffb3..0f1118d 100644 --- a/tests/python/unittest/test_lang_group.py +++ b/tests/python/unittest/test_lang_group.py @@ -16,20 +16,21 @@ # under the License. """Test group effect""" import tvm +from tvm import te def test_scan_group(): - m = tvm.size_var("m") - n = tvm.size_var("n") - x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x") - s_state = tvm.placeholder((m, n)) - s_init = tvm.compute((1, n), lambda _, i: x[0, i]) + m = te.size_var("m") + n = te.size_var("n") + x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x") + s_state = te.placeholder((m, n)) + s_init = te.compute((1, n), lambda _, i: x[0, i]) - s_update1 = tvm.compute((m, n), lambda t, i: s_state[t-1, i] + x[t, i]) - s_update2 = tvm.compute((m, n), lambda t, i: s_update1[t, i] + 1) - s_update3 = tvm.compute((m, n), lambda t, i: s_update2[t, i] + 1) - res = tvm.scan(s_init, s_update3, s_state, inputs=x) + s_update1 = te.compute((m, n), lambda t, i: s_state[t-1, i] + x[t, i]) + s_update2 = te.compute((m, n), lambda t, i: s_update1[t, i] + 1) + s_update3 = te.compute((m, n), lambda t, i: s_update2[t, i] + 1) + res = tvm.te.scan(s_init, s_update3, s_state, inputs=x) - s = tvm.create_schedule(res.op) + s = te.create_schedule(res.op) assert s[s_update1].group is not None assert s[s_update2].group == s[s_update1].group # Assign within group, is valid @@ -50,12 +51,12 @@ def test_scan_group(): pass def test_compute_group(): - m = tvm.size_var("m") - n = tvm.size_var("n") - x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x") - x1 = tvm.compute(x.shape, lambda *i: x(*i) + 1, name="x1") - x2 = tvm.compute(x.shape, lambda *i: x1(*i) + 2, name="x2") - s = tvm.create_schedule(x2.op) + m = te.size_var("m") + n = te.size_var("n") + x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x") + x1 = te.compute(x.shape, lambda *i: x(*i) + 1, name="x1") + x2 = te.compute(x.shape, lambda *i: 
x1(*i) + 2, name="x2") + s = te.create_schedule(x2.op) g = s.create_group(outputs=x1, inputs=x, include_inputs=True) assert s[x1].group == g assert s[x].group == g @@ -64,12 +65,12 @@ def test_compute_group(): assert g.num_child_stages == 2 def test_nest_group(): - m = tvm.size_var("m") - n = tvm.size_var("n") - x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x") - x1 = tvm.compute(x.shape, lambda *i: x(*i) + 1, name="x1") - x2 = tvm.compute(x.shape, lambda *i: x1(*i) + 2, name="x2") - s = tvm.create_schedule(x2.op) + m = te.size_var("m") + n = te.size_var("n") + x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x") + x1 = te.compute(x.shape, lambda *i: x(*i) + 1, name="x1") + x2 = te.compute(x.shape, lambda *i: x1(*i) + 2, name="x2") + s = te.create_schedule(x2.op) g1 = s.create_group(outputs=x1, inputs=x) g2 = s.create_group(outputs=x1, inputs=x, include_inputs=True) assert set(s.groups) == set([g1, g2]) diff --git a/tests/python/unittest/test_lang_operator.py b/tests/python/unittest/test_lang_operator.py index d32b4c5..23c5940 100644 --- a/tests/python/unittest/test_lang_operator.py +++ b/tests/python/unittest/test_lang_operator.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te def check_throws(f): try: @@ -27,12 +28,12 @@ def check_throws(f): def test_const_fold(): def check(f, *args): - x = f(*[tvm.const(x, "int32") for x in args]) + x = f(*[tvm.tir.const(x, "int32") for x in args]) y = f(*args) if not isinstance(x, (tvm.tir.IntImm,)) or x.value != int(y): raise ValueError("check error: %s vs %s " % (x, y)) - tmod = tvm.truncmod + tmod = tvm.tir.truncmod check(lambda x, y: x + y, 3, 4) check(lambda x, y: x * y, 3, 12) check(lambda x, y: x * y - 10, 3, 12) @@ -47,9 +48,9 @@ def test_const_fold(): def test_const_fold2(): - x = tvm.var("x") - tmod = tvm.truncmod - tdiv = tvm.truncdiv + x = te.var("x") + tmod = tvm.tir.truncmod + tdiv = tvm.tir.truncdiv assert (x + 0).same_as(x) assert (0 + x).same_as(x) assert (x - 0).same_as(x) @@ -60,48 +61,48 @@ def test_const_fold2(): def test_const_fold3(): # Test that using ints with logic operations is forbidden - x = tvm.var("x") + x = te.var("x") for val in [0, 1]: - for func in [tvm.all, tvm.any]: - check_throws(lambda: func(tvm.const(val, 'uint1'), x)) - check_throws(lambda: func(x, tvm.const(val, 'uint1'))) + for func in [tvm.tir.all, tvm.tir.any]: + check_throws(lambda: func(tvm.tir.const(val, 'uint1'), x)) + check_throws(lambda: func(x, tvm.tir.const(val, 'uint1'))) # Test const folding when both arguments are const - for tvm_func, py_func in [(tvm.all, lambda a, b: a and b), (tvm.any, lambda a, b: a or b)]: + for tvm_func, py_func in [(tvm.tir.all, lambda a, b: a and b), (tvm.tir.any, lambda a, b: a or b)]: for v1 in [0, 1]: for v2 in [0, 1]: - assert tvm.ir_pass.Equal(tvm_func(tvm.const(v1, 'uint1'), tvm.const(v2, 'uint1')), - tvm.const(py_func(v1, v2), 'uint1')) + assert tvm.tir.ir_pass.Equal(tvm_func(tvm.tir.const(v1, 'uint1'), tvm.tir.const(v2, 'uint1')), + tvm.tir.const(py_func(v1, v2), 'uint1')) - x = tvm.var("x", 'uint1') - true = tvm.const(1, 'uint1') - false = tvm.const(0, 'uint1') + x = te.var("x", 'uint1') + true = tvm.tir.const(1, 'uint1') + false = 
tvm.tir.const(0, 'uint1') - assert tvm.all(x, true).same_as(x) - assert tvm.all(true, x).same_as(x) - assert tvm.any(x, false).same_as(x) - assert tvm.any(false, x).same_as(x) + assert tvm.tir.all(x, true).same_as(x) + assert tvm.tir.all(true, x).same_as(x) + assert tvm.tir.any(x, false).same_as(x) + assert tvm.tir.any(false, x).same_as(x) - assert tvm.all(x, false).same_as(false) - assert tvm.all(false, x).same_as(false) - assert tvm.any(x, true).same_as(true) - assert tvm.any(true, x).same_as(true) + assert tvm.tir.all(x, false).same_as(false) + assert tvm.tir.all(false, x).same_as(false) + assert tvm.tir.any(x, true).same_as(true) + assert tvm.tir.any(true, x).same_as(true) def test_const_fold4(): - x1 = tvm.const(4, "int32") + x1 = tvm.tir.const(4, "int32") x2 = x1 + 5 - tdiv = tvm.truncdiv + tdiv = tvm.tir.truncdiv assert isinstance(x2, tvm.tir.IntImm) and x2.value == 9 x3 = tdiv(x2, 3) assert isinstance(x3, tvm.tir.IntImm) and x3.value == 3 x4 = x3 + 0.55 assert isinstance(x4, tvm.tir.FloatImm) and abs(x4.value - 3.55) < 1e-6 - x5 = tvm.ceil(x4) + x5 = te.ceil(x4) assert isinstance(x5, tvm.tir.FloatImm) and x5.value == 4 x6 = x5.astype('int') assert isinstance(x6, tvm.tir.IntImm) and x6.value == 4, "x6={}".format(x6) - y = (tvm.round((tvm.const(6.5, 'float32') - 1) / 1.5) + 2).astype('int') + y = (te.round((tvm.tir.const(6.5, 'float32') - 1) / 1.5) + 2).astype('int') assert isinstance(y, tvm.tir.IntImm) and y.value == 6 @@ -112,8 +113,8 @@ def test_binary_dtype_match(): [('int32', 'int64'), 'int64'], [('uint32', 'int32'), 'int32']] for (lhs_dtype, rhs_dtype), out_dtype in rules: - lhs = tvm.var('lhs', dtype=lhs_dtype) - rhs = tvm.var('rhs', dtype=rhs_dtype) + lhs = te.var('lhs', dtype=lhs_dtype) + rhs = te.var('rhs', dtype=rhs_dtype) out = f(lhs, rhs) if not is_conditional: assert out.dtype == out_dtype @@ -132,8 +133,8 @@ def test_binary_dtype_match(): def verify_callop_float_only(f): for lhs_dtype in ['int32', 'float32', 'float64']: for rhs_dtype in 
['int32', 'float32', 'float64']: - lhs = tvm.var('lhs', dtype=lhs_dtype) - rhs = tvm.var('rhs', dtype=rhs_dtype) + lhs = te.var('lhs', dtype=lhs_dtype) + rhs = te.var('rhs', dtype=rhs_dtype) if 'float' not in lhs_dtype and 'float' not in rhs_dtype: check_throws(lambda: f(lhs, rhs)) elif 'float' in lhs_dtype and 'float' in rhs_dtype and lhs_dtype != rhs_dtype: @@ -153,36 +154,36 @@ def test_binary_dtype_match(): verify_general_dtype_support(lambda a, b: a * b) verify_general_dtype_support(lambda a, b: a >= b, is_conditional=True) verify_general_dtype_support(lambda a, b: a <= b, is_conditional=True) - verify_callop_float_only(lambda a, b: tvm.power(a, b)) + verify_callop_float_only(lambda a, b: te.power(a, b)) def test_if_then_else(): - cases = [[(tvm.var('cond', dtype='bool'), 'bool', 'int32'), 'int32'], + cases = [[(te.var('cond', dtype='bool'), 'bool', 'int32'), 'int32'], [(True, 'int32', 'float32'), 'float32'], [(False, 'int32', 'int64'), 'int64'], - [(tvm.var('cond', dtype='bool'), 'uint32', 'int32'), 'int32'], - [(tvm.var('cond', dtype='int32'), 'uint32', 'int32'), 'int32']] + [(te.var('cond', dtype='bool'), 'uint32', 'int32'), 'int32'], + [(te.var('cond', dtype='int32'), 'uint32', 'int32'), 'int32']] for (cond, lhs_dtype, rhs_dtype), out_dtype in cases: - lhs = tvm.var('lhs', dtype=lhs_dtype) - rhs = tvm.var('rhs', dtype=rhs_dtype) + lhs = te.var('lhs', dtype=lhs_dtype) + rhs = te.var('rhs', dtype=rhs_dtype) if cond is True or cond is False: - out = tvm.if_then_else(cond, lhs, rhs) - out2 = tvm.if_then_else(not cond, rhs, lhs) - out3 = tvm.if_then_else(not cond, lhs, rhs) - assert tvm.ir_pass.Equal(out, out2) == 1 + out = tvm.tir.if_then_else(cond, lhs, rhs) + out2 = tvm.tir.if_then_else(not cond, rhs, lhs) + out3 = tvm.tir.if_then_else(not cond, lhs, rhs) + assert tvm.tir.ir_pass.Equal(out, out2) == 1 if cond: - assert tvm.ir_pass.Equal(out, lhs.astype(out_dtype)) == 1 - assert tvm.ir_pass.Equal(out3, rhs.astype(out_dtype)) == 1 + assert 
tvm.tir.ir_pass.Equal(out, lhs.astype(out_dtype)) == 1 + assert tvm.tir.ir_pass.Equal(out3, rhs.astype(out_dtype)) == 1 else: - assert tvm.ir_pass.Equal(out, rhs.astype(out_dtype)) == 1 - assert tvm.ir_pass.Equal(out3, lhs.astype(out_dtype)) == 1 + assert tvm.tir.ir_pass.Equal(out, rhs.astype(out_dtype)) == 1 + assert tvm.tir.ir_pass.Equal(out3, lhs.astype(out_dtype)) == 1 elif cond.dtype == 'bool': - out = tvm.if_then_else(cond, lhs, rhs) + out = tvm.tir.if_then_else(cond, lhs, rhs) assert out.dtype == out_dtype assert out.args[1].dtype == out_dtype assert out.args[2].dtype == out_dtype elif cond.dtype != 'bool': - check_throws(lambda: tvm.if_then_else(cond, lhs, rhs)) + check_throws(lambda: tvm.tir.if_then_else(cond, lhs, rhs)) else: raise ValueError('Unknown combinations') diff --git a/tests/python/unittest/test_lang_reflection.py b/tests/python/unittest/test_lang_reflection.py index e97e73a..1691d7d 100644 --- a/tests/python/unittest/test_lang_reflection.py +++ b/tests/python/unittest/test_lang_reflection.py @@ -15,11 +15,12 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te def test_const_saveload_json(): # save load json - x = tvm.const(1, "int32") - y = tvm.const(10, "int32") + x = tvm.tir.const(1, "int32") + y = tvm.tir.const(10, "int32") z = x + y z = z + z json_str = tvm.ir.save_json(z) @@ -29,11 +30,11 @@ def test_const_saveload_json(): def test_make_smap(): # save load json - x = tvm.const(1, "int32") - y = tvm.const(10, "int32") + x = tvm.tir.const(1, "int32") + y = tvm.tir.const(10, "int32") z = tvm.tir.Add(x, y) - smap = tvm.convert({"z": z, "x": x}) - json_str = tvm.ir.save_json(tvm.convert([smap])) + smap = tvm.runtime.convert({"z": z, "x": x}) + json_str = tvm.ir.save_json(tvm.runtime.convert([smap])) arr = tvm.ir.load_json(json_str) assert len(arr) == 1 assert arr[0]["z"].a == arr[0]["x"] @@ -43,7 +44,7 @@ def test_make_node(): x = tvm.ir.make_node("IntImm", dtype="int32", value=10) assert isinstance(x, tvm.tir.IntImm) assert x.value == 10 - A = tvm.placeholder((10, ), name='A') + A = te.placeholder((10, ), name='A') AA = tvm.ir.make_node("Tensor", shape=A.shape, dtype=A.dtype, @@ -81,9 +82,9 @@ def test_make_attrs(): def test_make_sum(): - A = tvm.placeholder((2, 10), name='A') - k = tvm.reduce_axis((0,10), "k") - B = tvm.compute((2,), lambda i: tvm.sum(A[i, k], axis=k), name="B") + A = te.placeholder((2, 10), name='A') + k = te.reduce_axis((0,10), "k") + B = te.compute((2,), lambda i: te.sum(A[i, k], axis=k), name="B") json_str = tvm.ir.save_json(B) BB = tvm.ir.load_json(json_str) assert B.op.body[0].combiner is not None diff --git a/tests/python/unittest/test_lang_schedule.py b/tests/python/unittest/test_lang_schedule.py index 10843f9..dae43bb 100644 --- a/tests/python/unittest/test_lang_schedule.py +++ b/tests/python/unittest/test_lang_schedule.py @@ -16,17 +16,18 @@ # under the License. 
import pytest import tvm +from tvm import te import pickle as pkl def test_schedule_create(): - m = tvm.size_var('m') - n = tvm.size_var('n') - l = tvm.size_var('l') - A = tvm.placeholder((m, l), name='A') - B = tvm.placeholder((n, l), name='B') - AA = tvm.compute((m, l), lambda i, j: A[i, j]) - T = tvm.compute((m, n, l), lambda i, j, k: AA(i, k) * B(j, k)) - s = tvm.create_schedule(T.op) + m = te.size_var('m') + n = te.size_var('n') + l = te.size_var('l') + A = te.placeholder((m, l), name='A') + B = te.placeholder((n, l), name='B') + AA = te.compute((m, l), lambda i, j: A[i, j]) + T = te.compute((m, n, l), lambda i, j, k: AA(i, k) * B(j, k)) + s = te.create_schedule(T.op) s[AA].set_scope("shared") xo, xi = s[T].split(T.op.axis[0], factor=10) xi1, xi2 = s[T].split(xi, factor=2) @@ -38,22 +39,22 @@ def test_schedule_create(): # save load json json_str = tvm.ir.save_json(s) s_loaded = tvm.ir.load_json(json_str) - assert isinstance(s_loaded, tvm.schedule.Schedule) + assert isinstance(s_loaded, tvm.te.schedule.Schedule) assert(str(s_loaded.outputs[0].body) == str(s.outputs[0].body)) # pickle unpickle dump = pkl.dumps(s) s_loaded = pkl.loads(dump) - assert isinstance(s_loaded, tvm.schedule.Schedule) + assert isinstance(s_loaded, tvm.te.schedule.Schedule) assert(str(s_loaded.outputs[0].body) == str(s.outputs[0].body)) def test_reorder(): - m = tvm.size_var('m') - A = tvm.placeholder((m,), name='A') - T = tvm.compute(m, lambda i: A[i+1]) + m = te.size_var('m') + A = te.placeholder((m,), name='A') + T = te.compute(m, lambda i: A[i+1]) - s = tvm.create_schedule(T.op) + s = te.create_schedule(T.op) xo, xi = s[T].split(T.op.axis[0], factor=10) xi1, xi2 = s[T].split(xi, factor=2) order = (xi2, xi1, xo) @@ -69,107 +70,107 @@ def test_reorder(): pass def test_split(): - m = tvm.size_var('m') - A = tvm.placeholder((m,), name='A') - T = tvm.compute((m,), lambda i: A[i]) + m = te.size_var('m') + A = te.placeholder((m,), name='A') + T = te.compute((m,), lambda i: A[i]) - s = 
tvm.create_schedule(T.op) + s = te.create_schedule(T.op) xo, xi = s[T].split(T.op.axis[0], factor=10) assert tuple(s[T].leaf_iter_vars) == (xo, xi) def test_tile(): - m = tvm.size_var('m') - n = tvm.size_var('n') - A = tvm.placeholder((m, n), name='A') - T = tvm.compute((m, n), lambda i, j: A[i, j]) + m = te.size_var('m') + n = te.size_var('n') + A = te.placeholder((m, n), name='A') + T = te.compute((m, n), lambda i, j: A[i, j]) - s = tvm.create_schedule(T.op) + s = te.create_schedule(T.op) xo, yo, xi, yi = s[T].tile(T.op.axis[0], T.op.axis[1], x_factor=10, y_factor=5) assert tuple(s[T].leaf_iter_vars) == (xo, yo, xi, yi) def test_fuse(): - m = tvm.size_var('m') - n = tvm.size_var('n') - A = tvm.placeholder((m, n), name='A') - T = tvm.compute((m, n), lambda i, j: A[i, j]) + m = te.size_var('m') + n = te.size_var('n') + A = te.placeholder((m, n), name='A') + T = te.compute((m, n), lambda i, j: A[i, j]) - s = tvm.create_schedule(T.op) + s = te.create_schedule(T.op) xo, yo, xi, yi = s[T].tile(T.op.axis[0], T.op.axis[1], x_factor=10, y_factor=5) fused = s[T].fuse(xo, yo) - assert any(isinstance(x, tvm.schedule.Fuse) for x in s[T].relations) + assert any(isinstance(x, tvm.te.schedule.Fuse) for x in s[T].relations) assert tuple(s[T].leaf_iter_vars) == (fused, xi, yi) def test_singleton(): print("test singleton") - A = tvm.placeholder((), name='A') - T = tvm.compute((), lambda : A() + 1) - s = tvm.create_schedule(T.op) + A = te.placeholder((), name='A') + T = te.compute((), lambda : A() + 1) + s = te.create_schedule(T.op) print("test singleton fin1") fused = s[T].fuse() - assert any(isinstance(x, tvm.schedule.Singleton) for x in s[T].relations) + assert any(isinstance(x, tvm.te.schedule.Singleton) for x in s[T].relations) assert tuple(s[T].leaf_iter_vars) == (fused,) dump = pkl.dumps(s) print("test singleton fin3") s_loaded = pkl.loads(dump) print("test singleton fin2") - assert isinstance(s_loaded, tvm.schedule.Schedule) + assert isinstance(s_loaded, 
tvm.te.schedule.Schedule) print("test singleton fin") def test_vectorize(): - m = tvm.size_var('m') - n = tvm.size_var('n') - A = tvm.placeholder((m, n), name='A') - T = tvm.compute((m, n), lambda i, j: A[i, j]) + m = te.size_var('m') + n = te.size_var('n') + A = te.placeholder((m, n), name='A') + T = te.compute((m, n), lambda i, j: A[i, j]) - s = tvm.create_schedule(T.op) + s = te.create_schedule(T.op) xo, yo, xi, yi = s[T].tile(T.op.axis[0], T.op.axis[1], x_factor=10, y_factor=5) s[T].vectorize(yi) s[T].unroll(xi) - UNROLL = tvm.schedule.IterVar.Unrolled - VECTORIZE = tvm.schedule.IterVar.Vectorized + UNROLL = tvm.te.schedule.IterVar.Unrolled + VECTORIZE = tvm.te.schedule.IterVar.Vectorized assert s[T].iter_var_attrs[xi].iter_type == UNROLL assert s[T].iter_var_attrs[yi].iter_type == VECTORIZE @pytest.mark.xfail def test_vectorize_commreduce(): - V = tvm.placeholder((128,), name='V') - ax = tvm.reduce_axis((0, 128), name='ax') - O = tvm.compute((1,), lambda _: tvm.sum(V[ax], axis=[ax])) - s = tvm.create_schedule(O.op) + V = te.placeholder((128,), name='V') + ax = te.reduce_axis((0, 128), name='ax') + O = te.compute((1,), lambda _: te.sum(V[ax], axis=[ax])) + s = te.create_schedule(O.op) s[O].vectorize(ax) # should throw here def test_pragma(): m = 100 - A = tvm.placeholder((m,), name='A') - T = tvm.compute((m,), lambda i: A[i]) + A = te.placeholder((m,), name='A') + T = te.compute((m,), lambda i: A[i]) - s = tvm.create_schedule(T.op) + s = te.create_schedule(T.op) xo, xi = s[T].split(T.op.axis[0], factor=10) s[T].pragma(xo, "pragma1") s[T].pragma(xi, "vectorize") - VECTORIZE = tvm.schedule.IterVar.Vectorized + VECTORIZE = tvm.te.schedule.IterVar.Vectorized assert s[T].iter_var_attrs[xo].pragma_keys[0].value == "pragma1" assert s[T].iter_var_attrs[xi].iter_type == VECTORIZE def test_rfactor(): - n = tvm.size_var('n') - k1 = tvm.reduce_axis((0, n), name="k1") - k2 = tvm.reduce_axis((0, n), name="k2") - A = tvm.placeholder((n, n, n), name='A') - B = tvm.compute((n, 
), lambda i: tvm.sum(A[i, k1, k2], axis=[k1, k2])) + n = te.size_var('n') + k1 = te.reduce_axis((0, n), name="k1") + k2 = te.reduce_axis((0, n), name="k2") + A = te.placeholder((n, n, n), name='A') + B = te.compute((n, ), lambda i: te.sum(A[i, k1, k2], axis=[k1, k2])) # normal schedule - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) BF = s.rfactor(B, k1) assert(tuple(BF.shape) == (n, n)) assert(set(BF.op.body[0].axis) == set([k2])) assert(s[B].op.body[0].axis[0].dom.extent == n) assert(len(s[B].all_iter_vars) == 2) # schedule with splot - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) ko, ki = s[B].split(k1, factor=4) xo, xi = s[B].split(B.op.axis[0], factor=8) BF = s.rfactor(B, ki) @@ -179,7 +180,7 @@ def test_rfactor(): assert(BF.op.body[0].axis[1].var == ko.var) assert(s[B].op.body[0].axis[0].dom.extent.value == 4) # schedule with factor_axis - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) ko, ki = s[B].split(k1, factor=4) xo, xi = s[B].split(B.op.axis[0], factor=8) BF = s.rfactor(B, ki, 1) @@ -191,54 +192,54 @@ def test_rfactor(): def test_tensor_intrin(): n = 16 - x = tvm.placeholder((n,), name='x') - y = tvm.placeholder((n,), name='y') - z = tvm.compute(x.shape, lambda i: x[i] + y[i], name='z') + x = te.placeholder((n,), name='x') + y = te.placeholder((n,), name='y') + z = te.compute(x.shape, lambda i: x[i] + y[i], name='z') def intrin_func(ins, outs): - assert(isinstance(ins[0], tvm.schedule.Buffer)) + assert(isinstance(ins[0], tvm.te.schedule.Buffer)) assert(ins[0].shape[0].value == n) - return tvm.call_packed("vadd", ins[0].data, outs[0].data, ins[0].shape[0]) - intrin = tvm.decl_tensor_intrin(z.op, intrin_func) + return tvm.tir.call_packed("vadd", ins[0].data, outs[0].data, ins[0].shape[0]) + intrin = te.decl_tensor_intrin(z.op, intrin_func) assert intrin.op == z.op assert intrin.reduce_init is None assert tuple(intrin.inputs) == tuple(z.op.input_tensors) assert(intrin.buffers[0].shape[0].value == n) m = 32 - x 
= tvm.placeholder((m,), name='x') - y = tvm.placeholder((m,), name='y') - z = tvm.compute(x.shape, lambda i: x[i] + y[i], name='z') - s = tvm.create_schedule(z.op) + x = te.placeholder((m,), name='x') + y = te.placeholder((m,), name='y') + z = te.compute(x.shape, lambda i: x[i] + y[i], name='z') + s = te.create_schedule(z.op) xo, xi = s[z].split(z.op.axis[0], factor=n) s[z].tensorize(xi, intrin) assert(s[z].iter_var_attrs[xi].tensor_intrin == intrin) - assert(s[z].iter_var_attrs[xi].iter_type == tvm.schedule.IterVar.Tensorized) + assert(s[z].iter_var_attrs[xi].iter_type == tvm.te.schedule.IterVar.Tensorized) def test_tensor_intrin_scalar_params(): - n = tvm.size_var("n") - x = tvm.placeholder((n,), name='x') - v = tvm.size_var("v") - w = tvm.size_var("w") - z = tvm.compute((n,), lambda i: x[i]*v + w, name='z') + n = te.size_var("n") + x = te.placeholder((n,), name='x') + v = te.size_var("v") + w = te.size_var("w") + z = te.compute((n,), lambda i: x[i]*v + w, name='z') def intrin_func(ins, outs, sp): - assert(isinstance(ins[0], tvm.schedule.Buffer)) + assert(isinstance(ins[0], tvm.te.schedule.Buffer)) assert(ins[0].shape[0] == n) assert(sp[0] == v) assert(sp[1] == w) - return tvm.call_packed("hw_func", ins[0].data, outs[0].data, sp[0], sp[1]) + return tvm.tir.call_packed("hw_func", ins[0].data, outs[0].data, sp[0], sp[1]) - with tvm.build_config(offset_factor=1): - intrin = tvm.decl_tensor_intrin(z.op, intrin_func, scalar_params=[v, w]) + with tvm.target.build_config(offset_factor=1): + intrin = te.decl_tensor_intrin(z.op, intrin_func, scalar_params=[v, w]) assert intrin.op == z.op assert intrin.reduce_init is None assert tuple(intrin.inputs) == tuple(z.op.input_tensors) assert(intrin.buffers[0].shape[0] == n) assert tuple(intrin.scalar_params) == tuple((v, w)) - A = tvm.placeholder((10,10), name='A') + A = te.placeholder((10,10), name='A') # Pass scalar inputs to the TensorIntrin, interleaved with tensor inputs - C = tvm.compute((10,10), lambda i, j: intrin(i*i, 
A[i, j], i+j), name="C") - s = tvm.create_schedule(C.op) + C = te.compute((10,10), lambda i, j: intrin(i*i, A[i, j], i+j), name="C") + s = te.create_schedule(C.op) stmt = tvm.lower(s, [A, C], simple_mode=True) assert isinstance(stmt.body.body.body, tvm.tir.Evaluate) assert len(stmt.body.body.body.value.args) == 5 diff --git a/tests/python/unittest/test_lang_tag.py b/tests/python/unittest/test_lang_tag.py index 201abf1..6cfc0b1 100644 --- a/tests/python/unittest/test_lang_tag.py +++ b/tests/python/unittest/test_lang_tag.py @@ -16,32 +16,34 @@ # under the License. import json import tvm +from tvm import te +from tvm import te -@tvm.tag_scope(tag="conv") +@tvm.te.tag_scope(tag="conv") def compute_conv(data, weight): N, IC, H, W = data.shape OC, IC, KH, KW = weight.shape OH = H - KH + 1 OW = W - KW + 1 - ic = tvm.reduce_axis((0, IC), name='ic') - dh = tvm.reduce_axis((0, KH), name='dh') - dw = tvm.reduce_axis((0, KW), name='dw') + ic = te.reduce_axis((0, IC), name='ic') + dh = te.reduce_axis((0, KH), name='dh') + dw = te.reduce_axis((0, KW), name='dw') - return tvm.compute((N, OC, OH, OW), lambda i, oc, h, w: \ - tvm.sum(data[i, ic, h+dh, w+dw] * weight[oc, ic, dh, dw], + return te.compute((N, OC, OH, OW), lambda i, oc, h, w: \ + te.sum(data[i, ic, h+dh, w+dw] * weight[oc, ic, dh, dw], axis=[ic, dh, dw])) def test_with(): - n = tvm.size_var('n') - m = tvm.size_var('m') - l = tvm.size_var('l') + n = te.size_var('n') + m = te.size_var('m') + l = te.size_var('l') - A = tvm.placeholder((n, l), name='A') - B = tvm.placeholder((m, l), name='B') - with tvm.tag_scope(tag="gemm"): - k = tvm.reduce_axis((0, l), name='k') - C = tvm.compute((n, m), lambda i, j: tvm.sum(A[i, k] * B[j, k], axis=k), + A = te.placeholder((n, l), name='A') + B = te.placeholder((m, l), name='B') + with tvm.te.tag_scope(tag="gemm"): + k = te.reduce_axis((0, l), name='k') + C = te.compute((n, m), lambda i, j: te.sum(A[i, k] * B[j, k], axis=k), attrs={"hello" : 1, "arr": [10, 12]}) assert C.op.tag == 
'gemm' @@ -56,31 +58,31 @@ def test_with(): def test_decorator(): - n = tvm.size_var('n') - c = tvm.size_var('c') - h = tvm.size_var('h') - w = tvm.size_var('w') - kh = tvm.size_var('kh') - kw = tvm.size_var('kw') + n = te.size_var('n') + c = te.size_var('c') + h = te.size_var('h') + w = te.size_var('w') + kh = te.size_var('kh') + kw = te.size_var('kw') - A = tvm.placeholder((n, c, h, w), name='A') - B = tvm.placeholder((c, c, kh, kw), name='B') + A = te.placeholder((n, c, h, w), name='A') + B = te.placeholder((c, c, kh, kw), name='B') C = compute_conv(A, B) assert C.op.tag == 'conv' assert len(C.op.attrs) == 0 def test_nested(): - n = tvm.size_var('n') - c = tvm.size_var('c') - h = tvm.size_var('h') - w = tvm.size_var('w') - kh = tvm.size_var('kh') - kw = tvm.size_var('kw') + n = te.size_var('n') + c = te.size_var('c') + h = te.size_var('h') + w = te.size_var('w') + kh = te.size_var('kh') + kw = te.size_var('kw') - A = tvm.placeholder((n, c, h, w), name='A') - B = tvm.placeholder((c, c, kh, kw), name='B') + A = te.placeholder((n, c, h, w), name='A') + B = te.placeholder((c, c, kh, kw), name='B') try: - with tvm.tag_scope(tag='conv'): + with te.tag_scope(tag='conv'): C = compute_conv(A, B) assert False except ValueError: diff --git a/tests/python/unittest/test_lang_target.py b/tests/python/unittest/test_lang_target.py index 6da99f8..da7bcee 100644 --- a/tests/python/unittest/test_lang_target.py +++ b/tests/python/unittest/test_lang_target.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te @tvm.target.generic_func def mygeneric(data): diff --git a/tests/python/unittest/test_lang_tensor.py b/tests/python/unittest/test_lang_tensor.py index 2de5e19..762b3fe 100644 --- a/tests/python/unittest/test_lang_tensor.py +++ b/tests/python/unittest/test_lang_tensor.py @@ -15,19 +15,20 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te from topi.nn.pooling import pool def test_tensor(): - m = tvm.size_var('m') - n = tvm.size_var('n') - l = tvm.size_var('l') - A = tvm.placeholder((m, l), name='A') - B = tvm.placeholder((n, l), name='B') - T = tvm.compute((m, n, l), lambda i, j, k: A[i, k] * B[j, k]) + m = te.size_var('m') + n = te.size_var('n') + l = te.size_var('l') + A = te.placeholder((m, l), name='A') + B = te.placeholder((n, l), name='B') + T = te.compute((m, n, l), lambda i, j, k: A[i, k] * B[j, k]) print(T) print(T.op.body) assert(tuple(T.shape) == (m, n, l)) - assert(isinstance(A.op, tvm.tensor.PlaceholderOp)) + assert(isinstance(A.op, tvm.te.PlaceholderOp)) assert(A == A) assert(T.op.output(0) == T) assert(T.op.output(0).__hash__() == T.__hash__()) @@ -37,68 +38,68 @@ def test_tensor(): def test_rank_zero(): - m = tvm.size_var('m') - A = tvm.placeholder((m,), name='A') - scale = tvm.placeholder((), name='s') - k = tvm.reduce_axis((0, m), name="k") - T = tvm.compute((), lambda : tvm.sum(A[k] * scale(), axis=k)) + m = te.size_var('m') + A = te.placeholder((m,), name='A') + scale = te.placeholder((), name='s') + k = te.reduce_axis((0, m), name="k") + T = te.compute((), lambda : te.sum(A[k] * scale(), axis=k)) print(T) print(T.op.body) assert(tuple(T.shape) == ()) def test_conv1d(): - n = tvm.size_var('n') - A = tvm.placeholder((n+2), name='A') + n = te.size_var('n') + A = te.placeholder((n+2), name='A') def computeB(ii): i = ii + 1 return A[i-1] + A[i] + A[i+1] - B = tvm.compute(n, computeB) + B = te.compute(n, computeB) def test_tensor_slice(): - n = tvm.size_var('n') - A = tvm.compute((n, n), lambda i, j: 1) - B = tvm.compute((n,), lambda i: A[0][i] + A[0][i]) + n = te.size_var('n') + A = te.compute((n, n), lambda i, j: 1) + B = te.compute((n,), lambda i: A[0][i] + A[0][i]) def test_tensor_reduce_multi_axis(): - m = tvm.size_var('m') - n = tvm.size_var('n') - A = tvm.placeholder((m, n), name='A') - k1 = tvm.reduce_axis((0, n), "k") - k2 = tvm.reduce_axis((0, 
m), "k") - C = tvm.compute((1,), lambda _: tvm.sum(A[k1, k2], axis=(k1, k2))) - C = tvm.compute((1,), lambda _: tvm.sum(A[k1, k2], axis=[k1, k2])) + m = te.size_var('m') + n = te.size_var('n') + A = te.placeholder((m, n), name='A') + k1 = te.reduce_axis((0, n), "k") + k2 = te.reduce_axis((0, m), "k") + C = te.compute((1,), lambda _: te.sum(A[k1, k2], axis=(k1, k2))) + C = te.compute((1,), lambda _: te.sum(A[k1, k2], axis=[k1, k2])) def test_tensor_comm_reducer(): - m = tvm.size_var('m') - n = tvm.size_var('n') - A = tvm.placeholder((m, n), name='A') - k = tvm.reduce_axis((0, n), "k") - mysum = tvm.comm_reducer(lambda x, y: x+y, lambda t: tvm.const(0, dtype=t)) - C = tvm.compute((m,), lambda i: mysum(A[i, k], axis=k)) + m = te.size_var('m') + n = te.size_var('n') + A = te.placeholder((m, n), name='A') + k = te.reduce_axis((0, n), "k") + mysum = te.comm_reducer(lambda x, y: x+y, lambda t: tvm.tir.const(0, dtype=t)) + C = te.compute((m,), lambda i: mysum(A[i, k], axis=k)) def test_tensor_comm_reducer_overload(): - m = tvm.size_var('m') - n = tvm.size_var('n') - mysum = tvm.comm_reducer(lambda x, y: x+y, lambda t: tvm.const(0, dtype=t)) + m = te.size_var('m') + n = te.size_var('n') + mysum = te.comm_reducer(lambda x, y: x+y, lambda t: tvm.tir.const(0, dtype=t)) sum_res = mysum(m, n) def test_tensor_reduce(): - m = tvm.size_var('m') - n = tvm.size_var('n') - l = tvm.size_var('l') - A = tvm.placeholder((m, l), name='A') - B = tvm.placeholder((n, l), name='B') - T = tvm.compute((m, n, l), lambda i, j, k: A[i, k] * B[j, k]) - rv = tvm.reduce_axis((0, A.shape[1]), "k") - C = tvm.compute((m, n), lambda i, j: tvm.sum(T(i, j, rv+1), axis=rv)) + m = te.size_var('m') + n = te.size_var('n') + l = te.size_var('l') + A = te.placeholder((m, l), name='A') + B = te.placeholder((n, l), name='B') + T = te.compute((m, n, l), lambda i, j, k: A[i, k] * B[j, k]) + rv = te.reduce_axis((0, A.shape[1]), "k") + C = te.compute((m, n), lambda i, j: te.sum(T(i, j, rv+1), axis=rv)) # json load save 
C_json = tvm.ir.save_json(C) C_loaded = tvm.ir.load_json(C_json) - assert(isinstance(C_loaded, tvm.tensor.Tensor)) + assert(isinstance(C_loaded, te.tensor.Tensor)) assert(str(C_loaded) == str(C)) def test_tensor_compute1(): @@ -107,26 +108,26 @@ def test_tensor_compute1(): dtype = 'float32' def intrin_vadd(n): - x = tvm.placeholder((n,)) - y = tvm.placeholder((n,)) - z = tvm.compute(x.shape, lambda i: x[i] + y[i]) + x = te.placeholder((n,)) + y = te.placeholder((n,)) + z = te.compute(x.shape, lambda i: x[i] + y[i]) def intrin_func(ins, outs): - ib = tvm.ir_builder.create() - ib.emit(tvm.call_extern(outs[0].dtype, 'vadd', ins[0].access_ptr("r"), ins[1].access_ptr('r'), outs[0].access_ptr('wr'))) + ib = tvm.tir.ir_builder.create() + ib.emit(tvm.tir.call_extern(outs[0].dtype, 'vadd', ins[0].access_ptr("r"), ins[1].access_ptr('r'), outs[0].access_ptr('wr'))) return ib.get() - with tvm.build_config(offset_factor=n): - return tvm.decl_tensor_intrin(z.op, intrin_func) + with tvm.target.build_config(offset_factor=n): + return te.decl_tensor_intrin(z.op, intrin_func) vadd = intrin_vadd(factor) - A = tvm.placeholder((m//factor, factor), name="A", dtype=dtype) - B = tvm.placeholder((m//factor, factor), name="B", dtype=dtype) - C = tvm.compute((m//factor, factor), + A = te.placeholder((m//factor, factor), name="A", dtype=dtype) + B = te.placeholder((m//factor, factor), name="B", dtype=dtype) + C = te.compute((m//factor, factor), lambda i: vadd(A[i, 0:factor], B[i, 0:factor])) - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) stmt = tvm.lower(s, [A, B, C], simple_mode=True) assert isinstance(stmt.body.body, tvm.tir.Evaluate) @@ -140,102 +141,102 @@ def test_tensor_compute2(): dtype = 'float32' def intrin_gemm(m, n, l): - k = tvm.reduce_axis((0, l)) - x = tvm.placeholder((m, l)) - y = tvm.placeholder((n, l)) + k = te.reduce_axis((0, l)) + x = te.placeholder((m, l)) + y = te.placeholder((n, l)) # in theory, no relation - z = tvm.compute((m, n), lambda i, j: 
tvm.sum(x[i][k] * y[j][k], axis=k)) + z = te.compute((m, n), lambda i, j: te.sum(x[i][k] * y[j][k], axis=k)) def intrin_func(ins, outs): x_ptr = ins[0].access_ptr("r") y_ptr = ins[1].access_ptr("r") z_ptr = outs[0].access_ptr("w") - body = tvm.call_packed( + body = tvm.tir.call_packed( "gemv", x_ptr, y_ptr, z_ptr, m, n, l) - reset = tvm.call_packed( + reset = tvm.tir.call_packed( "fill_zero", z_ptr, m, n) - update = tvm.call_packed( + update = tvm.tir.call_packed( "gemv_add", x_ptr, y_ptr, z_ptr, m, n, l) return body, reset, update - with tvm.build_config(offset_factor=n): - return tvm.decl_tensor_intrin(z.op, intrin_func) + with tvm.target.build_config(offset_factor=n): + return te.decl_tensor_intrin(z.op, intrin_func) vgemm = intrin_gemm(factor1, factor2, factor) - A = tvm.placeholder((M//factor1, L//factor, factor1, factor), name="A", dtype=dtype) - B = tvm.placeholder((N//factor2, L//factor, factor2, factor), name="B", dtype=dtype) - k = tvm.reduce_axis((0, L//factor), name='k') - C = tvm.compute((M//factor1, N//factor2, factor1, factor2), + A = te.placeholder((M//factor1, L//factor, factor1, factor), name="A", dtype=dtype) + B = te.placeholder((N//factor2, L//factor, factor2, factor), name="B", dtype=dtype) + k = te.reduce_axis((0, L//factor), name='k') + C = te.compute((M//factor1, N//factor2, factor1, factor2), lambda i, j: vgemm(A[i, k, 0:factor1, 0:factor], B[j, k, 0:factor2, 0:factor], reduce_axis=k)) - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) stmt = tvm.lower(s, [A, B, C], simple_mode=True) assert isinstance(stmt.body.body.body[0], tvm.tir.Evaluate) assert isinstance(stmt.body.body.body[1].body, tvm.tir.Evaluate) def test_tensor_scan(): - m = tvm.size_var("m") - n = tvm.size_var("n") - x = tvm.placeholder((m, n)) - s = tvm.placeholder((m, n)) - res = tvm.scan(tvm.compute((1, n), lambda _, i: x[0, i]), - tvm.compute((m, n), lambda t, i: s[t-1, i] + x[t, i]), + m = te.size_var("m") + n = te.size_var("n") + x = te.placeholder((m, n)) + s 
= te.placeholder((m, n)) + res = tvm.te.scan(te.compute((1, n), lambda _, i: x[0, i]), + te.compute((m, n), lambda t, i: s[t-1, i] + x[t, i]), s) assert tuple(res.shape) == (m, n) def test_scan_multi_out(): - m = tvm.size_var("m") - n = tvm.size_var("n") - x1 = tvm.placeholder((m, n)) - s1 = tvm.placeholder((m, n)) - x2 = tvm.placeholder((m, n)) - s2 = tvm.placeholder((m, n)) - s1_init = tvm.compute((1, n), lambda _, i: x1[0, i]) - s2_init = tvm.compute((1, n), lambda _, i: x2[0, i]) - s1_update = tvm.compute((m, n), lambda t, i: s1[t-1, i] + s2[t-1, i] + x1[t, i]) - s2_update = tvm.compute((m, n), lambda t, i: x2[t, i] + s2[t-1,i]) - - r0, r1 = tvm.scan([s1_init, s2_init], + m = te.size_var("m") + n = te.size_var("n") + x1 = te.placeholder((m, n)) + s1 = te.placeholder((m, n)) + x2 = te.placeholder((m, n)) + s2 = te.placeholder((m, n)) + s1_init = te.compute((1, n), lambda _, i: x1[0, i]) + s2_init = te.compute((1, n), lambda _, i: x2[0, i]) + s1_update = te.compute((m, n), lambda t, i: s1[t-1, i] + s2[t-1, i] + x1[t, i]) + s2_update = te.compute((m, n), lambda t, i: x2[t, i] + s2[t-1,i]) + + r0, r1 = tvm.te.scan([s1_init, s2_init], [s1_update, s2_update], [s1, s2]) assert(r0.value_index == 0) assert(r1.value_index == 1) json_str = tvm.ir.save_json(r0.op) zz = tvm.ir.load_json(json_str) - assert isinstance(zz, tvm.tensor.ScanOp) + assert isinstance(zz, tvm.te.ScanOp) def test_extern(): - m = tvm.size_var('m') - A = tvm.placeholder((m,), name='A') + m = te.size_var('m') + A = te.placeholder((m,), name='A') def extern_func(ins, outs): - assert(isinstance(ins[0], tvm.schedule.Buffer)) - return tvm.call_packed("myadd", ins[0].data, outs[0].data, m) - B = tvm.extern((m,), [A], extern_func) + assert(isinstance(ins[0], tvm.te.schedule.Buffer)) + return tvm.tir.call_packed("myadd", ins[0].data, outs[0].data, m) + B = te.extern((m,), [A], extern_func) assert(tuple(B.shape) == (m,)) def test_extern_multi_out(): - m = tvm.size_var('m') - A = tvm.placeholder((m,), name='A') - 
B = tvm.compute((m,), lambda i: A[i] * 10) + m = te.size_var('m') + A = te.placeholder((m,), name='A') + B = te.compute((m,), lambda i: A[i] * 10) def extern_func(ins, outs): - assert(isinstance(ins[0], tvm.schedule.Buffer)) - return tvm.call_packed( + assert(isinstance(ins[0], tvm.te.schedule.Buffer)) + return tvm.tir.call_packed( "myadd", ins[0].data, outs[0].data, outs[1].data, m) - res = tvm.extern([A.shape, A.shape], [A, B], extern_func) + res = te.extern([A.shape, A.shape], [A, B], extern_func) assert(len(res) == 2) assert(res[1].value_index == 1) def test_tuple_inputs(): - m = tvm.size_var('m') - n = tvm.size_var('n') - A0 = tvm.placeholder((m, n), name='A0') - A1 = tvm.placeholder((m, n), name='A1') - T0, T1 = tvm.compute((m, n), lambda i, j: (A0[i, j] * 2, A1[i, j] * 3), name='T') - s = tvm.create_schedule(T0.op) + m = te.size_var('m') + n = te.size_var('n') + A0 = te.placeholder((m, n), name='A0') + A1 = te.placeholder((m, n), name='A1') + T0, T1 = te.compute((m, n), lambda i, j: (A0[i, j] * 2, A1[i, j] * 3), name='T') + s = te.create_schedule(T0.op) for i in range(len(T0.shape)): assert(T0.shape[i] == T1.shape[i]) @@ -244,58 +245,58 @@ def test_tuple_inputs(): assert(T1.value_index == 1) def test_tuple_with_different_deps(): - m = tvm.size_var('m') - n = tvm.size_var('n') - A0 = tvm.placeholder((m, n), name='A1') - A1 = tvm.placeholder((m, n), name='A2') - B0, B1 = tvm.compute((m, n), lambda i, j: (A0[i, j] * 2, A1[i, j] * 3), name='B') - C = tvm.compute((m, n), lambda i, j: B0[i, j] + 4, name='C') - - s = tvm.create_schedule(C.op) + m = te.size_var('m') + n = te.size_var('n') + A0 = te.placeholder((m, n), name='A1') + A1 = te.placeholder((m, n), name='A2') + B0, B1 = te.compute((m, n), lambda i, j: (A0[i, j] * 2, A1[i, j] * 3), name='B') + C = te.compute((m, n), lambda i, j: B0[i, j] + 4, name='C') + + s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], factor=10) s[B0.op].compute_at(s[C], xo) sch = s.normalize() - bounds = 
tvm.schedule.InferBound(sch) - stmt = tvm.schedule.ScheduleOps(sch, bounds) + bounds = tvm.te.schedule.InferBound(sch) + stmt = tvm.te.schedule.ScheduleOps(sch, bounds) def get_B1_realize(x): if isinstance(x, tvm.tir.Realize) and \ x.func == B1.op and x.value_index == 1: ret.append(x) ret = [] - tvm.ir_pass.PostOrderVisit(stmt, get_B1_realize) + tvm.tir.ir_pass.PostOrderVisit(stmt, get_B1_realize) assert stmt.node == C.op and len(ret) == 1 def test_tensor_inputs(): - x = tvm.placeholder((1,), name='x') - y = tvm.compute(x.shape, lambda i: x[i] + x[i]) + x = te.placeholder((1,), name='x') + y = te.compute(x.shape, lambda i: x[i] + x[i]) assert tuple(y.op.input_tensors) == (x,) def test_tensor_pool(): def intrin_pool(): - A = tvm.placeholder((64, 16, 16), name='A') - kh = tvm.reduce_axis((0, 3), name='kh') - kw = tvm.reduce_axis((0, 3), name='kw') - P = tvm.compute((64, 14, 14), - lambda c, oh, ow: tvm.max(A[c, oh + kh, ow + kw], + A = te.placeholder((64, 16, 16), name='A') + kh = te.reduce_axis((0, 3), name='kh') + kw = te.reduce_axis((0, 3), name='kw') + P = te.compute((64, 14, 14), + lambda c, oh, ow: tvm.te.max(A[c, oh + kh, ow + kw], axis=[kh, kw]), name='p') def intrin_func(ins, outs): dinp = ins[0] dout = outs[0] - return tvm.call_packed("op", dinp, dout) + return tvm.tir.call_packed("op", dinp, dout) - with tvm.build_config(offset_factor=1): - return tvm.decl_tensor_intrin(P.op, intrin_func) + with tvm.target.build_config(offset_factor=1): + return te.decl_tensor_intrin(P.op, intrin_func) - A = tvm.placeholder((1, 64, 16, 16), name='A') + A = te.placeholder((1, 64, 16, 16), name='A') P = pool(data=A, kernel=(3, 3), stride=(1, 1), padding=(0, 0, 0, 0), pool_type='max') - s = tvm.create_schedule(P.op) + s = te.create_schedule(P.op) _, oh, _, _ = P.op.axis intrin = intrin_pool() s[P].tensorize(oh, intrin) diff --git a/tests/python/unittest/test_lang_tensor_overload_op.py b/tests/python/unittest/test_lang_tensor_overload_op.py index 01c0d26..2e46962 100644 --- 
a/tests/python/unittest/test_lang_tensor_overload_op.py +++ b/tests/python/unittest/test_lang_tensor_overload_op.py @@ -16,6 +16,7 @@ # under the License. import numpy as np import tvm +from tvm import te import topi import topi.testing from topi.util import get_const_tuple @@ -23,27 +24,27 @@ from topi.util import get_const_tuple def test_operator_type_and_tags(): k = 1 - n = tvm.var('n') - A = tvm.placeholder((), name='A') - B = tvm.placeholder((10, 5), name='B') + n = te.var('n') + A = te.placeholder((), name='A') + B = te.placeholder((10, 5), name='B') B1 = B[0] B2 = B[0,0] assert isinstance(k + n, tvm.tir.PrimExpr) assert isinstance(n + n, tvm.tir.PrimExpr) - assert isinstance(k + A, tvm.tensor.Tensor) - assert isinstance(A + k, tvm.tensor.Tensor) - assert isinstance(n + A, tvm.tensor.Tensor) - assert isinstance(A + n, tvm.tensor.Tensor) - assert isinstance(A + A, tvm.tensor.Tensor) - - assert isinstance(k + B, tvm.tensor.Tensor) - assert isinstance(B + k, tvm.tensor.Tensor) - assert isinstance(n + B, tvm.tensor.Tensor) - assert isinstance(B + n, tvm.tensor.Tensor) - assert isinstance(A + B, tvm.tensor.Tensor) - assert isinstance(B + A, tvm.tensor.Tensor) - assert isinstance(B + B, tvm.tensor.Tensor) + assert isinstance(k + A, te.tensor.Tensor) + assert isinstance(A + k, te.tensor.Tensor) + assert isinstance(n + A, te.tensor.Tensor) + assert isinstance(A + n, te.tensor.Tensor) + assert isinstance(A + A, te.tensor.Tensor) + + assert isinstance(k + B, te.tensor.Tensor) + assert isinstance(B + k, te.tensor.Tensor) + assert isinstance(n + B, te.tensor.Tensor) + assert isinstance(B + n, te.tensor.Tensor) + assert isinstance(A + B, te.tensor.Tensor) + assert isinstance(B + A, te.tensor.Tensor) + assert isinstance(B + B, te.tensor.Tensor) assert (k + B).op.tag == topi.tag.ELEMWISE assert (B + k).op.tag == topi.tag.ELEMWISE @@ -58,22 +59,22 @@ def test_operator_type_and_tags(): assert isinstance(n + B2, tvm.tir.PrimExpr) assert isinstance(B2 + n, tvm.tir.PrimExpr) 
assert isinstance(B2 + B2, tvm.tir.PrimExpr) - assert isinstance(B2 + A, tvm.tensor.Tensor) - assert isinstance(A + B2, tvm.tensor.Tensor) - assert isinstance(B2 + B, tvm.tensor.Tensor) - assert isinstance(B + B2, tvm.tensor.Tensor) + assert isinstance(B2 + A, te.tensor.Tensor) + assert isinstance(A + B2, te.tensor.Tensor) + assert isinstance(B2 + B, te.tensor.Tensor) + assert isinstance(B + B2, te.tensor.Tensor) def test_combination(): k = 3 n = 5 m = 10 - x = tvm.var('x') - A = tvm.placeholder((n, m), name='A') - B = tvm.placeholder((n, m), name='B') - C = tvm.placeholder((n, m), name='C') + x = te.var('x') + A = te.placeholder((n, m), name='A') + B = te.placeholder((n, m), name='B') + C = te.placeholder((n, m), name='C') D = k + A - B * C + x - s = tvm.create_schedule(D.op) + s = te.create_schedule(D.op) foo = tvm.build(s, [x, A, B, C, D], "llvm") ctx = tvm.cpu(0) x = 2 @@ -87,9 +88,9 @@ def test_combination(): def verify_tensor_scalar_bop(shape, typ="add"): """Verify non-constant Tensor and scalar binary operations.""" - sh = [tvm.size_var('n%d' % i) for i in range(0, len(shape))] - k = tvm.var('k') - A = tvm.placeholder(sh, name='A') + sh = [te.size_var('n%d' % i) for i in range(0, len(shape))] + k = te.var('k') + A = te.placeholder(sh, name='A') if typ == "add": B = A + k elif typ == "sub": @@ -134,8 +135,8 @@ def verify_tensor_scalar_bop(shape, typ="add"): def verify_broadcast_bop(lhs_shape, rhs_shape, typ="add"): - A = tvm.placeholder(shape=lhs_shape, name="A") - B = tvm.placeholder(shape=rhs_shape, name="B") + A = te.placeholder(shape=lhs_shape, name="A") + B = te.placeholder(shape=rhs_shape, name="B") if typ == "add": C = A + B elif typ == "sub": @@ -195,8 +196,8 @@ def verify_conv2d_scalar_bop(batch, in_size, in_channel, num_filter, kernel, str k = 10.0 dilation = (1, 1) with tvm.target.create(device): - A = tvm.placeholder((batch, in_channel, in_size, in_size), name='A') - W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W') + A = 
te.placeholder((batch, in_channel, in_size, in_size), name='A') + W = te.placeholder((num_filter, in_channel, kernel, kernel), name='W') B = conv2d_nchw(A, W, stride, padding, dilation, A.dtype) if typ == "add": C = B + k diff --git a/tests/python/unittest/test_lang_verify_compute.py b/tests/python/unittest/test_lang_verify_compute.py index 6d17a0c..4231f48 100644 --- a/tests/python/unittest/test_lang_verify_compute.py +++ b/tests/python/unittest/test_lang_verify_compute.py @@ -15,38 +15,39 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_verify_compute(): - n = tvm.size_var("n") - m = tvm.size_var("m") - A = tvm.placeholder((n, m), name='A') - k = tvm.reduce_axis((0, m), "k") - k_ = tvm.reduce_axis((0, m-1), "k_") - f1 = lambda i: tvm.sum(A[i, k], axis=k) + n = te.size_var("n") + m = te.size_var("m") + A = te.placeholder((n, m), name='A') + k = te.reduce_axis((0, m), "k") + k_ = te.reduce_axis((0, m-1), "k_") + f1 = lambda i: te.sum(A[i, k], axis=k) f2 = lambda i: A[i,0] + 1 - f3 = lambda i: tvm.sum(A[i, k], axis=k) + 1 - f4 = lambda i: A[i,0] * (tvm.sum(A[i, k], axis=k) + 1) - f5 = lambda i: (tvm.sum(A[i, k], axis=k), A[i,0] + 1) - f6 = lambda i: (tvm.sum(A[i, k], axis=k), tvm.sum(A[i, k_], axis=k_)) + f3 = lambda i: te.sum(A[i, k], axis=k) + 1 + f4 = lambda i: A[i,0] * (te.sum(A[i, k], axis=k) + 1) + f5 = lambda i: (te.sum(A[i, k], axis=k), A[i,0] + 1) + f6 = lambda i: (te.sum(A[i, k], axis=k), te.sum(A[i, k_], axis=k_)) # # Valid compute try: - B = tvm.compute((n,), f1, name="B") + B = te.compute((n,), f1, name="B") except tvm._ffi.base.TVMError as ex: assert False # # Valid compute try: - B = tvm.compute((n,), f2, name="B") + B = te.compute((n,), f2, name="B") except tvm._ffi.base.TVMError as ex: assert False # # Invalid compute with non top level reduction try: - B = tvm.compute((n,), f3, name="B") + B = te.compute((n,), f3, name="B") assert False except tvm._ffi.base.TVMError as ex: 
pass @@ -54,7 +55,7 @@ def test_verify_compute(): # # Invalid compute with non top level reduction try: - B = tvm.compute((n,), f4, name="B") + B = te.compute((n,), f4, name="B") assert False except tvm._ffi.base.TVMError as ex: pass @@ -62,7 +63,7 @@ def test_verify_compute(): # # Invalid compute with reduction and non-reduction batch ops try: - B0, B1 = tvm.compute((n,), f5, name="B") + B0, B1 = te.compute((n,), f5, name="B") assert False except tvm._ffi.base.TVMError as ex: pass @@ -70,7 +71,7 @@ def test_verify_compute(): # # Invalid compute with unequal batch reduction ops try: - B0, B1 = tvm.compute((n,), f6, name="B") + B0, B1 = te.compute((n,), f6, name="B") assert False except tvm._ffi.base.TVMError as ex: pass diff --git a/tests/python/unittest/test_pass_attrs_hash_equal.py b/tests/python/unittest/test_pass_attrs_hash_equal.py index 2bd94e0..b3587cd 100644 --- a/tests/python/unittest/test_pass_attrs_hash_equal.py +++ b/tests/python/unittest/test_pass_attrs_hash_equal.py @@ -15,33 +15,34 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te def test_attrs_equal(): x = tvm.ir.make_node("attrs.TestAttrs", name="xx", padding=(3, 4)) y = tvm.ir.make_node("attrs.TestAttrs", name="xx", padding=(3, 4)) z = tvm.ir.make_node("attrs.TestAttrs", name="xx", padding=(3,4,1)) - assert tvm.ir_pass.AttrsEqual(x, y) - assert not tvm.ir_pass.AttrsEqual(x, z) + assert tvm.tir.ir_pass.AttrsEqual(x, y) + assert not tvm.tir.ir_pass.AttrsEqual(x, z) dattr = tvm.ir.make_node("DictAttrs", x=1, y=10, name="xyz", padding=(0,0)) - assert not tvm.ir_pass.AttrsEqual(dattr, x) + assert not tvm.tir.ir_pass.AttrsEqual(dattr, x) dattr2 = tvm.ir.make_node("DictAttrs", x=1, y=10, name="xyz", padding=(0,0)) - assert tvm.ir_pass.AttrsEqual(dattr, dattr2) + assert tvm.tir.ir_pass.AttrsEqual(dattr, dattr2) - assert tvm.ir_pass.AttrsEqual({"x": x}, {"x": y}) + assert tvm.tir.ir_pass.AttrsEqual({"x": x}, {"x": y}) # array related checks - assert tvm.ir_pass.AttrsEqual({"x": [x, x]}, {"x": [y, x]}) - assert not tvm.ir_pass.AttrsEqual({"x": [x, 1]}, {"x": [y, 2]}) + assert tvm.tir.ir_pass.AttrsEqual({"x": [x, x]}, {"x": [y, x]}) + assert not tvm.tir.ir_pass.AttrsEqual({"x": [x, 1]}, {"x": [y, 2]}) - n = tvm.var("n") - assert tvm.ir_pass.AttrsEqual({"x": n+1}, {"x": n+1}) + n = te.var("n") + assert tvm.tir.ir_pass.AttrsEqual({"x": n+1}, {"x": n+1}) def test_attrs_hash(): - fhash = tvm.ir_pass.AttrsHash + fhash = tvm.tir.ir_pass.AttrsHash x = tvm.ir.make_node("attrs.TestAttrs", name="xx", padding=(3, 4)) y = tvm.ir.make_node("attrs.TestAttrs", name="xx", padding=(3, 4)) assert fhash({"x": x}) == fhash({"x": y}) diff --git a/tests/python/unittest/test_pass_basic.py b/tests/python/unittest/test_pass_basic.py index 93c815a..f7eaa21 100644 --- a/tests/python/unittest/test_pass_basic.py +++ b/tests/python/unittest/test_pass_basic.py @@ -15,41 +15,42 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te def test_simplify(): - tdiv = tvm.truncdiv - tmod = tvm.truncmod - x = tvm.var('x') - e1 = tvm.ir_pass.Simplify(x + 2 + 1) - assert(tvm.ir_pass.Equal(e1, x + 3)) - e2 = tvm.ir_pass.Simplify(x * 3 + 5 * x) - assert(tvm.ir_pass.Equal(e2, x * 8)) - e3 = tvm.ir_pass.Simplify(x - tdiv(x, 3) * 3) - assert(tvm.ir_pass.Equal(e3, tmod(x, 3))) + tdiv = tvm.tir.truncdiv + tmod = tvm.tir.truncmod + x = te.var('x') + e1 = tvm.tir.ir_pass.Simplify(x + 2 + 1) + assert(tvm.tir.ir_pass.Equal(e1, x + 3)) + e2 = tvm.tir.ir_pass.Simplify(x * 3 + 5 * x) + assert(tvm.tir.ir_pass.Equal(e2, x * 8)) + e3 = tvm.tir.ir_pass.Simplify(x - tdiv(x, 3) * 3) + assert(tvm.tir.ir_pass.Equal(e3, tmod(x, 3))) def test_verify_ssa(): - x = tvm.var('x') - y = tvm.var() + x = te.var('x') + y = te.var() z = tvm.tir.Evaluate(x + y) - assert(tvm.ir_pass.VerifySSA(z)) + assert(tvm.tir.ir_pass.VerifySSA(z)) def test_convert_ssa(): - x = tvm.var('x') - y = tvm.var() + x = te.var('x') + y = te.var() let1 = tvm.tir.Let(x, 1, x + 1) let2 = tvm.tir.Let(x, 1, x + y) z = tvm.tir.Evaluate(let1 + let2) - assert(not tvm.ir_pass.VerifySSA(z)) - z_ssa = tvm.ir_pass.ConvertSSA(z) - assert(tvm.ir_pass.VerifySSA(z_ssa)) + assert(not tvm.tir.ir_pass.VerifySSA(z)) + z_ssa = tvm.tir.ir_pass.ConvertSSA(z) + assert(tvm.tir.ir_pass.VerifySSA(z_ssa)) def test_expr_use_var(): - x = tvm.var('x') - assert(tvm.ir_pass.ExprUseVar(x+1, x)) - assert(not tvm.ir_pass.ExprUseVar(1+10, x)) + x = te.var('x') + assert(tvm.tir.ir_pass.ExprUseVar(x+1, x)) + assert(not tvm.tir.ir_pass.ExprUseVar(1+10, x)) if __name__ == "__main__": diff --git a/tests/python/unittest/test_pass_bound_checkers.py b/tests/python/unittest/test_pass_bound_checkers.py index 6b959e0..b339097 100644 --- a/tests/python/unittest/test_pass_bound_checkers.py +++ b/tests/python/unittest/test_pass_bound_checkers.py @@ -16,41 +16,42 @@ # under the License. 
import pytest import tvm +from tvm import te import numpy as np def collect_visit(stmt, f): ret = [] - tvm.ir_pass.PostOrderVisit(stmt, lambda x: ret.append(f(x))) + tvm.tir.ir_pass.PostOrderVisit(stmt, lambda x: ret.append(f(x))) return ret def lower(sch, args): binds = {} arg_list = [] for x in args: - if isinstance(x, tvm.tensor.Tensor): - buf = tvm.decl_buffer(x.shape, dtype=x.dtype, name=x.name) + if isinstance(x, te.tensor.Tensor): + buf = tvm.tir.decl_buffer(x.shape, dtype=x.dtype, name=x.name) assert x not in binds binds[x] = buf arg_list.append(buf) else: raise ValueError("args must be Tensor, Buffer or Var") sch = sch.normalize() - bounds = tvm.schedule.InferBound(sch) - stmt = tvm.schedule.ScheduleOps(sch, bounds) - stmt = tvm.ir_pass.LoopPartition(stmt, True) - stmt = tvm.ir_pass.RemoveNoOp(stmt) - stmt = tvm.ir_pass.StorageFlatten(stmt, binds, 64, True) - stmt = tvm.ir_pass.CanonicalSimplify(stmt) - stmt = tvm.ir_pass.VectorizeLoop(stmt) - stmt = tvm.ir_pass.Simplify(stmt) + bounds = tvm.te.schedule.InferBound(sch) + stmt = tvm.te.schedule.ScheduleOps(sch, bounds) + stmt = tvm.tir.ir_pass.LoopPartition(stmt, True) + stmt = tvm.tir.ir_pass.RemoveNoOp(stmt) + stmt = tvm.tir.ir_pass.StorageFlatten(stmt, binds, 64, True) + stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt) + stmt = tvm.tir.ir_pass.VectorizeLoop(stmt) + stmt = tvm.tir.ir_pass.Simplify(stmt) return stmt @pytest.mark.xfail def test_out_of_bounds_llvm(index_a, index_b): - n = tvm.size_var("n") - A = tvm.placeholder ((n,), name='A') - B = tvm.placeholder ((n,), name='B') - C = tvm.compute(A.shape, lambda i: A[i + index_a] + B[i + index_b], name='C') - s = tvm.create_schedule (C.op) + n = te.size_var("n") + A = te.placeholder ((n,), name='A') + B = te.placeholder ((n,), name='B') + C = te.compute(A.shape, lambda i: A[i + index_a] + B[i + index_b], name='C') + s = te.create_schedule (C.op) tgt = "llvm" tgt_host = "llvm" stmt = tvm.lower (s, [A, B, C], simple_mode=True) @@ -63,11 +64,11 @@ def 
test_out_of_bounds_llvm(index_a, index_b): fadd (a, b, c) def test_in_bounds_llvm(): - n = tvm.size_var("n") - A = tvm.placeholder ((n,), name='A') - B = tvm.placeholder ((n,), name='B') - C = tvm.compute(A.shape, lambda i: A[i] + B[i], name='C') - s = tvm.create_schedule (C.op) + n = te.size_var("n") + A = te.placeholder ((n,), name='A') + B = te.placeholder ((n,), name='B') + C = te.compute(A.shape, lambda i: A[i] + B[i], name='C') + s = te.create_schedule (C.op) tgt = "llvm" tgt_host = "llvm" stmt = tvm.lower (s, [A, B, C], simple_mode=True) @@ -81,11 +82,11 @@ def test_in_bounds_llvm(): @pytest.mark.xfail def test_out_of_bounds_vectorize_llvm(nn, index_a, index_b): - n = tvm.convert(nn) - a = tvm.placeholder((n), name='a') - b = tvm.placeholder((n), name='b') - c = tvm.compute((n,), lambda i: a[i + index_a] + b[i + index_b], name='c') - s = tvm.create_schedule(c.op) + n = tvm.runtime.convert(nn) + a = te.placeholder((n), name='a') + b = te.placeholder((n), name='b') + c = te.compute((n,), lambda i: a[i + index_a] + b[i + index_b], name='c') + s = te.create_schedule(c.op) xo, xi = s[c].split(c.op.axis[0], factor=8) s[c].parallel(xo) s[c].vectorize(xi) @@ -104,10 +105,10 @@ def test_out_of_bounds_vectorize_llvm(nn, index_a, index_b): def test_in_bounds_vectorize_llvm(): n = 512 lanes = 2 - A = tvm.placeholder((n,), name='A', dtype="float32x%d" % lanes) - B = tvm.compute((n,), lambda i: A[i], name='B') - C = tvm.compute((n,), lambda i: B[i] + tvm.const(1, A.dtype), name='C') - s = tvm.create_schedule(C.op) + A = te.placeholder((n,), name='A', dtype="float32x%d" % lanes) + B = te.compute((n,), lambda i: A[i], name='B') + C = te.compute((n,), lambda i: B[i] + tvm.tir.const(1, A.dtype), name='C') + s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], nparts=2) _, xi = s[C].split(xi, factor=2) s[C].parallel(xo) @@ -128,12 +129,12 @@ def test_in_bounds_vectorize_llvm(): tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + 1) def 
test_in_bounds_loop_partition_basic_llvm(): - n = tvm.size_var('n') - A = tvm.placeholder((n, ), name='A') - B = tvm.placeholder((n, ), name='B') + n = te.size_var('n') + A = te.placeholder((n, ), name='A') + B = te.placeholder((n, ), name='B') - T = tvm.compute((n, ), lambda i: A[i]+B[i]) - s = tvm.create_schedule(T.op) + T = te.compute((n, ), lambda i: A[i]+B[i]) + s = te.create_schedule(T.op) xo, xi = s[T].split(T.op.axis[0], factor=4) lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False) print (lowered_func.body) @@ -147,12 +148,12 @@ def test_in_bounds_loop_partition_basic_llvm(): @pytest.mark.xfail def test_out_of_bounds_loop_partition_basic_llvm(index_a, index_b): - n = tvm.size_var('n') - A = tvm.placeholder((n, ), name='A') - B = tvm.placeholder((n, ), name='B') + n = te.size_var('n') + A = te.placeholder((n, ), name='A') + B = te.placeholder((n, ), name='B') - T = tvm.compute((n, ), lambda i: A[i + index_a]+B[i + index_b]) - s = tvm.create_schedule(T.op) + T = te.compute((n, ), lambda i: A[i + index_a]+B[i + index_b]) + s = te.create_schedule(T.op) xo, xi = s[T].split(T.op.axis[0], factor=4) lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False) print (lowered_func.body) @@ -187,20 +188,20 @@ def test_in_bounds_const_loop_partition_ir(): branch_collector.append(x) n = 21 - A = tvm.placeholder((n, ), name='A') - B = tvm.placeholder((n, ), name='B') + A = te.placeholder((n, ), name='A') + B = te.placeholder((n, ), name='B') - T = tvm.compute((n, ), lambda i: A[i]+B[i]) - s = tvm.create_schedule(T.op) + T = te.compute((n, ), lambda i: A[i]+B[i]) + s = te.create_schedule(T.op) xo, xi = s[T].split(T.op.axis[0], factor=4) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) stmt = lower (s, [A, B, T]) # num_attributes = num_buffers * num_splits = 2 * 3 # before instrumentation assert_bound_instrumentation(stmt, check_attr_stmt, 2 * 3) assert_bound_instrumentation(stmt, check_branch_stmt, 0) - stmt = 
tvm.ir_pass.InstrumentBoundCheckers(stmt) + stmt = tvm.tir.ir_pass.InstrumentBoundCheckers(stmt) # after instrumentation assert_bound_instrumentation(stmt, check_attr_stmt, 2 * 3) assert_bound_instrumentation(stmt, check_branch_stmt, 2) @@ -212,13 +213,13 @@ def test_in_bounds_const_loop_partition_ir(): print (branch_collector[1].condition) def test_in_bounds_const_loop_partition_llvm(): - with tvm.build_config(instrument_bound_checkers=True, partition_const_loop=True): + with tvm.target.build_config(instrument_bound_checkers=True, partition_const_loop=True): n = 21 - A = tvm.placeholder((n, ), name='A') - B = tvm.placeholder((n, ), name='B') + A = te.placeholder((n, ), name='A') + B = te.placeholder((n, ), name='B') - T = tvm.compute((n, ), lambda i: A[i]+B[i]) - s = tvm.create_schedule(T.op) + T = te.compute((n, ), lambda i: A[i]+B[i]) + s = te.create_schedule(T.op) xo, xi = s[T].split(T.op.axis[0], factor=4) lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False) print (lowered_func.body) @@ -232,13 +233,13 @@ def test_in_bounds_const_loop_partition_llvm(): @pytest.mark.xfail def test_out_of_bounds_const_loop_partition_llvm(index_a, index_b): - with tvm.build_config(instrument_bound_checkers=True, partition_const_loop=True): + with tvm.target.build_config(instrument_bound_checkers=True, partition_const_loop=True): n = 21 - A = tvm.placeholder((n, ), name='A') - B = tvm.placeholder((n, ), name='B') + A = te.placeholder((n, ), name='A') + B = te.placeholder((n, ), name='B') - T = tvm.compute((n, ), lambda i: A[i + index_a]+B[i + index_b]) - s = tvm.create_schedule(T.op) + T = te.compute((n, ), lambda i: A[i + index_a]+B[i + index_b]) + s = te.create_schedule(T.op) xo, xi = s[T].split(T.op.axis[0], factor=4) lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False) print (lowered_func.body) @@ -258,18 +259,18 @@ def test_in_bounds_conv_llvm(loop_tiling=False): batch_size = 1 in_height = in_width = 64 out_height = out_width = in_height - 
kernel_height + 1 - data = tvm.placeholder((batch_size, in_channel, in_height, in_width), name='data') - kernel = tvm.placeholder((kernel_height, kernel_width, in_channel, + data = te.placeholder((batch_size, in_channel, in_height, in_width), name='data') + kernel = te.placeholder((kernel_height, kernel_width, in_channel, out_channel), name='kernel') - ic = tvm.reduce_axis((0, in_channel), name='ic') - kh = tvm.reduce_axis((0, kernel_height), name='kh') - kw = tvm.reduce_axis((0, kernel_width), name='kw') - conv = tvm.compute((batch_size, out_channel, out_height, out_width), - lambda n, oc, oh, ow: tvm.sum(data[n, ic, oh*HSTR + kh, ow*WSTR + kw] * + ic = te.reduce_axis((0, in_channel), name='ic') + kh = te.reduce_axis((0, kernel_height), name='kh') + kw = te.reduce_axis((0, kernel_width), name='kw') + conv = te.compute((batch_size, out_channel, out_height, out_width), + lambda n, oc, oh, ow: te.sum(data[n, ic, oh*HSTR + kh, ow*WSTR + kw] * kernel[kh, kw, ic, oc], axis=[ic, kh, kw]), name="conv2d") - s = tvm.create_schedule(conv.op) + s = te.create_schedule(conv.op) n, oc, oh, ow = conv.op.axis if loop_tiling: @@ -280,10 +281,10 @@ def test_in_bounds_conv_llvm(loop_tiling=False): f = tvm.build(s, [data, kernel, conv], "llvm") data_input = tvm.nd.array(np.random.uniform( - size=(batch_size, in_channel, in_height, in_width)).astype(tvm.float32), ctx) + size=(batch_size, in_channel, in_height, in_width)).astype("float32"), ctx) kernel_input = tvm.nd.array(np.random.uniform( - size=(kernel_height, kernel_width, in_channel, out_channel)).astype(tvm.float32), ctx) - conv_out = tvm.nd.empty ((batch_size, out_channel, out_height, out_width), tvm.float32, ctx) + size=(kernel_height, kernel_width, in_channel, out_channel)).astype("float32"), ctx) + conv_out = tvm.nd.empty ((batch_size, out_channel, out_height, out_width), "float32", ctx) f(data_input, kernel_input, conv_out) @pytest.mark.xfail @@ -295,14 +296,14 @@ def test_out_of_bounds_conv_llvm(data_offsets, 
kernel_offsets, loop_tiling=False batch_size = 1 in_height = in_width = 64 out_height = out_width = in_height - kernel_height + 1 - data = tvm.placeholder((batch_size, in_channel, in_height, in_width), name='data') - kernel = tvm.placeholder((kernel_height, kernel_width, in_channel, + data = te.placeholder((batch_size, in_channel, in_height, in_width), name='data') + kernel = te.placeholder((kernel_height, kernel_width, in_channel, out_channel), name='kernel') - ic = tvm.reduce_axis((0, in_channel), name='ic') - kh = tvm.reduce_axis((0, kernel_height), name='kh') - kw = tvm.reduce_axis((0, kernel_width), name='kw') - conv = tvm.compute((batch_size, out_channel, out_height, out_width), - lambda n, oc, oh, ow: tvm.sum(data[n + data_offsets[0], + ic = te.reduce_axis((0, in_channel), name='ic') + kh = te.reduce_axis((0, kernel_height), name='kh') + kw = te.reduce_axis((0, kernel_width), name='kw') + conv = te.compute((batch_size, out_channel, out_height, out_width), + lambda n, oc, oh, ow: te.sum(data[n + data_offsets[0], ic + data_offsets[1], oh*HSTR + kh + data_offsets[2], ow*WSTR + kw + data_offsets[3]] @@ -313,7 +314,7 @@ def test_out_of_bounds_conv_llvm(data_offsets, kernel_offsets, loop_tiling=False oc + kernel_offsets[3]], axis=[ic, kh, kw]), name="conv2d") - s = tvm.create_schedule(conv.op) + s = te.create_schedule(conv.op) n, oc, oh, ow = conv.op.axis if loop_tiling: @@ -324,21 +325,21 @@ def test_out_of_bounds_conv_llvm(data_offsets, kernel_offsets, loop_tiling=False f = tvm.build(s, [data, kernel, conv], "llvm") data_input = tvm.nd.array(np.random.uniform( - size=(batch_size, in_channel, in_height, in_width)).astype(tvm.float32), ctx) + size=(batch_size, in_channel, in_height, in_width)).astype("float32"), ctx) kernel_input = tvm.nd.array(np.random.uniform( - size=(kernel_height, kernel_width, in_channel, out_channel)).astype(tvm.float32), ctx) - conv_out = tvm.nd.empty ((batch_size, out_channel, out_height, out_width), tvm.float32, ctx) + 
size=(kernel_height, kernel_width, in_channel, out_channel)).astype("float32"), ctx) + conv_out = tvm.nd.empty ((batch_size, out_channel, out_height, out_width), "float32", ctx) f(data_input, kernel_input, conv_out) def test_in_bounds_tensors_with_same_shapes1D_llvm(): - n = tvm.size_var('n') - k = tvm.size_var('k') - m = tvm.size_var('m') - A = tvm.placeholder((n, ), name='A') - B = tvm.placeholder((k, ), name='B') - - T = tvm.compute((m, ), lambda i: A[i]*B[i]) - s = tvm.create_schedule(T.op) + n = te.size_var('n') + k = te.size_var('k') + m = te.size_var('m') + A = te.placeholder((n, ), name='A') + B = te.placeholder((k, ), name='B') + + T = te.compute((m, ), lambda i: A[i]*B[i]) + s = te.create_schedule(T.op) lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False) print (lowered_func.body) ctx = tvm.cpu(0) @@ -351,14 +352,14 @@ def test_in_bounds_tensors_with_same_shapes1D_llvm(): @pytest.mark.xfail def test_out_of_bounds_tensors_with_diff_shapes1D_llvm(a_shape, b_shape, c_shape): - n = tvm.size_var('n') - k = tvm.size_var('k') - m = tvm.size_var('m') - A = tvm.placeholder((n, ), name='A') - B = tvm.placeholder((k, ), name='B') - - T = tvm.compute((m, ), lambda i: A[i]*B[i]) - s = tvm.create_schedule(T.op) + n = te.size_var('n') + k = te.size_var('k') + m = te.size_var('m') + A = te.placeholder((n, ), name='A') + B = te.placeholder((k, ), name='B') + + T = te.compute((m, ), lambda i: A[i]*B[i]) + s = te.create_schedule(T.op) lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False) print (lowered_func.body) ctx = tvm.cpu(0) @@ -370,14 +371,14 @@ def test_out_of_bounds_tensors_with_diff_shapes1D_llvm(a_shape, b_shape, c_shape f(a, b, t) def test_in_bounds_tensors_with_same_shapes2D_llvm(): - n = tvm.size_var('n') - k = tvm.size_var('k') - m = tvm.size_var('m') - A = tvm.placeholder((n, n), name='A') - B = tvm.placeholder((k, k), name='B') - - T = tvm.compute((m, m), lambda i, j: A[i][j]*B[i][j]) - s = tvm.create_schedule(T.op) + n = 
te.size_var('n') + k = te.size_var('k') + m = te.size_var('m') + A = te.placeholder((n, n), name='A') + B = te.placeholder((k, k), name='B') + + T = te.compute((m, m), lambda i, j: A[i][j]*B[i][j]) + s = te.create_schedule(T.op) lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False) print (lowered_func.body) ctx = tvm.cpu(0) @@ -390,14 +391,14 @@ def test_in_bounds_tensors_with_same_shapes2D_llvm(): @pytest.mark.xfail def test_out_of_bounds_tensors_with_diff_shapes2D_llvm(a_shape, b_shape, c_shape): - n = tvm.size_var('n') - k = tvm.size_var('k') - m = tvm.size_var('m') - A = tvm.placeholder((n, n), name='A') - B = tvm.placeholder((k, k), name='B') - - T = tvm.compute((m, m), lambda i, j: A[i][j]*B[i][j]) - s = tvm.create_schedule(T.op) + n = te.size_var('n') + k = te.size_var('k') + m = te.size_var('m') + A = te.placeholder((n, n), name='A') + B = te.placeholder((k, k), name='B') + + T = te.compute((m, m), lambda i, j: A[i][j]*B[i][j]) + s = te.create_schedule(T.op) lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False) print (lowered_func.body) ctx = tvm.cpu(0) @@ -409,14 +410,14 @@ def test_out_of_bounds_tensors_with_diff_shapes2D_llvm(a_shape, b_shape, c_shape f(a, b, t) def test_in_bounds_tensors_with_same_shapes3D_llvm(): - n = tvm.size_var('n') - k = tvm.size_var('k') - m = tvm.size_var('m') - A = tvm.placeholder((n, n, n), name='A') - B = tvm.placeholder((k, k, k), name='B') - - T = tvm.compute((m, m, m), lambda i, j, p: A[i][j][p]*B[i][j][p]) - s = tvm.create_schedule(T.op) + n = te.size_var('n') + k = te.size_var('k') + m = te.size_var('m') + A = te.placeholder((n, n, n), name='A') + B = te.placeholder((k, k, k), name='B') + + T = te.compute((m, m, m), lambda i, j, p: A[i][j][p]*B[i][j][p]) + s = te.create_schedule(T.op) lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False) print (lowered_func.body) ctx = tvm.cpu(0) @@ -429,14 +430,14 @@ def test_in_bounds_tensors_with_same_shapes3D_llvm(): @pytest.mark.xfail def 
test_out_of_bounds_tensors_with_diff_shapes3D_llvm(a_shape, b_shape, c_shape): - n = tvm.size_var('n') - k = tvm.size_var('k') - m = tvm.size_var('m') - A = tvm.placeholder((n, n, n), name='A') - B = tvm.placeholder((k, k, k), name='B') - - T = tvm.compute((m, m, m), lambda i, j, p: A[i][j][p]*B[i][j][p]) - s = tvm.create_schedule(T.op) + n = te.size_var('n') + k = te.size_var('k') + m = te.size_var('m') + A = te.placeholder((n, n, n), name='A') + B = te.placeholder((k, k, k), name='B') + + T = te.compute((m, m, m), lambda i, j, p: A[i][j][p]*B[i][j][p]) + s = te.create_schedule(T.op) lowered_func = tvm.lower (s, [A, B, T], "llvm", simple_mode=False) print (lowered_func.body) ctx = tvm.cpu(0) @@ -452,12 +453,12 @@ def test_out_of_bounds_tensors_with_zero_shape_op_with_not_zero_shape_llvm(): if not tvm.runtime.enabled("llvm"): return n = 64 - A = tvm.placeholder((n, ), name='A') - scale = tvm.placeholder((), name='scale') - k = tvm.reduce_axis((0, n), name="k") - C = tvm.compute((), lambda : tvm.sum(A[k + k + k] * scale, axis=k), name="C") - D = tvm.compute((), lambda : C + 1) - s = tvm.create_schedule(D.op) + A = te.placeholder((n, ), name='A') + scale = te.placeholder((), name='scale') + k = te.reduce_axis((0, n), name="k") + C = te.compute((), lambda : te.sum(A[k + k + k] * scale, axis=k), name="C") + D = te.compute((), lambda : C + 1) + s = te.create_schedule(D.op) stmt = tvm.lower (s, [A, scale, D], simple_mode=True) print (stmt) # build and invoke the kernel. 
@@ -473,7 +474,7 @@ def test_out_of_bounds_tensors_with_zero_shape_op_with_not_zero_shape_llvm(): tvm.testing.assert_allclose(d.asnumpy(), d_np) if __name__ == "__main__": - with tvm.build_config(instrument_bound_checkers=True): + with tvm.target.build_config(instrument_bound_checkers=True): # zero scale test_out_of_bounds_tensors_with_zero_shape_op_with_not_zero_shape_llvm() # in bound diff --git a/tests/python/unittest/test_pass_combine_context_call.py b/tests/python/unittest/test_pass_combine_context_call.py index ef741a4..e51d4d8 100644 --- a/tests/python/unittest/test_pass_combine_context_call.py +++ b/tests/python/unittest/test_pass_combine_context_call.py @@ -15,28 +15,29 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_for(): - dev_type = tvm.var("dev_type") + dev_type = te.var("dev_type") def device_context(dev_id): - ctx = tvm.call_extern("handle", "device_context", dev_type, dev_id) + ctx = tvm.tir.call_extern("handle", "device_context", dev_type, dev_id) return tvm.tir.Call( "handle", "tvm_thread_context", [ctx], tvm.tir.Call.Intrinsic, None, 0) - ib = tvm.ir_builder.create() - n = tvm.var("n") + ib = tvm.tir.ir_builder.create() + n = te.var("n") A = ib.allocate("float32", n, name="A", scope="global") with ib.for_range(0, n, name="i") as i: - ib.emit(tvm.call_extern + ib.emit(tvm.tir.call_extern ("int32", "fadd", device_context(0), A)) with ib.for_range(0, 10, name="j") as j: - ib.emit(tvm.call_extern + ib.emit(tvm.tir.call_extern ("int32", "fadd", device_context(1), A)) - ib.emit(tvm.call_extern + ib.emit(tvm.tir.call_extern ("int32", "fadd", device_context(0), A)) body = ib.get() - f = tvm.ir_pass.MakeAPI(body, "func", [dev_type, n], 2, True) - f = tvm.ir_pass.CombineContextCall(f) + f = tvm.tir.ir_pass.MakeAPI(body, "func", [dev_type, n], 2, True) + f = tvm.tir.ir_pass.CombineContextCall(f) assert f.body.value.dtype == "handle" assert f.body.body.value.dtype == "handle" diff 
--git a/tests/python/unittest/test_pass_decorate_device_scope.py b/tests/python/unittest/test_pass_decorate_device_scope.py index b464354..327cfd9 100644 --- a/tests/python/unittest/test_pass_decorate_device_scope.py +++ b/tests/python/unittest/test_pass_decorate_device_scope.py @@ -15,24 +15,25 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_decorate_device(): - m = tvm.size_var('m') - l = tvm.size_var('l') - A = tvm.placeholder((m, l), name='A') + m = te.size_var('m') + l = te.size_var('l') + A = te.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1') - A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') + A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1') + A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') - s = tvm.create_schedule(A2.op) + s = te.create_schedule(A2.op) xo, xi = s[A2].split(A2.op.axis[0], factor=8) s[A1].compute_at(s[A2], xo) s[A1].set_scope("shared") - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) - stmt1 = tvm.ir_pass.Simplify(stmt) - stmt2 = tvm.ir_pass.DecorateDeviceScope(stmt1) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + stmt1 = tvm.tir.ir_pass.Simplify(stmt) + stmt2 = tvm.tir.ir_pass.DecorateDeviceScope(stmt1) assert isinstance(stmt2, tvm.tir.AttrStmt) assert stmt2.attr_key == "device_scope" assert stmt1 == stmt2.body diff --git a/tests/python/unittest/test_pass_equal.py b/tests/python/unittest/test_pass_equal.py index 1f5bb9c..873cb7b 100644 --- a/tests/python/unittest/test_pass_equal.py +++ b/tests/python/unittest/test_pass_equal.py @@ -15,39 +15,40 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te def test_equal_expr(): - x = tvm.var('x') - y = tvm.var('y') + x = te.var('x') + y = te.var('y') def func1(): return x + y + 1 def func2(): - return tvm.exp(tvm.truncdiv((x + y + 1) * y, 4)) + return te.exp(tvm.tir.truncdiv((x + y + 1) * y, 4)) - assert tvm.ir_pass.Equal(func1(), func1()) - assert tvm.ir_pass.Equal(func2(), func2()) - assert not tvm.ir_pass.Equal(func2(), func1()) + assert tvm.tir.ir_pass.Equal(func1(), func1()) + assert tvm.tir.ir_pass.Equal(func2(), func2()) + assert not tvm.tir.ir_pass.Equal(func2(), func1()) def test_equal_compute(): - x = tvm.var('x') - y = tvm.var('y') + x = te.var('x') + y = te.var('y') n = 128 - A = tvm.placeholder((n, n), name='A') - B = tvm.placeholder((n, n), name='B') - ii = tvm.var('i') - jj = tvm.var('j') + A = te.placeholder((n, n), name='A') + B = te.placeholder((n, n), name='B') + ii = te.var('i') + jj = te.var('j') def func1(): - k = tvm.reduce_axis((0, n), name='k') - return tvm.sum(A[ii, k] * B[jj, k], axis=k) + k = te.reduce_axis((0, n), name='k') + return te.sum(A[ii, k] * B[jj, k], axis=k) - Ab = tvm.decl_buffer((n,), name='A') - n = tvm.var("n") + Ab = tvm.tir.decl_buffer((n,), name='A') + n = te.var("n") def func2(): - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() A = ib.buffer_ptr(Ab) with ib.for_range(0, n, name="i") as i: A[i] = A[i] + 1 @@ -56,8 +57,8 @@ def test_equal_compute(): A[j] = A[j] + 2 return ib.get() - assert tvm.ir_pass.Equal(func1(), func1()) - assert tvm.ir_pass.Equal(func2(), func2()) + assert tvm.tir.ir_pass.Equal(func1(), func1()) + assert tvm.tir.ir_pass.Equal(func2(), func2()) if __name__ == "__main__": diff --git a/tests/python/unittest/test_pass_hoist_if.py b/tests/python/unittest/test_pass_hoist_if.py index 2eb641b..f6bdbd6 100644 --- a/tests/python/unittest/test_pass_hoist_if.py +++ b/tests/python/unittest/test_pass_hoist_if.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te var_list = [] @@ -31,7 +32,7 @@ def verify_structure(stmt, expected_struct): key = op if isinstance(op, tvm.tir.IfThenElse): global var_list - tvm.ir_pass.PostOrderVisit(op.condition, _extract_vars) + tvm.tir.ir_pass.PostOrderVisit(op.condition, _extract_vars) val = [(op.then_case, op.else_case), ("IfThenElse", tuple(var_list))] var_list.clear() elif isinstance(op, tvm.tir.For): @@ -42,7 +43,7 @@ def verify_structure(stmt, expected_struct): return node_dict[key] = val - tvm.ir_pass.PostOrderVisit(stmt, _visit) + tvm.tir.ir_pass.PostOrderVisit(stmt, _visit) for key, val in node_dict.items(): struct[val[1]] = tuple(node_dict[child][1] if child in node_dict else None for child in val[0]) @@ -52,10 +53,10 @@ def verify_structure(stmt, expected_struct): var_list.clear() def test_basic(): - ib = tvm.ir_builder.create() - l = tvm.var('l') - m = tvm.var('m') - n = tvm.var('n') + ib = tvm.tir.ir_builder.create() + l = te.var('l') + m = te.var('m') + n = te.var('n') with ib.for_range(0, l, "i") as i: with ib.for_range(0, m, "j") as j: @@ -66,17 +67,17 @@ def test_basic(): ib.emit(tvm.tir.Evaluate(n)) stmt = ib.get() - new_stmt = tvm.ir_pass.HoistIfThenElse(stmt) + new_stmt = tvm.tir.ir_pass.HoistIfThenElse(stmt) expected_struct = {('For', 'k'): (None,), ('For', 'j'): (('For', 'k'),), ('IfThenElse', ('i',)): (('For', 'j'), ('For', 'j')), ('For', 'i'): (('IfThenElse', ('i',)),)} verify_structure(new_stmt, expected_struct) def test_no_else(): - ib = tvm.ir_builder.create() - l = tvm.var('l') - m = tvm.var('m') - n = tvm.var('n') + ib = tvm.tir.ir_builder.create() + l = te.var('l') + m = te.var('m') + n = te.var('n') with ib.for_range(0, l, "i") as i: with ib.for_range(0, m, "j") as j: @@ -85,34 +86,34 @@ def test_no_else(): ib.emit(tvm.tir.Evaluate(m)) stmt = ib.get() - new_stmt = tvm.ir_pass.HoistIfThenElse(stmt) + new_stmt = tvm.tir.ir_pass.HoistIfThenElse(stmt) expected_struct = {('For', 'k'): (None,), ('For', 'j'): (('For', 'k'),), 
('IfThenElse', ('i',)): (('For', 'j'), None), ('For', 'i'): (('IfThenElse', ('i',)),)} verify_structure(new_stmt, expected_struct) def test_attr_stmt(): - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() dshape = (32, 64) data = ib.pointer("float32", name="data") - l = tvm.var('l') - m = tvm.var('m') - n = tvm.var('n') + l = te.var('l') + m = te.var('m') + n = te.var('n') - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("blockIdx.x") ib.scope_attr(tx, "thread_extent", dshape[0]) ib.scope_attr(bx, "thread_extent", dshape[1]) with ib.for_range(0, l, "i") as i: with ib.for_range(0, m, "j") as j: with ib.for_range(0, n, "k") as k: - with ib.if_scope(tvm.any(i < 4, j >= 8)): + with ib.if_scope(tvm.tir.any(i < 4, j >= 8)): data[bx * j + tx * j * k] = data[bx * j + tx * j * k] + 0.5 with ib.else_scope(): data[bx * j + tx * j * k] = data[bx * j + tx * j * k] + 1.0 stmt = ib.get() - new_stmt = tvm.ir_pass.HoistIfThenElse(stmt) + new_stmt = tvm.tir.ir_pass.HoistIfThenElse(stmt) expected_struct = {('For', 'k'): (None,), ('IfThenElse', ('i', 'j')): (('For', 'k'), ('For', 'k')), ('For', 'j'): (('IfThenElse', ('i', 'j')),), ('For', 'i'): (('For', 'j'),), ('AttrStmt', 'thread_extent', 64): (('For', 'i'),), @@ -120,7 +121,7 @@ def test_attr_stmt(): verify_structure(new_stmt, expected_struct) def test_nested_for(): - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() data = ib.pointer("float32", name="data") @@ -130,22 +131,22 @@ def test_nested_for(): data[i * 3 + j] = data[i * 3 + j] + 0.5 with ib.for_range(0, 15, "k") as k: with ib.for_range(0, 20, "l") as l: - with ib.if_scope(tvm.any(i < 4, j >= 8)): + with ib.if_scope(tvm.tir.any(i < 4, j >= 8)): data[i * 3 + j + k + l] = data[i * 3 + j + k + l] * 2 with ib.else_scope(): data[i * 3 + j + k + l] = data[i * 3 + j + k + l] * 1.5 stmt = ib.get() - new_stmt = tvm.ir_pass.HoistIfThenElse(stmt) + new_stmt = 
tvm.tir.ir_pass.HoistIfThenElse(stmt) expected_struct = {('IfThenElse', ('i', 'j')): (None, None), ('For', 'l'): (('IfThenElse', ('i', 'j')),), ('For', 'k'): (('For', 'l'),), ('For', 'j'): (None,), ('IfThenElse', ('i',)): (('For', 'j'), None), ('For', 'i'): (('IfThenElse', ('i',)),)} verify_structure(new_stmt, expected_struct) def test_if_block(): - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() data = ib.pointer("float32", name="data") - n = tvm.var("n") + n = te.var("n") with ib.for_range(0, 5, "i") as i: @@ -154,7 +155,7 @@ def test_if_block(): data[i * 3 + j] = data[i * 3 + j] + 0.5 with ib.for_range(0, 15, "k") as k: with ib.for_range(0, 20, "l") as l: - with ib.if_scope(tvm.any(i < 4, j >= 8)): + with ib.if_scope(tvm.tir.any(i < 4, j >= 8)): data[i * 3 + j + k + l] = data[i * 3 + j + k + l] * 2 with ib.else_scope(): data[i * 3 + j + k + l] = data[i * 3 + j + k + l] * 1.5 @@ -169,7 +170,7 @@ def test_if_block(): data[i * 3 + j + k] = data[i * 3 + j + k] + 0.6 stmt = ib.get() - new_stmt = tvm.ir_pass.HoistIfThenElse(stmt) + new_stmt = tvm.tir.ir_pass.HoistIfThenElse(stmt) expected_struct = {('IfThenElse', ('i', 'j')): (None, None), ('IfThenElse', ('j',)): (None, None), ('For', 'l'): (None,), ('For', 'k'): (None,), ('For', 'j'): (('For', 'j'),), ('IfThenElse', ('i',)): (('For', 'j'), None), ('For', 'i'): (('IfThenElse', ('i',)),), diff --git a/tests/python/unittest/test_pass_inject_copy_intrin.py b/tests/python/unittest/test_pass_inject_copy_intrin.py index f49388d..8c34e34 100644 --- a/tests/python/unittest/test_pass_inject_copy_intrin.py +++ b/tests/python/unittest/test_pass_inject_copy_intrin.py @@ -15,102 +15,103 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te def test_copy2d(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((m, l), name='A') - B = tvm.compute((m, l), lambda i, j: A[i, j], name='B') - s = tvm.create_schedule(B.op) + m = te.var('m') + l = te.var('l') + A = te.placeholder((m, l), name='A') + B = te.compute((m, l), lambda i, j: A[i, j], name='B') + s = te.create_schedule(B.op) s[B].pragma(B.op.axis[0], "memcpy") - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - Bb = tvm.decl_buffer(B.shape, B.dtype, name='B') - stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B') + stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64) def cb(src, dst, pad_before, pad_after, pad_value): assert dst.strides[0] == l assert dst.strides[1].value == 1 assert src.strides[0] == l assert tuple(src.shape) == (m, l) return tvm.tir.Evaluate(0) - stmt = tvm.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb) + stmt = tvm.tir.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb) def test_copy_pad(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((m, l), name='A') - B = tvm.compute((m + 2, l), lambda i, j: - tvm.if_then_else(tvm.all(i >= 1, i < m + 1), + m = te.var('m') + l = te.var('l') + A = te.placeholder((m, l), name='A') + B = te.compute((m + 2, l), lambda i, j: + tvm.tir.if_then_else(tvm.tir.all(i >= 1, i < m + 1), A[i - 1, j], 1.0), name='B') - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) s[B].pragma(B.op.axis[0], "memcpy") - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - Bb = tvm.decl_buffer(B.shape, B.dtype, name='B') - stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64) + bounds = 
tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B') + stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64) def cb(src, dst, pad_before, pad_after, pad_value): - assert tvm.ir_pass.Simplify(src.elem_offset).value == 0 + assert tvm.tir.ir_pass.Simplify(src.elem_offset).value == 0 assert pad_before[0].value == 1 assert pad_before[1].value == 0 assert pad_after[0].value == 1 assert pad_after[1].value == 0 assert pad_value.value == 1.0 return tvm.tir.Evaluate(0) - stmt = tvm.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb) + stmt = tvm.tir.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb) def test_single_point_test(): - A = tvm.placeholder((1,), name='A') - B = tvm.compute((1,), lambda i: + A = te.placeholder((1,), name='A') + B = te.compute((1,), lambda i: A[i], name='B') - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) s[B].pragma(B.op.axis[0], "memcpy") - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - Bb = tvm.decl_buffer(B.shape, B.dtype, name='B') - stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B') + stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64) def cb(src, dst, pad_before, pad_after, pad_value): - assert tvm.ir_pass.Simplify(src.elem_offset).value == 0 - assert tvm.ir_pass.Simplify(dst.elem_offset).value == 0 - assert tvm.ir_pass.Simplify(src.strides[0]).value == 1 - assert tvm.ir_pass.Simplify(dst.strides[0]).value == 1 + assert tvm.tir.ir_pass.Simplify(src.elem_offset).value == 0 + assert tvm.tir.ir_pass.Simplify(dst.elem_offset).value == 0 + assert tvm.tir.ir_pass.Simplify(src.strides[0]).value == 1 + 
assert tvm.tir.ir_pass.Simplify(dst.strides[0]).value == 1 return tvm.tir.Evaluate(0) - stmt = tvm.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb) + stmt = tvm.tir.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb) def assert_expr_equal(a, b): - assert tvm.ir_pass.Simplify(a - b).value == 0 + assert tvm.tir.ir_pass.Simplify(a - b).value == 0 def test_copy_pad_split(): m = 4 * 3 - A = tvm.placeholder((m, ), name="A") - Apad = tvm.compute((m + 2,), lambda i: - tvm.if_then_else(tvm.all(i >= 1, i <= m), + A = te.placeholder((m, ), name="A") + Apad = te.compute((m + 2,), lambda i: + tvm.tir.if_then_else(tvm.tir.all(i >= 1, i <= m), A[i - 1], 0.0), "Apad") - B = tvm.compute((m,), lambda i: Apad[i] + Apad[i + 1] + Apad[i + 2]) - s = tvm.create_schedule(B.op) + B = te.compute((m,), lambda i: Apad[i] + Apad[i + 1] + Apad[i + 2]) + s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=4) s[Apad].compute_at(s[B], xo) s[Apad].pragma(s[Apad].op.axis[0], "memcpy") - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - Bb = tvm.decl_buffer(B.shape, B.dtype, name='B') - stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64) - stmt = tvm.ir_pass.Simplify(stmt) - stmt = tvm.ir_pass.CanonicalSimplify(stmt) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B') + stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64) + stmt = tvm.tir.ir_pass.Simplify(stmt) + stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt) def cb(src, dst, pad_before, pad_after, pad_value): assert(dst.elem_offset.value == 0) - assert_expr_equal(src.elem_offset, tvm.max(xo * 4, 1) - 1) + assert_expr_equal(src.elem_offset, tvm.te.max(xo * 4, 1) - 1) - rpad_before = tvm.max(1 - xo * 4, 0) - rpad_after = tvm.max(xo * 4 - 7, 0) + rpad_before = tvm.te.max(1 - xo * 4, 0) + rpad_after = 
tvm.te.max(xo * 4 - 7, 0) assert_expr_equal(pad_before[0], rpad_before) assert_expr_equal(pad_after[0], rpad_after) assert_expr_equal(src.shape[0], 6 - rpad_before - rpad_after) return tvm.tir.Evaluate(0) - stmt = tvm.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb) + stmt = tvm.tir.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb) if __name__ == "__main__": diff --git a/tests/python/unittest/test_pass_inject_double_buffer.py b/tests/python/unittest/test_pass_inject_double_buffer.py index cf8f78c..0fe3f61 100644 --- a/tests/python/unittest/test_pass_inject_double_buffer.py +++ b/tests/python/unittest/test_pass_inject_double_buffer.py @@ -15,13 +15,14 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_double_buffer(): dtype = 'int64' n = 100 m = 4 - tx = tvm.thread_axis("threadIdx.x") - ib = tvm.ir_builder.create() + tx = te.thread_axis("threadIdx.x") + ib = tvm.tir.ir_builder.create() A = ib.pointer("float32", name="A") C = ib.pointer("float32", name="C") ib.scope_attr(tx, "thread_extent", 1) @@ -35,17 +36,17 @@ def test_double_buffer(): C[j] = B[j] + 1 stmt = ib.get() - stmt = tvm.ir_pass.InjectDoubleBuffer(stmt, 2) - stmt = tvm.ir_pass.Simplify(stmt) + stmt = tvm.tir.ir_pass.InjectDoubleBuffer(stmt, 2) + stmt = tvm.tir.ir_pass.Simplify(stmt) assert isinstance(stmt.body.body, tvm.tir.Allocate) assert stmt.body.body.extents[0].value == 2 - f = tvm.ir_pass.MakeAPI(stmt, "db", [A.asobject(), C.asobject()], 2, True) - f = tvm.ir_pass.ThreadSync(f, "shared") + f = tvm.tir.ir_pass.MakeAPI(stmt, "db", [A.asobject(), C.asobject()], 2, True) + f = tvm.tir.ir_pass.ThreadSync(f, "shared") count = [0] def count_sync(op): if isinstance(op, tvm.tir.Call) and op.name == "tvm_storage_sync": count[0] += 1 - tvm.ir_pass.PostOrderVisit(f.body, count_sync) + tvm.tir.ir_pass.PostOrderVisit(f.body, count_sync) assert count[0] == 4 diff --git a/tests/python/unittest/test_pass_inject_vthread.py 
b/tests/python/unittest/test_pass_inject_vthread.py index 08e261b..8fbd829 100644 --- a/tests/python/unittest/test_pass_inject_vthread.py +++ b/tests/python/unittest/test_pass_inject_vthread.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_vthread(): dtype = 'int64' @@ -22,9 +23,9 @@ def test_vthread(): m = 4 nthread = 2 def get_vthread(name): - tx = tvm.thread_axis(name) - ty = tvm.thread_axis(name) - ib = tvm.ir_builder.create() + tx = te.thread_axis(name) + ty = te.thread_axis(name) + ib = tvm.tir.ir_builder.create() A = ib.pointer("float32", name="A") C = ib.pointer("float32", name="C") with ib.for_range(0, n) as i: @@ -32,16 +33,16 @@ def test_vthread(): ib.scope_attr(ty, "virtual_thread", nthread) B = ib.allocate("float32", m, name="B", scope="shared") B[i] = A[i * nthread + tx] - bbuffer = tvm.decl_buffer((m,), dtype=B.dtype, data=B.asobject()) - ib.emit(tvm.call_extern("int32", "Run", + bbuffer = tvm.tir.decl_buffer((m,), dtype=B.dtype, data=B.asobject()) + ib.emit(tvm.tir.call_extern("int32", "Run", bbuffer.access_ptr("r"), - tvm.call_pure_intrin("int32", "tvm_context_id"))) + tvm.tir.call_pure_intrin("int32", "tvm_context_id"))) C[i * nthread + tx] = B[i] + 1 return ib.get() - stmt = tvm.ir_pass.InjectVirtualThread(get_vthread("vthread")) + stmt = tvm.tir.ir_pass.InjectVirtualThread(get_vthread("vthread")) assert stmt.body.body.extents[0].value == 2 - stmt = tvm.ir_pass.InjectVirtualThread(get_vthread("cthread")) + stmt = tvm.tir.ir_pass.InjectVirtualThread(get_vthread("cthread")) assert len(stmt.body.body.extents) == 3 @@ -51,35 +52,35 @@ def test_vthread_extern(): m = 4 nthread = 2 def get_vthread(name): - tx = tvm.thread_axis(name) - ty = tvm.thread_axis(name) - ib = tvm.ir_builder.create() + tx = te.thread_axis(name) + ty = te.thread_axis(name) + ib = tvm.tir.ir_builder.create() with ib.for_range(0, n) as i: ib.scope_attr(tx, "virtual_thread", nthread) 
ib.scope_attr(ty, "virtual_thread", nthread) A = ib.allocate("float32", m, name="A", scope="shared") B = ib.allocate("float32", m, name="B", scope="shared") C = ib.allocate("float32", m, name="C", scope="shared") - cbuffer = tvm.decl_buffer((m,), dtype=C.dtype, data=C.asobject()) - abuffer = tvm.decl_buffer((m,), dtype=A.dtype, data=A.asobject()) - bbuffer = tvm.decl_buffer((m,), dtype=B.dtype, data=B.asobject()) + cbuffer = tvm.tir.decl_buffer((m,), dtype=C.dtype, data=C.asobject()) + abuffer = tvm.tir.decl_buffer((m,), dtype=A.dtype, data=A.asobject()) + bbuffer = tvm.tir.decl_buffer((m,), dtype=B.dtype, data=B.asobject()) A[tx] = tx + 1.0 B[ty] = ty + 1.0 - ib.emit(tvm.call_extern("int32", "Run", + ib.emit(tvm.tir.call_extern("int32", "Run", abuffer.access_ptr("r"), bbuffer.access_ptr("r"), cbuffer.access_ptr("rw"))) return ib.get() - stmt = tvm.ir_pass.InjectVirtualThread(get_vthread("vthread")) + stmt = tvm.tir.ir_pass.InjectVirtualThread(get_vthread("vthread")) assert stmt.body.body.extents[0].value == 2 assert stmt.body.body.body.body.body.body.extents[0].value == 2 assert len(stmt.body.body.body.body.body.body.extents) == 3 def test_vthread_if_then_else(): nthread = 2 - tx = tvm.thread_axis("vthread") - ib = tvm.ir_builder.create() + tx = te.thread_axis("vthread") + ib = tvm.tir.ir_builder.create() A = ib.pointer("float32", name="A") with ib.for_range(0, 100) as i: ib.scope_attr(tx, "virtual_thread", nthread) @@ -91,7 +92,7 @@ def test_vthread_if_then_else(): with ib.if_scope(i == 0): B[i] = A[i * nthread + tx] + 2 stmt = ib.get() - stmt = tvm.ir_pass.InjectVirtualThread(stmt) + stmt = tvm.tir.ir_pass.InjectVirtualThread(stmt) assert stmt.body.body.body[0].else_case != None assert stmt.body.body.body[1].else_case == None diff --git a/tests/python/unittest/test_pass_inline.py b/tests/python/unittest/test_pass_inline.py index 521a6f9..ad0591d 100644 --- a/tests/python/unittest/test_pass_inline.py +++ b/tests/python/unittest/test_pass_inline.py @@ -15,37 
+15,38 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_inline(): - m = tvm.size_var('m') - A = tvm.placeholder((m,), name='A') - T = tvm.compute((m,), lambda i,: A[i] + 10, name='T') + m = te.size_var('m') + A = te.placeholder((m,), name='A') + T = te.compute((m,), lambda i,: A[i] + 10, name='T') stmt = tvm.tir.Evaluate(T[10] + 11 * T[100]) - stmt = tvm.ir_pass.Inline( + stmt = tvm.tir.ir_pass.Inline( stmt, T.op, [x.var for x in T.op.axis], T.op.body[0]) print(stmt) - assert(tvm.ir_pass.VerifySSA(stmt)) + assert(tvm.tir.ir_pass.VerifySSA(stmt)) try: # pass in int array(wrong argument type) # must raise an error - stmt = tvm.ir_pass.Inline( + stmt = tvm.tir.ir_pass.Inline( T.op, [1,2,3], T.op.body, stmt) assert False except tvm.error.TVMError: pass def test_inline2(): - m = tvm.size_var('m') - A = tvm.placeholder((m,), name='A') - T = tvm.compute((m,), lambda i,: A[i] + 10, name='T') - stmt = tvm.tir.Evaluate(tvm.exp(T[10]) + 11 * T[100]) - stmt = tvm.ir_pass.Inline( + m = te.size_var('m') + A = te.placeholder((m,), name='A') + T = te.compute((m,), lambda i,: A[i] + 10, name='T') + stmt = tvm.tir.Evaluate(te.exp(T[10]) + 11 * T[100]) + stmt = tvm.tir.ir_pass.Inline( stmt, T.op, [x.var for x in T.op.axis], T.op.body[0]) def check(op): if isinstance(op, tvm.tir.Call): assert op.func != T.op - tvm.ir_pass.PostOrderVisit(stmt, check) + tvm.tir.ir_pass.PostOrderVisit(stmt, check) if __name__ == "__main__": diff --git a/tests/python/unittest/test_pass_ir_transform.py b/tests/python/unittest/test_pass_ir_transform.py index b024a3c..cb7417a 100644 --- a/tests/python/unittest/test_pass_ir_transform.py +++ b/tests/python/unittest/test_pass_ir_transform.py @@ -15,28 +15,29 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te def test_ir_transform(): - ib = tvm.ir_builder.create() - n = tvm.var("n") + ib = tvm.tir.ir_builder.create() + n = te.var("n") with ib.for_range(0, n, name="i") as i: with ib.for_range(0, 10, name="j") as j: - x = tvm.call_extern("int32", "TestA", i * 3 + j * 1) - ib.emit(tvm.call_extern("int32", "TestB", x)) - ib.emit(tvm.call_extern("int32", "TestC", x)) + x = tvm.tir.call_extern("int32", "TestA", i * 3 + j * 1) + ib.emit(tvm.tir.call_extern("int32", "TestB", x)) + ib.emit(tvm.tir.call_extern("int32", "TestC", x)) body = ib.get() def preorder(op): if op.name == "TestC": - return tvm.const(0, "int32") + return tvm.tir.const(0, "int32") return None def postorder(op): assert isinstance(op, tvm.tir.Call) if op.name == "TestA": - return tvm.call_extern("int32", "TestB", op.args[0] + 1) + return tvm.tir.call_extern("int32", "TestB", op.args[0] + 1) return op - body = tvm.ir_pass.IRTransform(body, preorder, postorder, ["Call"]) + body = tvm.tir.ir_pass.IRTransform(body, preorder, postorder, ["Call"]) stmt_list = tvm.tir.stmt_list(body.body.body) assert stmt_list[0].value.args[0].name == "TestB" assert stmt_list[1].value.value == 0 diff --git a/tests/python/unittest/test_pass_lift_attr_scope.py b/tests/python/unittest/test_pass_lift_attr_scope.py index 181f4ef..0831565 100644 --- a/tests/python/unittest/test_pass_lift_attr_scope.py +++ b/tests/python/unittest/test_pass_lift_attr_scope.py @@ -15,11 +15,12 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te def test_coproc_lift(): - ib = tvm.ir_builder.create() - n = tvm.var("n") - cp = tvm.thread_axis((0, 1), "cop") + ib = tvm.tir.ir_builder.create() + n = te.var("n") + cp = te.thread_axis((0, 1), "cop") value = tvm.tir.StringImm("xxx") A = ib.allocate("float32", n, name="A", scope="global") @@ -34,11 +35,11 @@ def test_coproc_lift(): A[j] = A[j] + 3 A[j] = A[j] + 3 body = ib.get() - body = tvm.ir_pass.LiftAttrScope(body, "coproc_uop_scope") + body = tvm.tir.ir_pass.LiftAttrScope(body, "coproc_uop_scope") assert body.body.body.node == cp # only able to lift to the common pattern of the last two fors. - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() A = ib.allocate("float32", n, name="A", scope="global") with ib.for_range(0, n, name="i") as i: with ib.for_range(0, 10, name="j") as j: @@ -51,7 +52,7 @@ def test_coproc_lift(): A[i] = A[i] + 2 body = ib.get() - body = tvm.ir_pass.LiftAttrScope(body, "coproc_uop_scope") + body = tvm.tir.ir_pass.LiftAttrScope(body, "coproc_uop_scope") assert body.body.body.body[1].node == cp assert len(body.body.body.body) == 2 diff --git a/tests/python/unittest/test_pass_loop_partition.py b/tests/python/unittest/test_pass_loop_partition.py index e9df98e..7ec35e6 100644 --- a/tests/python/unittest/test_pass_loop_partition.py +++ b/tests/python/unittest/test_pass_loop_partition.py @@ -15,11 +15,12 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te import numpy def collect_visit(stmt, f): ret = [] - tvm.ir_pass.PostOrderVisit(stmt, lambda x : ret.append(f(x))) + tvm.tir.ir_pass.PostOrderVisit(stmt, lambda x : ret.append(f(x))) return ret def find_top_produce(stmt): @@ -27,65 +28,65 @@ def find_top_produce(stmt): if isinstance(x, tvm.tir.ProducerConsumer): ret.append(x) ret = [] - tvm.ir_pass.PostOrderVisit(stmt, lambda x : f(x, ret)) + tvm.tir.ir_pass.PostOrderVisit(stmt, lambda x : f(x, ret)) return ret[-1] def lower(sch, args): binds = {} arg_list = [] for x in args: - if isinstance(x, tvm.tensor.Tensor): - buf = tvm.decl_buffer(x.shape, dtype=x.dtype, name=x.name) + if isinstance(x, te.tensor.Tensor): + buf = tvm.tir.decl_buffer(x.shape, dtype=x.dtype, name=x.name) assert x not in binds binds[x] = buf arg_list.append(buf) else: raise ValueError("args must be Tensor, Buffer or Var") sch = sch.normalize() - bounds = tvm.schedule.InferBound(sch) - stmt = tvm.schedule.ScheduleOps(sch, bounds) - stmt = tvm.ir_pass.LoopPartition(stmt, False) - stmt = tvm.ir_pass.StorageFlatten(stmt, binds, 64) - stmt = tvm.ir_pass.CanonicalSimplify(stmt) - stmt = tvm.ir_pass.VectorizeLoop(stmt) - stmt = tvm.ir_pass.Simplify(stmt) + bounds = tvm.te.schedule.InferBound(sch) + stmt = tvm.te.schedule.ScheduleOps(sch, bounds) + stmt = tvm.tir.ir_pass.LoopPartition(stmt, False) + stmt = tvm.tir.ir_pass.StorageFlatten(stmt, binds, 64) + stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt) + stmt = tvm.tir.ir_pass.VectorizeLoop(stmt) + stmt = tvm.tir.ir_pass.Simplify(stmt) return stmt def test_basic(): - n = tvm.size_var('n') - A = tvm.placeholder((n, ), name='A') - B = tvm.placeholder((n, ), name='B') + n = te.size_var('n') + A = te.placeholder((n, ), name='A') + B = te.placeholder((n, ), name='B') - T = tvm.compute((n, ), lambda i: A[i]+B[i]) - s = tvm.create_schedule(T.op) + T = te.compute((n, ), lambda i: A[i]+B[i]) + s = te.create_schedule(T.op) xo, xi = s[T].split(T.op.axis[0], factor=4) - bounds = 
tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) - stmt = tvm.ir_pass.LoopPartition(stmt, False) - stmt = tvm.ir_pass.Simplify(stmt) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + stmt = tvm.tir.ir_pass.LoopPartition(stmt, False) + stmt = tvm.tir.ir_pass.Simplify(stmt) assert('if' not in str(stmt.body.body.body[0])) assert('if' in str(stmt.body.body.body[1])) def test_const_loop(): n = 21 - A = tvm.placeholder((n, ), name='A') - B = tvm.placeholder((n, ), name='B') + A = te.placeholder((n, ), name='A') + B = te.placeholder((n, ), name='B') - T = tvm.compute((n, ), lambda i: A[i]+B[i]) - s = tvm.create_schedule(T.op) + T = te.compute((n, ), lambda i: A[i]+B[i]) + s = te.create_schedule(T.op) xo, xi = s[T].split(T.op.axis[0], factor=4) - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) - stmt = tvm.ir_pass.LoopPartition(stmt, True) - stmt = tvm.ir_pass.Simplify(stmt) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + stmt = tvm.tir.ir_pass.LoopPartition(stmt, True) + stmt = tvm.tir.ir_pass.Simplify(stmt) assert('if' not in str(stmt.body.body.body[0])) def test_multi_loop(): - ib = tvm.ir_builder.create() - m = tvm.size_var('m') - n = tvm.size_var('n') + ib = tvm.tir.ir_builder.create() + m = te.size_var('m') + n = te.size_var('n') with ib.for_range(0, 4, "i") as i: with ib.for_range(0, n, "j") as j: with ib.for_range(0, m, "k") as k: @@ -94,14 +95,14 @@ def test_multi_loop(): with ib.else_scope(): ib.emit(tvm.tir.Evaluate(n)) stmt = ib.get() - stmt = tvm.ir_pass.LoopPartition(stmt, False) - stmt = tvm.ir_pass.Simplify(stmt) + stmt = tvm.tir.ir_pass.LoopPartition(stmt, False) + stmt = tvm.tir.ir_pass.Simplify(stmt) assert(not any(collect_visit(stmt.body[0], lambda x: isinstance(x, tvm.tir.IfThenElse)))) def test_multi_if(): - ib = tvm.ir_builder.create() - m = tvm.size_var('m') - n = tvm.size_var('n') + ib = 
tvm.tir.ir_builder.create() + m = te.size_var('m') + n = te.size_var('n') with ib.for_range(0, 4, 'i') as i: with ib.for_range(0, n, 'j') as j: with ib.for_range(0, m, 'k') as k: @@ -114,45 +115,45 @@ def test_multi_if(): with ib.else_scope(): ib.emit(tvm.tir.Evaluate(n)) stmt = ib.get() - stmt = tvm.ir_pass.LoopPartition(stmt, False) - stmt = tvm.ir_pass.Simplify(stmt) + stmt = tvm.tir.ir_pass.LoopPartition(stmt, False) + stmt = tvm.tir.ir_pass.Simplify(stmt) assert('if' not in str(stmt.body[0])) def test_thread_axis(): - m = tvm.size_var('m') - l = tvm.size_var('l') - A = tvm.placeholder((m, l), name='A') - B = tvm.compute((m, l), lambda i, j: A[i, j] + 3, name='B') - s = tvm.create_schedule(B.op) + m = te.size_var('m') + l = te.size_var('l') + A = te.placeholder((m, l), name='A') + B = te.compute((m, l), lambda i, j: A[i, j] + 3, name='B') + s = te.create_schedule(B.op) s[B].set_scope("shared") num_thread = 16 xo, xi = s[B].split(B.op.axis[0], 32) xi0, xi1 = s[B].split(xi, nparts=num_thread) - s[B].bind(xi0, tvm.thread_axis("threadIdx.x")) + s[B].bind(xi0, te.thread_axis("threadIdx.x")) - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) - stmt = tvm.ir_pass.LoopPartition(stmt, False) - stmt = tvm.ir_pass.Simplify(stmt) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + stmt = tvm.tir.ir_pass.LoopPartition(stmt, False) + stmt = tvm.tir.ir_pass.Simplify(stmt) assert('if' not in str(stmt.body.body.body[0])) def test_vectorize(): - n = tvm.size_var('n') - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - bias = tvm.size_var("bias", dtype="float32") - scale = tvm.size_var("scale", dtype="float32") - C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i) * scale + bias, name='C') + n = te.size_var('n') + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + bias = te.size_var("bias", dtype="float32") + scale = te.size_var("scale", dtype="float32") + C = 
te.compute(A.shape, lambda *i: A(*i) + B(*i) * scale + bias, name='C') # schedule - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) # create iter var and assign them tags. num_thread = 32 bx, x = s[C].split(C.op.axis[0], factor=num_thread*4) tx, x = s[C].split(x, nparts=num_thread) _, x = s[C].split(x, factor=4) - s[C].bind(bx, tvm.thread_axis("blockIdx.x")) - s[C].bind(tx, tvm.thread_axis("threadIdx.x")) + s[C].bind(bx, te.thread_axis("blockIdx.x")) + s[C].bind(tx, te.thread_axis("threadIdx.x")) s[C].vectorize(x) stmt = lower(s, [A, B]) body = stmt.body.body.body.body.body @@ -160,135 +161,135 @@ def test_vectorize(): assert(any(collect_visit(body.then_case, lambda x: isinstance(x, tvm.tir.Ramp)))) def test_condition(): - ib = tvm.ir_builder.create() - m = tvm.size_var('m') - n = tvm.size_var('n') - with ib.for_range(0, tvm.truncdiv(n+3,4), 'i') as i: + ib = tvm.tir.ir_builder.create() + m = te.size_var('m') + n = te.size_var('n') + with ib.for_range(0, tvm.tir.truncdiv(n+3,4), 'i') as i: with ib.for_range(0, 4, 'j') as j: ib.emit(tvm.tir.Evaluate( tvm.tir.Select(ib.likely(i*4+j 0)): with ib.if_scope(ib.likely(ow < 15)): - out[ow] = tvm.max(out[ow], data[ow + kw - 1]) + out[ow] = tvm.te.max(out[ow], data[ow + kw - 1]) with ib.for_range(0, 16, 'ow') as ow: with ib.for_range(0, 3, 'kw') as kw: with ib.if_scope(ib.likely(ow < 1)): with ib.if_scope(ib.likely(kw > 0)): - out[ow] = tvm.max(out[ow], data[ow + kw - 1]) + out[ow] = tvm.te.max(out[ow], data[ow + kw - 1]) with ib.for_range(0, 16, 'ow') as ow: with ib.for_range(0, 3, 'kw') as kw: with ib.if_scope(ib.likely(ow > 14)): with ib.if_scope(ib.likely(kw < 2)): - out[ow] = tvm.max(out[ow], data[ow + kw - 1]) + out[ow] = tvm.te.max(out[ow], data[ow + kw - 1]) stmt = ib.get() - stmt = tvm.ir_pass.LoopPartition(stmt, True) - stmt = tvm.ir_pass.Simplify(stmt) + stmt = tvm.tir.ir_pass.LoopPartition(stmt, True) + stmt = tvm.tir.ir_pass.Simplify(stmt) assert(not any(collect_visit(stmt, lambda x: isinstance(x, 
tvm.tir.IfThenElse)))) def test_cce_loop_1(): - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() dtype = 'float16' n = 514 m = 514 - _A = tvm.placeholder((n*m,), name = 'A') - Ab = tvm.decl_buffer((n*m,), dtype, name="A") + _A = te.placeholder((n*m,), name = 'A') + Ab = tvm.tir.decl_buffer((n*m,), dtype, name="A") A = ib.buffer_ptr(Ab) - _B = tvm.placeholder((n*m,), name = 'B') - Bb = tvm.decl_buffer((n*m,), dtype, name="B") + _B = te.placeholder((n*m,), name = 'B') + Bb = tvm.tir.decl_buffer((n*m,), dtype, name="B") B = ib.buffer_ptr(Bb) #for i in 0 to n-1: with ib.for_range(0, 11, name="i") as i: @@ -296,12 +297,12 @@ def test_cce_loop_1(): with ib.if_scope(ib.likely(((i*160) + j) < 1600)): A[(i+1)*m+j+1] = B[(i)*m+j+1] + B[(i+1)*m+j+1] + B[(i+2)*m+j+1] stmt = ib.get() - stmt = tvm.ir_pass.LoopPartition(stmt, True) - stmt = tvm.ir_pass.Simplify(stmt) + stmt = tvm.tir.ir_pass.LoopPartition(stmt, True) + stmt = tvm.tir.ir_pass.Simplify(stmt) assert(not any(collect_visit(stmt, lambda x: isinstance(x, tvm.tir.IfThenElse)))) def test_cce_loop_2(): - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() len = 112 tile = 32 loop = (len + tile - 1) // tile @@ -309,19 +310,19 @@ def test_cce_loop_2(): head = i * tile with ib.if_scope(ib.likely(head + tile > len)): tail = len - ib.emit(tvm.call_extern('float32', "cce_intrisic", head, tail)) + ib.emit(tvm.tir.call_extern('float32', "cce_intrisic", head, tail)) with ib.else_scope(): tail = head + tile - ib.emit(tvm.call_extern('float32', "cce_intrisic", head, tail)) + ib.emit(tvm.tir.call_extern('float32', "cce_intrisic", head, tail)) stmt = ib.get() - stmt = tvm.ir_pass.LoopPartition(stmt, True) - stmt = tvm.ir_pass.Simplify(stmt) + stmt = tvm.tir.ir_pass.LoopPartition(stmt, True) + stmt = tvm.tir.ir_pass.Simplify(stmt) assert(not any(collect_visit(stmt, lambda x: isinstance(x, tvm.tir.IfThenElse)))) def test_cce_loop_3(): - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() loop1 = 4 
loop2 = 9998 tile = 39991 @@ -330,11 +331,11 @@ def test_cce_loop_3(): head1 = i head2 = j with ib.if_scope(ib.likely(head1*loop1 + head2 < tile)): - ib.emit(tvm.call_extern('float16',"cce_intrisic",head1)) + ib.emit(tvm.tir.call_extern('float16',"cce_intrisic",head1)) stmt = ib.get() - stmt = tvm.ir_pass.LoopPartition(stmt,True) - stmt = tvm.ir_pass.Simplify(stmt) + stmt = tvm.tir.ir_pass.LoopPartition(stmt,True) + stmt = tvm.tir.ir_pass.Simplify(stmt) assert(not any(collect_visit(stmt, lambda x: isinstance(x, tvm.tir.IfThenElse)))) def test_conv_tiling(): @@ -345,33 +346,33 @@ def test_conv_tiling(): batch_size = 1 in_height = in_width = 64 out_height = out_width = in_height - kernel_height + 1 - data = tvm.placeholder((batch_size, in_channel, in_height, in_width), name='data') - kernel = tvm.placeholder((kernel_height, kernel_width, in_channel, + data = te.placeholder((batch_size, in_channel, in_height, in_width), name='data') + kernel = te.placeholder((kernel_height, kernel_width, in_channel, out_channel), name='kernel') - ic = tvm.reduce_axis((0, in_channel), name='ic') - kh = tvm.reduce_axis((0, kernel_height), name='kh') - kw = tvm.reduce_axis((0, kernel_width), name='kw') - conv = tvm.compute((batch_size, out_channel, out_height, out_width), - lambda n, oc, oh, ow: tvm.sum(data[n, ic, oh*HSTR + kh, ow*WSTR + kw] * + ic = te.reduce_axis((0, in_channel), name='ic') + kh = te.reduce_axis((0, kernel_height), name='kh') + kw = te.reduce_axis((0, kernel_width), name='kw') + conv = te.compute((batch_size, out_channel, out_height, out_width), + lambda n, oc, oh, ow: te.sum(data[n, ic, oh*HSTR + kh, ow*WSTR + kw] * kernel[kh, kw, ic, oc], axis=[ic, kh, kw]), name="conv2d") - s = tvm.create_schedule(conv.op) + s = te.create_schedule(conv.op) n, oc, oh, ow = conv.op.axis oho, owo, ohi, owi = s[conv].tile(oh, ow, 16, 16) - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) - stmt = tvm.ir_pass.LoopPartition(stmt, True) - stmt = 
tvm.ir_pass.Simplify(stmt) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + stmt = tvm.tir.ir_pass.LoopPartition(stmt, True) + stmt = tvm.tir.ir_pass.Simplify(stmt) assert(not any(collect_visit(stmt, lambda x: isinstance(x, tvm.tir.IfThenElse)))) def test_multilevel_splitting_with_indivisble_factors(): import topi - A = tvm.placeholder((130,), dtype="float32") + A = te.placeholder((130,), dtype="float32") B = topi.nn.relu(A) - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) (y,) = s[B].op.axis (yo, yi) = s[B].split(y, factor=8) (yoo, yoi) = s[B].split(yo, factor=16) @@ -379,7 +380,7 @@ def test_multilevel_splitting_with_indivisble_factors(): s[B].unroll(yi) ## But this does the right thing. - with tvm.build_config(partition_const_loop=True): + with tvm.target.build_config(partition_const_loop=True): lowered_body = tvm.lower(s, [A, B]).body def visit_stmt(op): return(isinstance(op, tvm.tir.Max)) @@ -390,17 +391,17 @@ def test_multilevel_splitting_with_indivisble_factors(): def test_double_splitting_with_indivisible_factors(): m = 48 dtype="float32" - A = tvm.placeholder((m,), name='A', dtype=dtype) - C = tvm.compute((m,), lambda i: A[i], name='C') - D = tvm.compute((m,), lambda i: C[i], name='D') + A = te.placeholder((m,), name='A', dtype=dtype) + C = te.compute((m,), lambda i: A[i], name='C') + D = te.compute((m,), lambda i: C[i], name='D') - s = tvm.create_schedule(D.op) + s = te.create_schedule(D.op) co, ci = s[C].split(C.op.axis[0], factor=10) do, di = s[D].split(D.op.axis[0], 32) s[C].compute_at(s[D], do) target = 'llvm' - with tvm.build_config(partition_const_loop=True): + with tvm.target.build_config(partition_const_loop=True): f = tvm.lower(s, [A, C, D], name="fadd1", simple_mode=False) func = tvm.build(f, target=target) @@ -420,30 +421,30 @@ def test_double_splitting_with_indivisible_factors(): def test_simple_rfactor(): K = 16*4+4 - k = tvm.reduce_axis((0, K), 'k') + k = te.reduce_axis((0, K), 'k') - A 
= tvm.placeholder((1, K), name='A') + A = te.placeholder((1, K), name='A') - B = tvm.compute( (1,), lambda b: - tvm.sum(A[b, k], axis=k), + B = te.compute( (1,), lambda b: + te.sum(A[b, k], axis=k), name='B' ) - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) ko, _ = s[B].split(s[B].op.reduce_axis[0], 16) BF = s.rfactor(B, ko, 0) s.normalize() - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) - stmt1 = tvm.schedule.ScheduleOps(s, bounds) - stmt1 = tvm.ir_pass.Simplify(stmt1) + stmt1 = tvm.te.schedule.ScheduleOps(s, bounds) + stmt1 = tvm.tir.ir_pass.Simplify(stmt1) - stmt2 = tvm.ir_pass.LoopPartition(stmt1, True) - stmt2 = tvm.ir_pass.Simplify(stmt2) + stmt2 = tvm.tir.ir_pass.LoopPartition(stmt1, True) + stmt2 = tvm.tir.ir_pass.Simplify(stmt2) #make sure loop partition actually did something - assert not tvm.ir_pass.Equal(stmt1.body, stmt2.body) + assert not tvm.tir.ir_pass.Equal(stmt1.body, stmt2.body) if __name__ == "__main__": diff --git a/tests/python/unittest/test_pass_lower_intrin.py b/tests/python/unittest/test_pass_lower_intrin.py index 1e54f38..f36b4a5 100644 --- a/tests/python/unittest/test_pass_lower_intrin.py +++ b/tests/python/unittest/test_pass_lower_intrin.py @@ -15,21 +15,22 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te import numpy as np def lower_intrin(stmt): """wrapper to call transformation in stmt""" lower_expr = isinstance(stmt, tvm.tir.PrimExpr) stmt = tvm.tir.Evaluate(stmt) if lower_expr else stmt - stmt = tvm.ir_pass.CanonicalSimplify(stmt) - stmt = tvm.ir_pass._LowerIntrinStmt(stmt, "llvm") + stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt) + stmt = tvm.tir.ir_pass._LowerIntrinStmt(stmt, "llvm") return stmt.value if lower_expr else stmt.body def check_value(expr, vx, vy, data, fref): n = len(data) - A = tvm.placeholder((n,), name="A", dtype=expr.dtype) - B = tvm.placeholder((n,), name="B", dtype=expr.dtype) + A = te.placeholder((n,), name="A", dtype=expr.dtype) + B = te.placeholder((n,), name="B", dtype=expr.dtype) def make_binds(i): x = expr @@ -37,8 +38,8 @@ def check_value(expr, vx, vy, data, fref): x = tvm.tir.Let(vy, B[i], x) return x - C = tvm.compute((n,), make_binds) - s = tvm.create_schedule([C.op]) + C = te.compute((n,), make_binds) + s = te.create_schedule([C.op]) if not tvm.runtime.enabled("llvm"): return @@ -65,43 +66,43 @@ def get_ref_data(): def test_lower_floordiv(): data = get_ref_data() for dtype in ["int32", "int64", "int16"]: - x = tvm.var("x", dtype=dtype) - y = tvm.var("y", dtype=dtype) - zero = tvm.const(0, dtype) + x = te.var("x", dtype=dtype) + y = te.var("y", dtype=dtype) + zero = tvm.tir.const(0, dtype) # no constraints - res = lower_intrin(tvm.floordiv(x, y)) + res = lower_intrin(tvm.te.floordiv(x, y)) check_value(res, x, y, data, lambda a, b: a // b) # rhs >= 0 - res = lower_intrin(tvm.tir.Select(y >= 0, tvm.floordiv(x, y), zero)) + res = lower_intrin(tvm.tir.Select(y >= 0, tvm.te.floordiv(x, y), zero)) check_value(res, x, y, data, lambda a, b: a // b if b > 0 else 0) # involves max - res = lower_intrin(tvm.tir.Select(y >= 0, tvm.max(tvm.floordiv(x, y), zero), zero)) + res = lower_intrin(tvm.tir.Select(y >= 0, tvm.te.max(tvm.te.floordiv(x, y), zero), zero)) check_value(res, x, y, data, lambda a, b: max(a // b, 0) 
if b > 0 else 0) # lhs >= 0 - res = lower_intrin(tvm.tir.Select(tvm.all(y >= 0, x >= 0), tvm.floordiv(x, y), zero)) + res = lower_intrin(tvm.tir.Select(tvm.tir.all(y >= 0, x >= 0), tvm.te.floordiv(x, y), zero)) check_value(res, x, y, data, lambda a, b: a // b if b > 0 and a >= 0 else 0) # const power of two - res = lower_intrin(tvm.floordiv(x, tvm.const(8, dtype=dtype))) + res = lower_intrin(tvm.te.floordiv(x, tvm.tir.const(8, dtype=dtype))) check_value(res, x, y, [(a, b) for a, b in data if b == 8], lambda a, b: a // b) def test_lower_floormod(): data = get_ref_data() for dtype in ["int32", "int64", "int16"]: - x = tvm.var("x", dtype=dtype) - y = tvm.var("y", dtype=dtype) - zero = tvm.const(0, dtype) + x = te.var("x", dtype=dtype) + y = te.var("y", dtype=dtype) + zero = tvm.tir.const(0, dtype) # no constraints - res = lower_intrin(tvm.floormod(x, y)) + res = lower_intrin(tvm.te.floormod(x, y)) check_value(res, x, y, data, lambda a, b: a % b) # rhs >= 0 - res = lower_intrin(tvm.tir.Select(y >= 0, tvm.floormod(x, y), zero)) + res = lower_intrin(tvm.tir.Select(y >= 0, tvm.te.floormod(x, y), zero)) check_value(res, x, y, data, lambda a, b: a % b if b > 0 else 0) # lhs >= 0 - res = lower_intrin(tvm.tir.Select(tvm.all(y >= 0, x >= 0), tvm.floormod(x, y), zero)) + res = lower_intrin(tvm.tir.Select(tvm.tir.all(y >= 0, x >= 0), tvm.te.floormod(x, y), zero)) check_value(res, x, y, data, lambda a, b: a % b if b > 0 and a >= 0 else 0) # const power of two - res = lower_intrin(tvm.floormod(x, tvm.const(8, dtype=dtype))) + res = lower_intrin(tvm.te.floormod(x, tvm.tir.const(8, dtype=dtype))) check_value(res, x, y, [(a, b) for a, b in data if b == 8], lambda a, b: a % b) diff --git a/tests/python/unittest/test_pass_lower_warp_memory.py b/tests/python/unittest/test_pass_lower_warp_memory.py index 4f09271..266ca7e 100644 --- a/tests/python/unittest/test_pass_lower_warp_memory.py +++ b/tests/python/unittest/test_pass_lower_warp_memory.py @@ -15,26 +15,27 @@ # specific language 
governing permissions and limitations # under the License. import tvm +from tvm import te def test_lower_warp_mem(): m = 128 - A = tvm.placeholder((m,), name='A') - B = tvm.compute((m,), lambda i: A[i] + 3, name='B') + A = te.placeholder((m,), name='A') + B = te.compute((m,), lambda i: A[i] + 3, name='B') - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) AA = s.cache_read(A, "warp", [B]) xo, xi = s[B].split(B.op.axis[0], 32) xi0, xi1 = s[B].split(xi, factor=16) - tx = tvm.thread_axis("threadIdx.x") + tx = te.thread_axis("threadIdx.x") s[B].bind(xi1, tx) - s[B].bind(xo, tvm.thread_axis("blockIdx.x")) + s[B].bind(xo, te.thread_axis("blockIdx.x")) s[AA].compute_at(s[B], xo) xo, xi = s[AA].split(s[AA].op.axis[0], 16) s[AA].bind(xi, tx) f = tvm.lower(s, [A, B]) - fhost, fdevice = tvm.ir_pass.SplitHostDevice(f) - fdevice = tvm.ir_pass.LowerWarpMemory(fdevice, 16) + fhost, fdevice = tvm.tir.ir_pass.SplitHostDevice(f) + fdevice = tvm.tir.ir_pass.LowerWarpMemory(fdevice, 16) assert(fdevice.body.body.value.value == "local") assert(fdevice.body.body.body.extents[0].value == 2) diff --git a/tests/python/unittest/test_pass_makeapi.py b/tests/python/unittest/test_pass_makeapi.py index 34f32ef..6b28ef6 100644 --- a/tests/python/unittest/test_pass_makeapi.py +++ b/tests/python/unittest/test_pass_makeapi.py @@ -15,26 +15,27 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te import numpy def test_makeapi(): """Not yet working, mock design""" - n = tvm.size_var('n') - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') - s = tvm.create_schedule(C.op) + n = te.size_var('n') + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C') + s = te.create_schedule(C.op) - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - Bb = tvm.decl_buffer(B.shape, B.dtype, name='B') - Cb = tvm.decl_buffer(C.shape, C.dtype, name='C') - stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B:Bb, C:Cb}, 64) + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B') + Cb = tvm.tir.decl_buffer(C.shape, C.dtype, name='C') + stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B:Bb, C:Cb}, 64) num_unpacked_args = 2 - f = tvm.ir_pass.MakeAPI( + f = tvm.tir.ir_pass.MakeAPI( stmt, "myadd", [n, Ab, Bb, Cb], num_unpacked_args, True) assert(f.handle_data_type[Ab.data].dtype == Ab.dtype) assert(len(f.args) == 7) diff --git a/tests/python/unittest/test_pass_remove_no_op.py b/tests/python/unittest/test_pass_remove_no_op.py index a3927f7..c9ecfbe 100644 --- a/tests/python/unittest/test_pass_remove_no_op.py +++ b/tests/python/unittest/test_pass_remove_no_op.py @@ -15,18 +15,19 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te def nop(): return tvm.tir.Evaluate(0) def test_remove_no_op(): - i = tvm.var('i') - j = tvm.var('j') - k = tvm.var('k') - m = tvm.var('m') - n = tvm.var('n') + i = te.var('i') + j = te.var('j') + k = te.var('k') + m = te.var('m') + n = te.var('n') dtype = 'int64' - Ab = tvm.decl_buffer((n, ), dtype) + Ab = tvm.tir.decl_buffer((n, ), dtype) stmt = tvm.tir.For( i, 0, 4, 0, 0, tvm.tir.For( @@ -35,16 +36,16 @@ def test_remove_no_op(): k, 0, m, 0, 0, tvm.tir.IfThenElse( (i*m+j+k < n), tvm.tir.Evaluate(m), tvm.tir.Evaluate(n))))) - ret = tvm.ir_pass.RemoveNoOp(stmt) + ret = tvm.tir.ir_pass.RemoveNoOp(stmt) assert(isinstance(ret, tvm.tir.Evaluate)) store = tvm.tir.Store(Ab.data, tvm.tir.Load(dtype, Ab.data, i) + 1, i + 1) stmt2 = tvm.tir.SeqStmt([nop(), tvm.tir.SeqStmt([store, nop()])]) - assert(tvm.ir_pass.RemoveNoOp(stmt2) == store) + assert(tvm.tir.ir_pass.RemoveNoOp(stmt2) == store) # remove zero extent loop stmt3 = tvm.tir.For(i, 0, 0, 0, 0, store) - ret = tvm.ir_pass.RemoveNoOp(stmt3) + ret = tvm.tir.ir_pass.RemoveNoOp(stmt3) assert(isinstance(ret, tvm.tir.Evaluate)) diff --git a/tests/python/unittest/test_pass_rewrite_for_tensor_core.py b/tests/python/unittest/test_pass_rewrite_for_tensor_core.py index cc99a25..977dfc3 100644 --- a/tests/python/unittest/test_pass_rewrite_for_tensor_core.py +++ b/tests/python/unittest/test_pass_rewrite_for_tensor_core.py @@ -15,16 +15,17 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te import topi import numpy as np from tvm.contrib import nvcc def tensor_core_matmul(warp_tile_m=16, m=64, n=32, l=96): - A = tvm.placeholder((n, l), name='A', dtype='float16') - B = tvm.placeholder((l, m), name='B', dtype='float16') - k = tvm.reduce_axis((0, l), name='k') - C = tvm.compute((n, m), lambda i, j: tvm.sum(A[i, k].astype('float32') * B[k, j].astype('float32'), axis=k)) - s = tvm.create_schedule(C.op) + A = te.placeholder((n, l), name='A', dtype='float16') + B = te.placeholder((l, m), name='B', dtype='float16') + k = te.reduce_axis((0, l), name='k') + C = te.compute((n, m), lambda i, j: te.sum(A[i, k].astype('float32') * B[k, j].astype('float32'), axis=k)) + s = te.create_schedule(C.op) y, x = s[C].op.axis k = s[C].op.reduce_axis[0] @@ -57,12 +58,12 @@ def tensor_core_matmul(warp_tile_m=16, m=64, n=32, l=96): kl, ki = s[CL].split(ki, tile_k) s[C].reorder(yo, xo, tz, ty, tx, yi, xi) - s[C].bind(yo, tvm.thread_axis("blockIdx.y")) - s[C].bind(xo, tvm.thread_axis("blockIdx.x")) - s[C].bind(ty, tvm.thread_axis("threadIdx.y")) - s[C].bind(tz, tvm.thread_axis("threadIdx.z")) - s[C].bind(tx, tvm.thread_axis("threadIdx.x")) - s[C].bind(vy, tvm.thread_axis((0, vthread), "vthread", name="vy")) + s[C].bind(yo, te.thread_axis("blockIdx.y")) + s[C].bind(xo, te.thread_axis("blockIdx.x")) + s[C].bind(ty, te.thread_axis("threadIdx.y")) + s[C].bind(tz, te.thread_axis("threadIdx.z")) + s[C].bind(tx, te.thread_axis("threadIdx.x")) + s[C].bind(vy, te.thread_axis((0, vthread), "vthread", name="vy")) s[CL].compute_at(s[C], tx) yo, xo = CL.op.axis s[CL].reorder(ko, kl, ki, yo, xo) @@ -73,9 +74,9 @@ def tensor_core_matmul(warp_tile_m=16, m=64, n=32, l=96): tx, vec = s[AA].split(tx, factor=v) fused = s[AA].fuse(s[AA].op.axis[0], xo) _, ty = s[AA].split(fused, factor=by) - s[AA].bind(ty, tvm.thread_axis("threadIdx.y")) - s[AA].bind(tz, tvm.thread_axis("threadIdx.z")) - s[AA].bind(tx, tvm.thread_axis("threadIdx.x")) + s[AA].bind(ty, 
te.thread_axis("threadIdx.y")) + s[AA].bind(tz, te.thread_axis("threadIdx.z")) + s[AA].bind(tx, te.thread_axis("threadIdx.x")) s[AA].vectorize(vec) s[BB].compute_at(s[CL], ko) @@ -84,9 +85,9 @@ def tensor_core_matmul(warp_tile_m=16, m=64, n=32, l=96): tx, vec = s[BB].split(tx, factor=v) fused = s[BB].fuse(s[BB].op.axis[0], xo) _, ty = s[BB].split(fused, factor=by) - s[BB].bind(ty, tvm.thread_axis("threadIdx.y")) - s[BB].bind(tz, tvm.thread_axis("threadIdx.z")) - s[BB].bind(tx, tvm.thread_axis("threadIdx.x")) + s[BB].bind(ty, te.thread_axis("threadIdx.y")) + s[BB].bind(tz, te.thread_axis("threadIdx.z")) + s[BB].bind(tx, te.thread_axis("threadIdx.x")) s[BB].vectorize(vec) s[AL].compute_at(s[CL], kl) @@ -111,11 +112,11 @@ def tensor_core_matmul(warp_tile_m=16, m=64, n=32, l=96): np.testing.assert_allclose(c_np, c.asnumpy(), rtol=1e-3) def tensor_core_batch_matmul(warp_tile_m=16, m=64, n=32, l=96, batch=2): - A = tvm.placeholder((batch, n, l), name='A', dtype='float16') - B = tvm.placeholder((batch, l, m), name='B', dtype='float16') - k = tvm.reduce_axis((0, l), name='k') - C = tvm.compute((batch, n, m), lambda b, i, j: tvm.sum((A[b, i, k] * B[b, k, j]).astype('float32'), axis=k)) - s = tvm.create_schedule(C.op) + A = te.placeholder((batch, n, l), name='A', dtype='float16') + B = te.placeholder((batch, l, m), name='B', dtype='float16') + k = te.reduce_axis((0, l), name='k') + C = te.compute((batch, n, m), lambda b, i, j: te.sum((A[b, i, k] * B[b, k, j]).astype('float32'), axis=k)) + s = te.create_schedule(C.op) z, y, x = s[C].op.axis k = s[C].op.reduce_axis[0] @@ -148,13 +149,13 @@ def tensor_core_batch_matmul(warp_tile_m=16, m=64, n=32, l=96, batch=2): kl, ki = s[CL].split(ki, tile_k) s[C].reorder(z, yo, xo, tz, ty, tx, yi, xi) - s[C].bind(z, tvm.thread_axis("blockIdx.z")) - s[C].bind(yo, tvm.thread_axis("blockIdx.y")) - s[C].bind(xo, tvm.thread_axis("blockIdx.x")) - s[C].bind(ty, tvm.thread_axis("threadIdx.y")) - s[C].bind(tz, tvm.thread_axis("threadIdx.z")) - 
s[C].bind(tx, tvm.thread_axis("threadIdx.x")) - s[C].bind(vy, tvm.thread_axis((0, vthread), "vthread", name="vy")) + s[C].bind(z, te.thread_axis("blockIdx.z")) + s[C].bind(yo, te.thread_axis("blockIdx.y")) + s[C].bind(xo, te.thread_axis("blockIdx.x")) + s[C].bind(ty, te.thread_axis("threadIdx.y")) + s[C].bind(tz, te.thread_axis("threadIdx.z")) + s[C].bind(tx, te.thread_axis("threadIdx.x")) + s[C].bind(vy, te.thread_axis((0, vthread), "vthread", name="vy")) s[CL].compute_at(s[C], tx) zo, yo, xo = CL.op.axis s[CL].reorder(ko, kl, ki, zo, yo, xo) @@ -165,9 +166,9 @@ def tensor_core_batch_matmul(warp_tile_m=16, m=64, n=32, l=96, batch=2): tx, vec = s[AA].split(tx, factor=v) fused = s[AA].fuse(s[AA].op.axis[1], xo) _, ty = s[AA].split(fused, factor=by) - s[AA].bind(ty, tvm.thread_axis("threadIdx.y")) - s[AA].bind(tz, tvm.thread_axis("threadIdx.z")) - s[AA].bind(tx, tvm.thread_axis("threadIdx.x")) + s[AA].bind(ty, te.thread_axis("threadIdx.y")) + s[AA].bind(tz, te.thread_axis("threadIdx.z")) + s[AA].bind(tx, te.thread_axis("threadIdx.x")) s[AA].vectorize(vec) s[BB].compute_at(s[CL], ko) @@ -176,9 +177,9 @@ def tensor_core_batch_matmul(warp_tile_m=16, m=64, n=32, l=96, batch=2): tx, vec = s[BB].split(tx, factor=v) fused = s[BB].fuse(s[BB].op.axis[1], xo) _, ty = s[BB].split(fused, factor=by) - s[BB].bind(ty, tvm.thread_axis("threadIdx.y")) - s[BB].bind(tz, tvm.thread_axis("threadIdx.z")) - s[BB].bind(tx, tvm.thread_axis("threadIdx.x")) + s[BB].bind(ty, te.thread_axis("threadIdx.y")) + s[BB].bind(tz, te.thread_axis("threadIdx.z")) + s[BB].bind(tx, te.thread_axis("threadIdx.x")) s[BB].vectorize(vec) s[AL].compute_at(s[CL], kl) diff --git a/tests/python/unittest/test_pass_rewrite_unsafe_select.py b/tests/python/unittest/test_pass_rewrite_unsafe_select.py index dc6ae82..f1e411e 100644 --- a/tests/python/unittest/test_pass_rewrite_unsafe_select.py +++ b/tests/python/unittest/test_pass_rewrite_unsafe_select.py @@ -15,21 +15,22 @@ # specific language governing permissions and 
limitations # under the License. import tvm +from tvm import te def test_rewrite_Select(): - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() A = ib.allocate("float32", 100, name="A", scope="global") - i = tvm.var("i") + i = te.var("i") y = tvm.tir.Select(i > 1, A[i-1], 1.0) - yy = tvm.ir_pass.RewriteUnsafeSelect(tvm.tir.Evaluate(y)).value + yy = tvm.tir.ir_pass.RewriteUnsafeSelect(tvm.tir.Evaluate(y)).value z = tvm.tir.Select( tvm.tir.Select(i > 1, A[i-1], 1.0) > 0.0, A[i], 0.1) - zz = tvm.ir_pass.RewriteUnsafeSelect(tvm.tir.Evaluate(z)).value + zz = tvm.tir.ir_pass.RewriteUnsafeSelect(tvm.tir.Evaluate(z)).value - a = tvm.tir.Select(tvm.floordiv(i, 4) > 10, y, z) - aa = tvm.ir_pass.RewriteUnsafeSelect(tvm.tir.Evaluate(a)).value + a = tvm.tir.Select(tvm.te.floordiv(i, 4) > 10, y, z) + aa = tvm.tir.ir_pass.RewriteUnsafeSelect(tvm.tir.Evaluate(a)).value assert yy.name == "tvm_if_then_else" assert zz.name == "tvm_if_then_else" assert isinstance(aa, tvm.tir.Select) diff --git a/tests/python/unittest/test_pass_split_host_device.py b/tests/python/unittest/test_pass_split_host_device.py index e8858b8..09f7740 100644 --- a/tests/python/unittest/test_pass_split_host_device.py +++ b/tests/python/unittest/test_pass_split_host_device.py @@ -16,13 +16,14 @@ # under the License. 
import pytest import tvm +from tvm import te @pytest.mark.xfail def test_loop_dependent_allocate(): - N = tvm.size_var("N") - A = tvm.placeholder((2*N,), "float32", "A") - C = tvm.compute((N, ), lambda i: A[2*i] + A[i+1], name='C') - s = tvm.create_schedule(C.op) + N = te.size_var("N") + A = te.placeholder((2*N,), "float32", "A") + C = te.compute((N, ), lambda i: A[2*i] + A[i+1], name='C') + s = te.create_schedule(C.op) AA = s.cache_read(A, "local", [C]) s[AA].compute_at(s[C], s[C].op.axis[0]) # this line should fail due to IRUseDefAnalysis sees an allocate statement diff --git a/tests/python/unittest/test_pass_storage_flatten.py b/tests/python/unittest/test_pass_storage_flatten.py index 47a43c7..e8a78cb 100644 --- a/tests/python/unittest/test_pass_storage_flatten.py +++ b/tests/python/unittest/test_pass_storage_flatten.py @@ -15,35 +15,36 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_flatten2(): - m = tvm.size_var('m') - l = tvm.size_var('l') - A = tvm.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1') - A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') + m = te.size_var('m') + l = te.size_var('l') + A = te.placeholder((m, l), name='A') + A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1') + A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') - s = tvm.create_schedule(A2.op) + s = te.create_schedule(A2.op) xo, xi = s[A2].split(A2.op.axis[0], 8) s[A1].compute_at(s[A2], xo) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - A2b = tvm.decl_buffer(A2.shape, A2.dtype, name='A2') - stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64) - stmt = tvm.ir_pass.Simplify(stmt) + Ab = tvm.tir.decl_buffer(A.shape, 
A.dtype, name='A') + A2b = tvm.tir.decl_buffer(A2.shape, A2.dtype, name='A2') + stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64) + stmt = tvm.tir.ir_pass.Simplify(stmt) def test_flatten_prefetch(): - A = tvm.placeholder((25, 100, 4), name = 'A') - _A= tvm.decl_buffer(A.shape, A.dtype, name = 'A'); - i = tvm.size_var('i') - j = tvm.size_var('j') + A = te.placeholder((25, 100, 4), name = 'A') + _A= tvm.tir.decl_buffer(A.shape, A.dtype, name = 'A'); + i = te.size_var('i') + j = te.size_var('j') region = [tvm.ir.Range.make_by_min_extent(i[0], i[1]) for i in [(i, 2), (j, 8), (0, 4)]] stmt = tvm.tir.Prefetch(A.op, 0, A.dtype, region) - stmt = tvm.ir_pass.StorageFlatten(stmt, {A: _A}, 64) - stmt = tvm.ir_pass.Simplify(stmt) + stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: _A}, 64) + stmt = tvm.tir.ir_pass.Simplify(stmt) assert stmt.extent.value == 2 assert isinstance(stmt.body, tvm.tir.For) assert stmt.body.extent.value == 2 @@ -52,27 +53,27 @@ def test_flatten_prefetch(): def test_flatten_storage_align(): m = 8 l = 16 - A = tvm.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1') - A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') + A = te.placeholder((m, l), name='A') + A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1') + A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') - s = tvm.create_schedule(A2.op) + s = te.create_schedule(A2.op) s[A1].storage_align(A1.op.axis[0], 2, 1) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - A2b = tvm.decl_buffer(A2.shape, A2.dtype, name='A2') - stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64) - stmt = tvm.ir_pass.Simplify(stmt) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + A2b = tvm.tir.decl_buffer(A2.shape, 
A2.dtype, name='A2') + stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64) + stmt = tvm.tir.ir_pass.Simplify(stmt) assert(stmt.body.extents[0].value == 17 * 8) def test_flatten_double_buffer(): dtype = 'int64' n = 100 m = 4 - tx = tvm.thread_axis("threadIdx.x") - ib = tvm.ir_builder.create() + tx = te.thread_axis("threadIdx.x") + ib = tvm.tir.ir_builder.create() A = ib.pointer("float32", name="A") C = ib.pointer("float32", name="C") ib.scope_attr(tx, "thread_extent", 1) @@ -86,18 +87,18 @@ def test_flatten_double_buffer(): C[j] = B[j] + 1 stmt = ib.get() - stmt = tvm.ir_pass.StorageFlatten(stmt, {}, 64) - stmt = tvm.ir_pass.InjectDoubleBuffer(stmt, 2) - stmt = tvm.ir_pass.Simplify(stmt) + stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {}, 64) + stmt = tvm.tir.ir_pass.InjectDoubleBuffer(stmt, 2) + stmt = tvm.tir.ir_pass.Simplify(stmt) assert isinstance(stmt.body.body, tvm.tir.Allocate) assert stmt.body.body.extents[0].value == 2 - f = tvm.ir_pass.MakeAPI(stmt, "db", [A.asobject(), C.asobject()], 2, True) - f = tvm.ir_pass.ThreadSync(f, "shared") + f = tvm.tir.ir_pass.MakeAPI(stmt, "db", [A.asobject(), C.asobject()], 2, True) + f = tvm.tir.ir_pass.ThreadSync(f, "shared") count = [0] def count_sync(op): if isinstance(op, tvm.tir.Call) and op.name == "tvm_storage_sync": count[0] += 1 - tvm.ir_pass.PostOrderVisit(f.body, count_sync) + tvm.tir.ir_pass.PostOrderVisit(f.body, count_sync) assert count[0] == 4 if __name__ == "__main__": diff --git a/tests/python/unittest/test_pass_storage_rewrite.py b/tests/python/unittest/test_pass_storage_rewrite.py index d4125d0..c74225d 100644 --- a/tests/python/unittest/test_pass_storage_rewrite.py +++ b/tests/python/unittest/test_pass_storage_rewrite.py @@ -15,33 +15,34 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te def test_storage_share(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((m, l), name='A') + m = te.var('m') + l = te.var('l') + A = te.placeholder((m, l), name='A') num_stage = 5 B = A for t in range(num_stage): - B = tvm.compute((m, l), lambda i, j: B[i, j] + (t+1), name='A%d' % t) + B = te.compute((m, l), lambda i, j: B[i, j] + (t+1), name='A%d' % t) - s = tvm.create_schedule(B.op) - bounds = tvm.schedule.InferBound(s) + s = te.create_schedule(B.op) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - Bb = tvm.decl_buffer(B.shape, B.dtype, name='B') - stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64) - stmt = tvm.ir_pass.CanonicalSimplify(stmt) - stmt = tvm.ir_pass.Simplify(stmt) - stmt = tvm.ir_pass.StorageRewrite(stmt) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B') + stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64) + stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt) + stmt = tvm.tir.ir_pass.Simplify(stmt) + stmt = tvm.tir.ir_pass.StorageRewrite(stmt) # verify only have one allocations. 
# verify inplace folding works num_alloc = [0] def verify(n): if isinstance(n, tvm.tir.Allocate): num_alloc[0] += 1 - tvm.ir_pass.PostOrderVisit(stmt, verify) + tvm.tir.ir_pass.PostOrderVisit(stmt, verify) assert num_alloc[0] == 1 def register_mem(scope_tb, max_bits): @@ -60,8 +61,8 @@ def test_alloc_seq(): register_mem(scope_tb, max_bits) - ib = tvm.ir_builder.create() - n = tvm.var("n") + ib = tvm.tir.ir_builder.create() + n = te.var("n") with ib.for_range(0, n, name="i") as i: with ib.for_range(0, 10, name="j") as j: A = ib.allocate("float32", 200, name="A", scope=scope_tb) @@ -71,37 +72,37 @@ def test_alloc_seq(): A[j] = 1.3 body = ib.get() - body = tvm.ir_pass.StorageRewrite(body) + body = tvm.tir.ir_pass.StorageRewrite(body) num_alloc = [0] def verify(n): if isinstance(n, tvm.tir.Allocate): num_alloc[0] += 1 assert n.extents[0].value == 200 - tvm.ir_pass.PostOrderVisit(body, verify) + tvm.tir.ir_pass.PostOrderVisit(body, verify) assert num_alloc[0] == 1 def test_alloc_different_dtypes(): def stmt_generater(dtype_list, length): - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() base_dtype = dtype_list[0] - global_a = tvm.placeholder((length,), name = "global_a", dtype = base_dtype) + global_a = te.placeholder((length,), name = "global_a", dtype = base_dtype) assert len(dtype_list) == 4 with ib.for_range(0, length, name="j") as j: dtype = dtype_list[0] A = ib.allocate(dtype, length, name="A", scope="local.L0A") - A[j] = tvm.const(1, dtype = dtype) + A[j] = tvm.tir.const(1, dtype = dtype) with ib.for_range(0, length, name="j") as j: dtype = dtype_list[1] B = ib.allocate(dtype, length, name="B", scope="local.L0A") - B[j] = tvm.const(1, dtype = dtype) + B[j] = tvm.tir.const(1, dtype = dtype) with ib.for_range(0, length, name="j") as j: dtype = dtype_list[2] C = ib.allocate(dtype, length, name="C", scope="local.L0A") - C[j] = tvm.const(1, dtype = dtype) + C[j] = tvm.tir.const(1, dtype = dtype) with ib.for_range(0, length, name="j") as j: dtype = 
dtype_list[3] D = ib.allocate(dtype, length, name="D", scope="local.L0A") - D[j] = tvm.const(1, dtype = dtype) + D[j] = tvm.tir.const(1, dtype = dtype) with ib.for_range(0, length, name="j") as j: dtype = "int8" E = ib.allocate(dtype, length, name="E", scope="local.L0A") @@ -128,8 +129,8 @@ def test_alloc_different_dtypes(): body = stmt_generater(dtype_list, length) offset = offset_generater(dtype_list, length) - body = tvm.ir_pass.StorageRewrite(body) - tvm.ir_pass.PostOrderVisit(body, verify) + body = tvm.tir.ir_pass.StorageRewrite(body) + tvm.tir.ir_pass.PostOrderVisit(body, verify) length = 1024 dtype_list = ["float16", "int32", "uint16", "int8"] @@ -147,121 +148,121 @@ def test_alloc_different_dtypes(): def test_inplace_rule(): m = 10 - A = tvm.placeholder((m,), name='A') - A0 = tvm.compute((m,), lambda i: A[i], name='A0') - A1 = tvm.compute((m,), lambda i: A[i] + 1, name='A1') - AA = tvm.compute((m,), lambda i: A0[i] + A1[i] + A1[0], name='AA') - B = tvm.compute((m,), lambda i: AA[i] + 1, name='B') - s = tvm.create_schedule(B.op) - bounds = tvm.schedule.InferBound(s) + A = te.placeholder((m,), name='A') + A0 = te.compute((m,), lambda i: A[i], name='A0') + A1 = te.compute((m,), lambda i: A[i] + 1, name='A1') + AA = te.compute((m,), lambda i: A0[i] + A1[i] + A1[0], name='AA') + B = te.compute((m,), lambda i: AA[i] + 1, name='B') + s = te.create_schedule(B.op) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - Bb = tvm.decl_buffer(B.shape, B.dtype, name='B') - stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64) - stmt = tvm.ir_pass.CanonicalSimplify(stmt) - stmt = tvm.ir_pass.Simplify(stmt) - stmt = tvm.ir_pass.StorageRewrite(stmt) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B') + stmt = 
tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64) + stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt) + stmt = tvm.tir.ir_pass.Simplify(stmt) + stmt = tvm.tir.ir_pass.StorageRewrite(stmt) # verify only have one allocations. # verify inplace folding works num_alloc = [0] def verify(n): if isinstance(n, tvm.tir.Allocate): num_alloc[0] += 1 - tvm.ir_pass.PostOrderVisit(stmt, verify) + tvm.tir.ir_pass.PostOrderVisit(stmt, verify) assert num_alloc[0] == 2 def test_storage_combine(): n = 8 - A = tvm.placeholder((4,), name='A') + A = te.placeholder((4,), name='A') num_stage = 5 B = A stages = [] for t in range(num_stage): - B = tvm.compute((n, ), lambda i: B[i] + B[0] + (t+1), name='A%d' % t) + B = te.compute((n, ), lambda i: B[i] + B[0] + (t+1), name='A%d' % t) stages.append(B) - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) for S in stages[:-1]: s[S].set_scope("global:tag") - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - Bb = tvm.decl_buffer(B.shape, B.dtype, name='B') - stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64) - stmt = tvm.ir_pass.CanonicalSimplify(stmt) - stmt = tvm.ir_pass.Simplify(stmt) - stmt = tvm.ir_pass.StorageRewrite(stmt) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B') + stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64) + stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt) + stmt = tvm.tir.ir_pass.Simplify(stmt) + stmt = tvm.tir.ir_pass.StorageRewrite(stmt) num_alloc = [0] def verify(n): if isinstance(n, tvm.tir.Allocate): num_alloc[0] += 1 assert (n.extents[0].value == 16) - tvm.ir_pass.PostOrderVisit(stmt, verify) + tvm.tir.ir_pass.PostOrderVisit(stmt, verify) assert num_alloc[0] == 1 def test_storage_share_gpu(): - m = tvm.var('m') 
- A = [tvm.placeholder((m), name='A')] + m = te.var('m') + A = [te.placeholder((m), name='A')] num_stage = 5 for t in range(num_stage): - A.append(tvm.compute((m,), lambda i: A[-1][i] + (t+1), name='A%d_s' % t)) - A.append(tvm.compute((m,), lambda i: A[-1][i], name='A%d' % t)) - s = tvm.create_schedule(A[-1].op) + A.append(te.compute((m,), lambda i: A[-1][i] + (t+1), name='A%d_s' % t)) + A.append(te.compute((m,), lambda i: A[-1][i], name='A%d' % t)) + s = te.create_schedule(A[-1].op) for t in range(num_stage): x = A[2*t+2].op.axis[0] bx, tx = s[A[2*t+2]].split(x, factor=32) - s[A[2*t+2]].bind(bx, tvm.thread_axis("blockIdx.x")) - s[A[2*t+2]].bind(tx, tvm.thread_axis("threadIdx.x")) + s[A[2*t+2]].bind(bx, te.thread_axis("blockIdx.x")) + s[A[2*t+2]].bind(tx, te.thread_axis("threadIdx.x")) s[A[2*t+1]].compute_at(s[A[2*t+2]], tx) s[A[2*t+1]].set_scope("shared") - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A[0].shape, A[0].dtype, name='A') - Bb = tvm.decl_buffer(A[0].shape, A[0].dtype, name='B') - stmt = tvm.ir_pass.StorageFlatten(stmt, {A[0]: Ab, A[-1]: Bb}, 64) - stmt = tvm.ir_pass.CanonicalSimplify(stmt) - stmt = tvm.ir_pass.Simplify(stmt) - stmt = tvm.ir_pass.StorageRewrite(stmt) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + Ab = tvm.tir.decl_buffer(A[0].shape, A[0].dtype, name='A') + Bb = tvm.tir.decl_buffer(A[0].shape, A[0].dtype, name='B') + stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A[0]: Ab, A[-1]: Bb}, 64) + stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt) + stmt = tvm.tir.ir_pass.Simplify(stmt) + stmt = tvm.tir.ir_pass.StorageRewrite(stmt) alloc_stats = {"global": 0, "shared": 0} def verify(n): if isinstance(n, tvm.tir.AttrStmt): if n.attr_key == "storage_scope": alloc_stats[n.value.value] += 1 - tvm.ir_pass.PostOrderVisit(stmt, verify) + tvm.tir.ir_pass.PostOrderVisit(stmt, verify) assert alloc_stats["global"] 
== 2 assert alloc_stats["shared"] == num_stage def test_parallel_alloc(): - ib = tvm.ir_builder.create() - n = tvm.var("n") + ib = tvm.tir.ir_builder.create() + n = te.var("n") with ib.for_range(0, n, name="i", for_type="parallel") as i: with ib.for_range(0, 10, name="j") as j: A = ib.allocate("float32", n, name="A", scope="global") A[j] = A[j] + 2 body = ib.get() - body = tvm.ir_pass.StorageRewrite(body) + body = tvm.tir.ir_pass.StorageRewrite(body) assert (isinstance(body.body.body, tvm.tir.Allocate)) - ib = tvm.ir_builder.create() - n = tvm.var("n") + ib = tvm.tir.ir_builder.create() + n = te.var("n") with ib.for_range(0, n, name="t") as i: ib.scope_attr( - tvm.const(1, "int32") , "pragma_scope", + tvm.tir.const(1, "int32") , "pragma_scope", tvm.tir.StringImm("parallel_launch_point")) with ib.for_range(0, n, name="i", for_type="parallel") as i: with ib.for_range(0, 10, name="j") as j: A = ib.allocate("float32", n, name="A", scope="global") A[j] = A[j] + 2 body = ib.get() - body = tvm.ir_pass.StorageRewrite(body) + body = tvm.tir.ir_pass.StorageRewrite(body) assert(isinstance(body.body.body.body.body, tvm.tir.Allocate)) @@ -269,35 +270,35 @@ def test_inplace_rule2(scope_tb = "local_TB2", max_bits = 1024 * 1024 * 1024): #Test Buffer register_mem(scope_tb, max_bits) m = 10 - A = tvm.placeholder((m,), name='A') - C = tvm.placeholder((m,), name='C') - D = tvm.placeholder((m,), name='D') - A0 = tvm.compute((m,), lambda i: A[i] + C[i], name='A0') - A1 = tvm.compute((m,), lambda i: D[i] * D[i], name='A1') - A2 = tvm.compute((m,), lambda i: A0[i] + A1[i], name='A2') - B = tvm.compute((m,), lambda i: A2[i], name='B') - s = tvm.create_schedule(B.op) + A = te.placeholder((m,), name='A') + C = te.placeholder((m,), name='C') + D = te.placeholder((m,), name='D') + A0 = te.compute((m,), lambda i: A[i] + C[i], name='A0') + A1 = te.compute((m,), lambda i: D[i] * D[i], name='A1') + A2 = te.compute((m,), lambda i: A0[i] + A1[i], name='A2') + B = te.compute((m,), lambda i: A2[i], 
name='B') + s = te.create_schedule(B.op) A0L = s.cache_read(A0, scope_tb, [A2]) A1L = s.cache_read(A1, scope_tb, [A2]) A2L = s.cache_read(A2, scope_tb, [B]) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - Bb = tvm.decl_buffer(B.shape, B.dtype, name='B') - Cc = tvm.decl_buffer(C.shape, B.dtype, name='C') - Dd = tvm.decl_buffer(D.shape, B.dtype, name='D') - stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb, C: Cc, D:Dd}, 64) - stmt = tvm.ir_pass.CanonicalSimplify(stmt) - stmt = tvm.ir_pass.Simplify(stmt) - stmt = tvm.ir_pass.StorageRewrite(stmt) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B') + Cc = tvm.tir.decl_buffer(C.shape, B.dtype, name='C') + Dd = tvm.tir.decl_buffer(D.shape, B.dtype, name='D') + stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb, C: Cc, D:Dd}, 64) + stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt) + stmt = tvm.tir.ir_pass.Simplify(stmt) + stmt = tvm.tir.ir_pass.StorageRewrite(stmt) # verify only have one allocations. 
# verify inplace folding works num_alloc = [0] def verify(n): if isinstance(n, tvm.tir.Allocate): num_alloc[0] += 1 - tvm.ir_pass.PostOrderVisit(stmt, verify) + tvm.tir.ir_pass.PostOrderVisit(stmt, verify) assert num_alloc[0] == 2 def test_exceed_mem(): @@ -318,27 +319,27 @@ def test_inplace_rule3(): register_mem(scope_tb, max_bits) m = 10 - B0 = tvm.placeholder((m,), name='B0') - B1 = tvm.placeholder((m,), name='B1') - B2 = tvm.placeholder((m,), name='B2') - B3 = tvm.placeholder((m,), name='B3') - B4 = tvm.placeholder((m,), name='B4') - B5 = tvm.placeholder((m,), name='B5') + B0 = te.placeholder((m,), name='B0') + B1 = te.placeholder((m,), name='B1') + B2 = te.placeholder((m,), name='B2') + B3 = te.placeholder((m,), name='B3') + B4 = te.placeholder((m,), name='B4') + B5 = te.placeholder((m,), name='B5') - B6 = tvm.compute((m,), lambda i: B1[i] * B5[i], name='B6') - B7 = tvm.compute((m,), lambda i: B2[i] * B4[i], name='B7') - B8 = tvm.compute((m,), lambda i: B6[i] - B7[i], name='B8') + B6 = te.compute((m,), lambda i: B1[i] * B5[i], name='B6') + B7 = te.compute((m,), lambda i: B2[i] * B4[i], name='B7') + B8 = te.compute((m,), lambda i: B6[i] - B7[i], name='B8') - B9 = tvm.compute((m,), lambda i: B2[i] * B3[i], name='B9') - B10 = tvm.compute((m,), lambda i: B0[i] * B5[i], name='B10') - B11 = tvm.compute((m,), lambda i: B9[i] - B10[i], name='B11') + B9 = te.compute((m,), lambda i: B2[i] * B3[i], name='B9') + B10 = te.compute((m,), lambda i: B0[i] * B5[i], name='B10') + B11 = te.compute((m,), lambda i: B9[i] - B10[i], name='B11') - B12 = tvm.compute((m,), lambda i: B0[i] * B4[i], name='B12') - B13 = tvm.compute((m,), lambda i: B1[i] * B3[i], name='B13') - B14 = tvm.compute((m,), lambda i: B12[i] - B13[i], name='B14') + B12 = te.compute((m,), lambda i: B0[i] * B4[i], name='B12') + B13 = te.compute((m,), lambda i: B1[i] * B3[i], name='B13') + B14 = te.compute((m,), lambda i: B12[i] - B13[i], name='B14') - B = tvm.compute((m,), lambda i: B8[i] * B11[i] + B14[i], name='B') 
- s = tvm.create_schedule(B.op) + B = te.compute((m,), lambda i: B8[i] * B11[i] + B14[i], name='B') + s = te.create_schedule(B.op) B1L = s.cache_read(B1, scope_tb, [B6, B13]) B5L = s.cache_read(B5, scope_tb, [B6, B10]) @@ -368,32 +369,32 @@ def test_inplace_rule3(): s[B10].compute_inline() s = s.normalize() - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) - - B0a = tvm.decl_buffer(B0.shape, B0.dtype, name='B0') - B1a = tvm.decl_buffer(B1.shape, B1.dtype, name='B1') - B2a = tvm.decl_buffer(B2.shape, B2.dtype, name='B2') - B3a = tvm.decl_buffer(B3.shape, B3.dtype, name='B3') - B4a = tvm.decl_buffer(B4.shape, B4.dtype, name='B4') - B5a = tvm.decl_buffer(B5.shape, B5.dtype, name='B5') - - Bb = tvm.decl_buffer(B.shape, B.dtype, name='B') - stmt = tvm.ir_pass.StorageFlatten(stmt, {B0: B0a, B1: B1a, B2: B2a, B3: B2a, B4: B4a, B5: B5a, B: Bb}, 64) - stmt = tvm.ir_pass.CanonicalSimplify(stmt) - stmt = tvm.ir_pass.Simplify(stmt) - stmt = tvm.ir_pass.StorageRewrite(stmt) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + + B0a = tvm.tir.decl_buffer(B0.shape, B0.dtype, name='B0') + B1a = tvm.tir.decl_buffer(B1.shape, B1.dtype, name='B1') + B2a = tvm.tir.decl_buffer(B2.shape, B2.dtype, name='B2') + B3a = tvm.tir.decl_buffer(B3.shape, B3.dtype, name='B3') + B4a = tvm.tir.decl_buffer(B4.shape, B4.dtype, name='B4') + B5a = tvm.tir.decl_buffer(B5.shape, B5.dtype, name='B5') + + Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name='B') + stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {B0: B0a, B1: B1a, B2: B2a, B3: B2a, B4: B4a, B5: B5a, B: Bb}, 64) + stmt = tvm.tir.ir_pass.CanonicalSimplify(stmt) + stmt = tvm.tir.ir_pass.Simplify(stmt) + stmt = tvm.tir.ir_pass.StorageRewrite(stmt) # verify only have one allocations. 
# verify inplace folding works def verify(n): if isinstance(n, tvm.tir.Allocate): assert n.extents[0].value == 70 - tvm.ir_pass.PostOrderVisit(stmt, verify) + tvm.tir.ir_pass.PostOrderVisit(stmt, verify) def test_alloc_seq_type(): - ib = tvm.ir_builder.create() - n = tvm.var("n") + ib = tvm.tir.ir_builder.create() + n = te.var("n") with ib.for_range(0, n, name="i") as i: with ib.for_range(0, 10, name="j") as j: A = ib.allocate("float32", 200, name="A", scope="local.L0A") @@ -401,22 +402,22 @@ def test_alloc_seq_type(): A[j] = 1.2 A1[j] = 1.3 B = ib.allocate("int16", 200, name="B", scope="local.L0A") - B[j] = tvm.const(1, "int16") + B[j] = tvm.tir.const(1, "int16") C = ib.allocate("int16", 200, name="C", scope="local.L0A") - C[j] = tvm.const(1, "int16") + C[j] = tvm.tir.const(1, "int16") D = ib.allocate("int16", 200, name="D", scope="local.L0A") D[j] = B[j] + C[j] A2 = ib.allocate("float32", 200, name="A2", scope="local.L0A") A2[j] = A[j] body = ib.get() - body = tvm.ir_pass.StorageRewrite(body) + body = tvm.tir.ir_pass.StorageRewrite(body) num_alloc = [0] def verify(n): if isinstance(n, tvm.tir.Allocate): num_alloc[0] += 1 assert n.extents[0].value == 500 - tvm.ir_pass.PostOrderVisit(body, verify) + tvm.tir.ir_pass.PostOrderVisit(body, verify) assert num_alloc[0] == 1 def test_alloc_seq_type2(): @@ -425,50 +426,50 @@ def test_alloc_seq_type2(): register_mem(scope_tb, max_bits) - ib = tvm.ir_builder.create() - n = tvm.var("n") + ib = tvm.tir.ir_builder.create() + n = te.var("n") with ib.for_range(0, n, name="i") as i: with ib.for_range(0, 10, name="j") as j: A = ib.allocate("float32", 200, name="A", scope=scope_tb) A[j] = 1.2 with ib.for_range(0, 20, name="j") as j: B = ib.allocate("int16", 400, name="B", scope=scope_tb) - B[j] = tvm.const(1, "int16") + B[j] = tvm.tir.const(1, "int16") with ib.for_range(0, 10, name="j") as j: C = ib.allocate("float32", 200, name="C", scope=scope_tb) C[j] = 1.2 body = ib.get() - body = tvm.ir_pass.StorageRewrite(body) + body = 
tvm.tir.ir_pass.StorageRewrite(body) num_alloc = [0] def verify(n): if isinstance(n, tvm.tir.Allocate): num_alloc[0] += 1 assert n.extents[0].value == 200 - tvm.ir_pass.PostOrderVisit(body, verify) + tvm.tir.ir_pass.PostOrderVisit(body, verify) assert num_alloc[0] == 1 def test_reuse_small_buffer(): - ib = tvm.ir_builder.create() - n = tvm.var("n") + ib = tvm.tir.ir_builder.create() + n = te.var("n") with ib.for_range(0, n, name="i") as i: with ib.for_range(0, 10, name="j") as j: A = ib.allocate("int16", 200, name="A", scope="local.L0A") - A[j] = tvm.const(1, "int16") + A[j] = tvm.tir.const(1, "int16") B = ib.allocate("int16", 200, name="B", scope="local.L0A") - B[j] = tvm.const(1, "int16") + B[j] = tvm.tir.const(1, "int16") B1 = ib.allocate("int16", 200, name="B1", scope="local.L0A") B1[j] = A[j] + B[j] C = ib.allocate("int16", 400, name="C", scope="local.L0A") - C[j] = tvm.const(1, "int16") + C[j] = tvm.tir.const(1, "int16") D = ib.allocate("int16", 400, name="D", scope="local.L0A") - D[j] = tvm.const(1, "int16") + D[j] = tvm.tir.const(1, "int16") E = ib.allocate("int16", 400, name="E", scope="local.L0A") E[j] = C[j] body = ib.get() - body = tvm.ir_pass.StorageRewrite(body) + body = tvm.tir.ir_pass.StorageRewrite(body) num_alloc = [0] @@ -476,20 +477,20 @@ def test_reuse_small_buffer(): if isinstance(n, tvm.tir.Allocate): num_alloc[0] += 1 assert n.extents[0].value == 800 - tvm.ir_pass.PostOrderVisit(body, verify) + tvm.tir.ir_pass.PostOrderVisit(body, verify) assert num_alloc[0] == 1 def test_replace_dataflow(): shape = (255,) - A = tvm.placeholder(shape, name = "A") - B = tvm.compute(shape, lambda i: A[i] + A[i], name = "B") - C = tvm.compute(shape, lambda i: A[i] + B[i], name = "C") - D = tvm.compute(shape, lambda i: A[i] + C[i], name = "D") - E = tvm.compute(shape, lambda i: A[i] + D[i], name = "E") + A = te.placeholder(shape, name = "A") + B = te.compute(shape, lambda i: A[i] + A[i], name = "B") + C = te.compute(shape, lambda i: A[i] + B[i], name = "C") + D 
= te.compute(shape, lambda i: A[i] + C[i], name = "D") + E = te.compute(shape, lambda i: A[i] + D[i], name = "E") - s = tvm.create_schedule(E.op) + s = te.create_schedule(E.op) s.cache_read(A, "local", [B, C, D, E]) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) @@ -505,16 +506,16 @@ def test_large_input(): n = 16384 shape = (n, n) - a = tvm.placeholder(shape, name='a', dtype='int32') - b = tvm.placeholder(shape, name='b', dtype='int32') - c = tvm.compute(shape, lambda i, j: compute(a, b)[i, j]) - c = tvm.compute(shape, lambda i, j: 1 + c[i, j]) - s = tvm.create_schedule(c.op) + a = te.placeholder(shape, name='a', dtype='int32') + b = te.placeholder(shape, name='b', dtype='int32') + c = te.compute(shape, lambda i, j: compute(a, b)[i, j]) + c = te.compute(shape, lambda i, j: 1 + c[i, j]) + s = te.create_schedule(c.op) stmt = tvm.lower(s, [a, b, c], simple_mode=True) def verify(n): if isinstance(n, tvm.tir.Allocate): assert n.extents[0].value == 268435456 - tvm.ir_pass.PostOrderVisit(stmt, verify) + tvm.tir.ir_pass.PostOrderVisit(stmt, verify) if __name__ == "__main__": diff --git a/tests/python/unittest/test_pass_storage_sync.py b/tests/python/unittest/test_pass_storage_sync.py index 0ed0c99..9edfa95 100644 --- a/tests/python/unittest/test_pass_storage_sync.py +++ b/tests/python/unittest/test_pass_storage_sync.py @@ -15,31 +15,32 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te def test_storage_sync(): - m = tvm.size_var('m') - l = tvm.size_var('l') - A = tvm.placeholder((m, l), name='A') + m = te.size_var('m') + l = te.size_var('l') + A = te.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1') - A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') + A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1') + A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') - s = tvm.create_schedule(A2.op) + s = te.create_schedule(A2.op) xo, xi = s[A2].split(A2.op.axis[0], factor=8) - s[A2].bind(xo, tvm.thread_axis("blockIdx.x")) + s[A2].bind(xo, te.thread_axis("blockIdx.x")) s[A1].compute_at(s[A2], xo) s[A1].set_scope("shared") - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - A2b = tvm.decl_buffer(A2.shape, A2.dtype, name='A2') - stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64) - f = tvm.ir_pass.MakeAPI(stmt, "test", [Ab, A2b], 0, True) - flist = tvm.ir_pass.SplitHostDevice(f) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + A2b = tvm.tir.decl_buffer(A2.shape, A2.dtype, name='A2') + stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64) + f = tvm.tir.ir_pass.MakeAPI(stmt, "test", [Ab, A2b], 0, True) + flist = tvm.tir.ir_pass.SplitHostDevice(f) f = flist[1] - f = tvm.ir_pass.ThreadSync(f, "shared") + f = tvm.tir.ir_pass.ThreadSync(f, "shared") body_list = tvm.tir.stmt_list(f.body.body.body.body) assert(body_list[1].value.name == "tvm_storage_sync") @@ -52,10 +53,10 @@ def test_coproc_sync(): unit_bits=8, max_simd_bits=32, max_num_bits=128, - head_address=tvm.call_extern("handle", "global_cache")) - ib = tvm.ir_builder.create() - n = tvm.size_var("n") - cp = tvm.thread_axis((0, 1), "cop") + 
head_address=tvm.tir.call_extern("handle", "global_cache")) + ib = tvm.tir.ir_builder.create() + n = te.size_var("n") + cp = te.thread_axis((0, 1), "cop") A = ib.allocate("float32", 128, name="A", scope="global.cache") with ib.for_range(0, n, name="i") as i: A[i] = A[i] + 1 @@ -64,7 +65,7 @@ def test_coproc_sync(): ib.scope_attr(cp, "coproc_scope", 1) A[j] = A[j + k * 10] + 2 stmt = ib.get() - stmt = tvm.ir_pass.CoProcSync(stmt) + stmt = tvm.tir.ir_pass.CoProcSync(stmt) body = stmt.body.body.body blist = tvm.tir.stmt_list(body) assert(blist[1].value.name == "cop.coproc_read_barrier") @@ -75,10 +76,10 @@ def test_coproc_sync(): def test_coproc_sync2(): - ib = tvm.ir_builder.create() - n = tvm.size_var("n") - cp = tvm.thread_axis((0, 1), "cop") - ty = tvm.thread_axis("cthread") + ib = tvm.tir.ir_builder.create() + n = te.size_var("n") + cp = te.thread_axis((0, 1), "cop") + ty = te.thread_axis("cthread") A = ib.allocate("float32", 128, name="A") ib.scope_attr(ty, "virtual_thread", 2) with ib.new_scope(): @@ -92,7 +93,7 @@ def test_coproc_sync2(): ib.scope_attr(cp, "coproc_scope", 2) A[ty] = 1.0 stmt = ib.get() - stmt = tvm.ir_pass.CoProcSync(stmt) + stmt = tvm.tir.ir_pass.CoProcSync(stmt) def test_coproc_sync3(): def __check_list(tvm_array, py_list): @@ -101,9 +102,9 @@ def test_coproc_sync3(): return False return True - ib = tvm.ir_builder.create() - n = tvm.size_var("n") - cp = tvm.thread_axis((0, 1), "cop") + ib = tvm.tir.ir_builder.create() + n = te.size_var("n") + cp = te.thread_axis((0, 1), "cop") A = ib.allocate("float32", 128, name="A", scope="global.cache") with ib.for_range(0, n, name="i") as i: with ib.for_range(0, n, name="i") as j: @@ -118,7 +119,7 @@ def test_coproc_sync3(): A[0] = 0.0 stmt = ib.get() - stmt = tvm.ir_pass.CoProcSync(stmt) + stmt = tvm.tir.ir_pass.CoProcSync(stmt) slist = tvm.tir.stmt_list(stmt[0].body.body) push_st = slist[2] slist = tvm.tir.stmt_list(slist[-1]) diff --git a/tests/python/unittest/test_pass_unroll.py 
b/tests/python/unittest/test_pass_unroll.py index c6b536b..165edab 100644 --- a/tests/python/unittest/test_pass_unroll.py +++ b/tests/python/unittest/test_pass_unroll.py @@ -15,14 +15,15 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import os def test_unroll_loop(): - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() dtype = 'int64' - n = tvm.size_var('n') - Ab = tvm.decl_buffer((n, ), dtype) + n = te.size_var('n') + Ab = tvm.tir.decl_buffer((n, ), dtype) Aptr = ib.buffer_ptr(Ab) # for i in 0 to n-1: with ib.for_range(n, n + 2, name="i") as i: @@ -31,31 +32,31 @@ def test_unroll_loop(): stmt = ib.get() assert isinstance(stmt, tvm.tir.For) - ret = tvm.ir_pass.UnrollLoop(stmt, 16, 8, 0, True) + ret = tvm.tir.ir_pass.UnrollLoop(stmt, 16, 8, 0, True) assert not isinstance(ret, tvm.tir.For) - ret = tvm.ir_pass.UnrollLoop(stmt, 15, 8, 0, True) + ret = tvm.tir.ir_pass.UnrollLoop(stmt, 15, 8, 0, True) assert isinstance(ret, tvm.tir.For) - ret = tvm.ir_pass.UnrollLoop(stmt, 16, 8, 0, False) + ret = tvm.tir.ir_pass.UnrollLoop(stmt, 16, 8, 0, False) assert isinstance(ret, tvm.tir.For) assert ret.for_type == tvm.tir.For.Unrolled - ib = tvm.ir_builder.create() - ib.scope_attr(tvm.const(0, "int32"), "pragma_auto_unroll_max_step", 16) + ib = tvm.tir.ir_builder.create() + ib.scope_attr(tvm.tir.const(0, "int32"), "pragma_auto_unroll_max_step", 16) ib.emit(stmt) wrapped = ib.get() wrapped = tvm.tir.SeqStmt([wrapped, stmt]) assert isinstance(ret, tvm.tir.For) - ret = tvm.ir_pass.UnrollLoop(wrapped, 0, 8, 0, False) + ret = tvm.tir.ir_pass.UnrollLoop(wrapped, 0, 8, 0, False) assert isinstance(ret[0], tvm.tir.For) assert ret[0].for_type == tvm.tir.For.Unrolled assert isinstance(ret[1], tvm.tir.For) assert ret[1].for_type != tvm.tir.For.Unrolled def test_unroll_fake_loop(): - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() dtype = 'int32' - n = tvm.size_var('n') - Ab = tvm.decl_buffer((n, ), 
dtype) + n = te.size_var('n') + Ab = tvm.tir.decl_buffer((n, ), dtype) Aptr = ib.buffer_ptr(Ab) # for i in 0 to n-1: with ib.for_range(0, 1, name="i") as i: @@ -64,20 +65,20 @@ def test_unroll_fake_loop(): Aptr[j + 1] = Aptr[i] + 1 stmt = ib.get() - ret = tvm.ir_pass.UnrollLoop(stmt, 8, 0, 1, True) + ret = tvm.tir.ir_pass.UnrollLoop(stmt, 8, 0, 1, True) assert isinstance(ret[0], tvm.tir.Store) def test_unroll_single_count_loops(): - n = tvm.size_var('n') - A = tvm.placeholder((n,), name='A') - B = tvm.compute((n,), lambda *i: A(*i), name='B') - s = tvm.create_schedule(B.op) + n = te.size_var('n') + A = te.placeholder((n,), name='A') + B = te.compute((n,), lambda *i: A(*i), name='B') + s = te.create_schedule(B.op) s = s.normalize() - dom_map = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, dom_map) + dom_map = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, dom_map) # all parameters to UnrolLoops are default values except for # auto_unroll_max_extent which has been set to 1 (default:0) - after_unroll_stmt = tvm.ir_pass.UnrollLoop(stmt, 0, 8, 1, True) + after_unroll_stmt = tvm.tir.ir_pass.UnrollLoop(stmt, 0, 8, 1, True) assert after_unroll_stmt == stmt if __name__ == "__main__": diff --git a/tests/python/unittest/test_pass_vectorize.py b/tests/python/unittest/test_pass_vectorize.py index d1cd2d4..2ade843 100644 --- a/tests/python/unittest/test_pass_vectorize.py +++ b/tests/python/unittest/test_pass_vectorize.py @@ -15,19 +15,20 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te def test_vectorize_loop(): dtype = 'int64' - n = tvm.var('n') - ib = tvm.ir_builder.create() + n = te.var('n') + ib = tvm.tir.ir_builder.create() A = ib.pointer("float32", name="A") with ib.for_range(0, n) as i: with ib.for_range(0, 4, for_type="vectorize") as j: - A[j] = tvm.const(1, A.dtype) + A[j] = tvm.tir.const(1, A.dtype) stmt = ib.get() assert isinstance(stmt.body, tvm.tir.For) - stmt = tvm.ir_pass.VectorizeLoop(stmt) + stmt = tvm.tir.ir_pass.VectorizeLoop(stmt) assert isinstance(stmt, tvm.tir.For) assert not isinstance(stmt.body, tvm.tir.For) assert isinstance(stmt.body.index, tvm.tir.Ramp) @@ -35,15 +36,15 @@ def test_vectorize_loop(): def test_vectorize_vector(): dtype = 'int64' - n = tvm.var('n') - ib = tvm.ir_builder.create() + n = te.var('n') + ib = tvm.tir.ir_builder.create() A = ib.pointer("float32x4", name="A") with ib.for_range(0, n) as i: with ib.for_range(0, 4, for_type="vectorize") as j: - A[j] = tvm.const(1, A.dtype) + A[j] = tvm.tir.const(1, A.dtype) stmt = ib.get() assert isinstance(stmt.body, tvm.tir.For) - stmt = tvm.ir_pass.VectorizeLoop(stmt) + stmt = tvm.tir.ir_pass.VectorizeLoop(stmt) assert isinstance(stmt, tvm.tir.For) assert not isinstance(stmt.body, tvm.tir.For) assert isinstance(stmt.body.index, tvm.tir.Ramp) @@ -51,9 +52,9 @@ def test_vectorize_vector(): def test_vectorize_with_if(): - n = tvm.var('n') - x = tvm.var('x') - ib = tvm.ir_builder.create() + n = te.var('n') + x = te.var('x') + ib = tvm.tir.ir_builder.create() A = ib.pointer("float32", name="A") with ib.for_range(0, 4, for_type="vectorize") as i: with ib.if_scope(x < n): @@ -62,7 +63,7 @@ def test_vectorize_with_if(): with ib.if_scope(i < n): A[i] = 2.0 stmt = ib.get() - stmt = tvm.ir_pass.VectorizeLoop(stmt) + stmt = tvm.tir.ir_pass.VectorizeLoop(stmt) assert isinstance(stmt, tvm.tir.IfThenElse) assert isinstance(stmt.then_case.index, tvm.tir.Ramp) assert isinstance(stmt.then_case.value, tvm.tir.Add) @@ -70,51 +71,51 @@ def 
test_vectorize_with_if(): assert isinstance(stmt.else_case, tvm.tir.For) def test_vectorize_with_le_cond(): - n = tvm.var('n') - ib = tvm.ir_builder.create() + n = te.var('n') + ib = tvm.tir.ir_builder.create() A = ib.pointer("float32", name="A") with ib.for_range(0, 4, for_type="vectorize") as i: with ib.if_scope(i <= n): A[i] = A[i] + 1 stmt = ib.get() - stmt = tvm.ir_pass.VectorizeLoop(stmt) + stmt = tvm.tir.ir_pass.VectorizeLoop(stmt) assert isinstance(stmt, tvm.tir.For) def test_vectorize_with_ge_cond(): - n = tvm.var('n') - ib = tvm.ir_builder.create() + n = te.var('n') + ib = tvm.tir.ir_builder.create() A = ib.pointer("float32", name="A") with ib.for_range(0, 4, for_type="vectorize") as i: with ib.if_scope(i >= n): A[i] = A[i] + 1 stmt = ib.get() - stmt = tvm.ir_pass.VectorizeLoop(stmt) + stmt = tvm.tir.ir_pass.VectorizeLoop(stmt) assert isinstance(stmt, tvm.tir.For) def test_vectorize_if_then_else(): - n = tvm.var('n') - x = tvm.var('x') - ib = tvm.ir_builder.create() + n = te.var('n') + x = te.var('x') + ib = tvm.tir.ir_builder.create() A = ib.pointer("float32", name="A") with ib.for_range(0, 4, for_type="vectorize") as i: - A[i] = tvm.call_intrin("float32", "tvm_if_then_else", + A[i] = tvm.tir.call_intrin("float32", "tvm_if_then_else", i > 0, A[i] + 1, A[i]) stmt = ib.get() - stmt = tvm.ir_pass.VectorizeLoop(stmt) + stmt = tvm.tir.ir_pass.VectorizeLoop(stmt) assert isinstance(stmt, tvm.tir.For) - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() A = ib.pointer("float32", name="A") with ib.for_range(0, n) as k: with ib.for_range(0, 4, for_type="vectorize") as i: - A[k * 4 + i] = tvm.call_intrin("float32", "tvm_if_then_else", + A[k * 4 + i] = tvm.tir.call_intrin("float32", "tvm_if_then_else", k > 0, A[k * 4 + i], 0) stmt = ib.get() assert isinstance(stmt.body, tvm.tir.For) - stmt = tvm.ir_pass.VectorizeLoop(stmt) + stmt = tvm.tir.ir_pass.VectorizeLoop(stmt) assert not isinstance(stmt.body, tvm.tir.For) assert 
isinstance(stmt.body.value.args[2], tvm.tir.Broadcast) diff --git a/tests/python/unittest/test_pass_verify_gpu_code.py b/tests/python/unittest/test_pass_verify_gpu_code.py index 76e5f0d..6e138a2 100644 --- a/tests/python/unittest/test_pass_verify_gpu_code.py +++ b/tests/python/unittest/test_pass_verify_gpu_code.py @@ -16,10 +16,11 @@ # under the License. """Test gpu code verifier""" import tvm +from tvm import te def get_verify_pass(valid, **kwargs): def verify_pass(stmt): - valid[0] = tvm.ir_pass.VerifyGPUCode(stmt, kwargs) + valid[0] = tvm.tir.ir_pass.VerifyGPUCode(stmt, kwargs) return stmt return verify_pass @@ -31,15 +32,15 @@ def test_shared_memory(): tvm_type = tvm.runtime.DataType(dtype) type_size = tvm_type.bits // 8 * tvm_type.lanes - A = tvm.placeholder((N,), name='A', dtype=dtype) - B = tvm.compute((N, ), lambda i: A[i], name='B') + A = te.placeholder((N,), name='A', dtype=dtype) + B = te.compute((N, ), lambda i: A[i], name='B') - s = tvm.create_schedule([B.op]) + s = te.create_schedule([B.op]) AA = s.cache_read(A, "shared", [B]) o, i = s[B].split(s[B].op.axis[0], M) s[AA].compute_at(s[B], o) - s[B].bind(o, tvm.thread_axis("blockIdx.x")) - s[B].bind(i, tvm.thread_axis("threadIdx.x")) + s[B].bind(o, te.thread_axis("blockIdx.x")) + s[B].bind(i, te.thread_axis("threadIdx.x")) # shared memory usage: M * sizeof(dtype) Bytes # thread usage: M @@ -48,14 +49,14 @@ def test_shared_memory(): if not tvm.context(target).exist: continue valid = [None] - with tvm.build_config(**{"add_lower_pass": [ + with tvm.target.build_config(**{"add_lower_pass": [ (2, get_verify_pass(valid, max_shared_memory_per_block=type_size * M - 1, max_threads_per_block=M))]}): tvm.build(s, [A, B], target) assert not valid[0] - with tvm.build_config(**{"add_lower_pass": [ + with tvm.target.build_config(**{"add_lower_pass": [ (2, get_verify_pass(valid, max_shared_memory_per_block=type_size * M, max_threads_per_block=M))]}): @@ -68,14 +69,14 @@ def test_local_memory(): N = 1024 M = 128 - A = 
tvm.placeholder((N,), name='A', dtype='float32') - B = tvm.compute((N, ), lambda i: A[i], name='B') + A = te.placeholder((N,), name='A', dtype='float32') + B = te.compute((N, ), lambda i: A[i], name='B') - s = tvm.create_schedule([B.op]) + s = te.create_schedule([B.op]) AA = s.cache_read(A, "local", [B]) o, i = s[B].split(s[B].op.axis[0], M) s[AA].compute_at(s[B], o) - s[B].bind(o, tvm.thread_axis("blockIdx.x")) + s[B].bind(o, te.thread_axis("blockIdx.x")) # local memory usage: M * 4B # thread usage: M @@ -85,14 +86,14 @@ def test_local_memory(): continue valid = [None] - with tvm.build_config(**{"add_lower_pass": [ + with tvm.target.build_config(**{"add_lower_pass": [ (2, get_verify_pass(valid, max_local_memory_per_block=4 * M - 1, max_threads_per_block=1))]}): tvm.build(s, [A, B], target) assert not valid[0] - with tvm.build_config(**{"add_lower_pass": [ + with tvm.target.build_config(**{"add_lower_pass": [ (2, get_verify_pass(valid, max_local_memory_per_block=4 * M, max_threads_per_block=1))]}): @@ -103,14 +104,14 @@ def test_num_thread(): N = 1024 M = 128 - A = tvm.placeholder((N,), name='A', dtype='float32') - B = tvm.compute((N, ), lambda i: A[i], name='B') + A = te.placeholder((N,), name='A', dtype='float32') + B = te.compute((N, ), lambda i: A[i], name='B') - s = tvm.create_schedule([B.op]) + s = te.create_schedule([B.op]) o, i = s[B].split(s[B].op.axis[0], M) - s[B].bind(o, tvm.thread_axis('threadIdx.x')) - s[B].bind(i, tvm.thread_axis("threadIdx.y")) + s[B].bind(o, te.thread_axis('threadIdx.x')) + s[B].bind(i, te.thread_axis("threadIdx.y")) # shared memory usage: 0 # thread usage: N @@ -120,21 +121,21 @@ def test_num_thread(): continue valid = [None] - with tvm.build_config(**{"add_lower_pass": [ + with tvm.target.build_config(**{"add_lower_pass": [ (2, get_verify_pass(valid, max_shared_memory_per_block=0, max_threads_per_block=N - 1))]}): tvm.build(s, [A, B], target) assert not valid[0] - with tvm.build_config(**{"add_lower_pass": [ + with 
tvm.target.build_config(**{"add_lower_pass": [ (2, get_verify_pass(valid, max_shared_memory_per_block=0, max_threads_per_block=N))]}): tvm.build(s, [A, B], target) assert valid[0] - with tvm.build_config(**{"add_lower_pass": [ + with tvm.target.build_config(**{"add_lower_pass": [ (2, get_verify_pass(valid, max_shared_memory_per_block=0, max_threads_per_block=N, @@ -142,7 +143,7 @@ def test_num_thread(): tvm.build(s, [A, B], target) assert not valid[0] - with tvm.build_config(**{"add_lower_pass": [ + with tvm.target.build_config(**{"add_lower_pass": [ (2, get_verify_pass(valid, max_shared_memory_per_block=0, max_threads_per_block=N, @@ -153,14 +154,14 @@ def test_num_thread(): def test_multiple_kernels(): N = 1024 - A = tvm.placeholder((N, N), name='A') - B = tvm.compute((N, N), lambda i, j: A[i, j]) - C = tvm.compute((N, N), lambda i, j: B[i, j]) + A = te.placeholder((N, N), name='A') + B = te.compute((N, N), lambda i, j: A[i, j]) + C = te.compute((N, N), lambda i, j: B[i, j]) - s = tvm.create_schedule([C.op]) + s = te.create_schedule([C.op]) - s[C].bind(s[C].op.axis[1], tvm.thread_axis("threadIdx.x")) - s[B].bind(s[B].op.axis[1], tvm.thread_axis("threadIdx.x")) + s[C].bind(s[C].op.axis[1], te.thread_axis("threadIdx.x")) + s[B].bind(s[B].op.axis[1], te.thread_axis("threadIdx.x")) # shared memory usage: 0 # thread usage: N @@ -170,14 +171,14 @@ def test_multiple_kernels(): continue valid = [None] - with tvm.build_config(**{"add_lower_pass": [ + with tvm.target.build_config(**{"add_lower_pass": [ (2, get_verify_pass(valid, max_shared_memory_per_block=0, max_threads_per_block=N - 1))]}): tvm.build(s, [A, C], target) assert not valid[0] - with tvm.build_config(**{"add_lower_pass": [ + with tvm.target.build_config(**{"add_lower_pass": [ (2, get_verify_pass(valid, max_shared_memory_per_block=0, max_threads_per_block=N))]}): @@ -187,21 +188,21 @@ def test_multiple_kernels(): def test_wrong_bind(): N = 1024 - A = tvm.placeholder((N, N-1), name='A') - B = tvm.compute((N, 
N-1), lambda i, j: A[i, j]) + A = te.placeholder((N, N-1), name='A') + B = te.compute((N, N-1), lambda i, j: A[i, j]) - s = tvm.create_schedule([B.op]) + s = te.create_schedule([B.op]) # bind a thread axis to two loop axes with different lengths - s[B].bind(s[B].op.axis[0], tvm.thread_axis("threadIdx.x")) - s[B].bind(s[B].op.axis[1], tvm.thread_axis("threadIdx.x")) + s[B].bind(s[B].op.axis[0], te.thread_axis("threadIdx.x")) + s[B].bind(s[B].op.axis[1], te.thread_axis("threadIdx.x")) for target in ['opencl', 'cuda']: if not tvm.context(target).exist: continue valid = [None] - with tvm.build_config(**{"add_lower_pass": [ + with tvm.target.build_config(**{"add_lower_pass": [ (2, get_verify_pass(valid, max_threads_per_block=N*N))]}): tvm.build(s, [A, B], target) assert not valid[0] diff --git a/tests/python/unittest/test_pass_verify_memory.py b/tests/python/unittest/test_pass_verify_memory.py index e76b6e5..3747cae 100644 --- a/tests/python/unittest/test_pass_verify_memory.py +++ b/tests/python/unittest/test_pass_verify_memory.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te # The following DLDeviceType/TVMDeviceExtType values # are originally defined in dlpack.h and c_runtime_api.h. 
@@ -26,19 +27,19 @@ def lower(sch, args): binds = {} arg_list = [] for x in args: - if isinstance(x, tvm.tensor.Tensor): - buf = tvm.decl_buffer(x.shape, dtype=x.dtype, name=x.name) + if isinstance(x, te.tensor.Tensor): + buf = tvm.tir.decl_buffer(x.shape, dtype=x.dtype, name=x.name) assert x not in binds binds[x] = buf arg_list.append(buf) else: raise ValueError("args must be Tensor, Buffer or Var") sch = sch.normalize() - bounds = tvm.schedule.InferBound(sch) - stmt = tvm.schedule.ScheduleOps(sch, bounds) - stmt = tvm.ir_pass.LoopPartition(stmt, False) - stmt = tvm.ir_pass.StorageFlatten(stmt, binds, 64) - func = tvm.ir_pass.MakeAPI(stmt, "myadd", arg_list, 0, True) + bounds = tvm.te.schedule.InferBound(sch) + stmt = tvm.te.schedule.ScheduleOps(sch, bounds) + stmt = tvm.tir.ir_pass.LoopPartition(stmt, False) + stmt = tvm.tir.ir_pass.StorageFlatten(stmt, binds, 64) + func = tvm.tir.ir_pass.MakeAPI(stmt, "myadd", arg_list, 0, True) return func @@ -46,63 +47,63 @@ def lower(sch, args): # So VerifyMemory pass is expected to succeed. # def test_verify_memory_all_bind(): - n = tvm.var("n") - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda i: A[i] + 1.0, name="B") + n = te.var("n") + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda i: A[i] + 1.0, name="B") # B is bound to threads. - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) bx, tx = s[B].split(B.op.axis[0], factor=64) - s[B].bind(bx, tvm.thread_axis("blockIdx.x")) - s[B].bind(tx, tvm.thread_axis("threadIdx.x")) + s[B].bind(bx, te.thread_axis("blockIdx.x")) + s[B].bind(tx, te.thread_axis("threadIdx.x")) func = lower(s, [A, B]) for dev_type in gpu_devices + other_devices: - assert tvm.ir_pass.VerifyMemory(func, dev_type) + assert tvm.tir.ir_pass.VerifyMemory(func, dev_type) # Computations are not bound. # So VerifyMemory pass fails when device type is GPU. 
# def test_verify_memory_not_bind(): - n = tvm.var("n") - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda i: A[i] + 1.0, name="B") + n = te.var("n") + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda i: A[i] + 1.0, name="B") # B is not bound to threads. - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) func = lower(s, [A, B]) for dev_type in gpu_devices: - assert not tvm.ir_pass.VerifyMemory(func, dev_type) + assert not tvm.tir.ir_pass.VerifyMemory(func, dev_type) for dev_type in other_devices: - assert tvm.ir_pass.VerifyMemory(func, dev_type) + assert tvm.tir.ir_pass.VerifyMemory(func, dev_type) # Computations are partially bound. # So VerifyMemory pass fails when device type is GPU. # def test_verify_memory_partially_bind(): - n = tvm.var("n") - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda i: A[i] + 1.0, name="B") - C = tvm.compute(B.shape, lambda i: B[i] + 2.0, name="C") - D = tvm.compute(C.shape, lambda i: C[i] + 2.0, name="D") + n = te.var("n") + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda i: A[i] + 1.0, name="B") + C = te.compute(B.shape, lambda i: B[i] + 2.0, name="C") + D = te.compute(C.shape, lambda i: C[i] + 2.0, name="D") # C is bound to threads, but B and D are not. 
- s = tvm.create_schedule([B.op, C.op, D.op]) + s = te.create_schedule([B.op, C.op, D.op]) bx, tx = s[C].split(C.op.axis[0], factor=64) - s[C].bind(bx, tvm.thread_axis("blockIdx.x")) - s[C].bind(tx, tvm.thread_axis("threadIdx.x")) + s[C].bind(bx, te.thread_axis("blockIdx.x")) + s[C].bind(tx, te.thread_axis("threadIdx.x")) func = lower(s, [A, B, C, D]) for dev_type in gpu_devices: - assert not tvm.ir_pass.VerifyMemory(func, dev_type) + assert not tvm.tir.ir_pass.VerifyMemory(func, dev_type) for dev_type in other_devices: - assert tvm.ir_pass.VerifyMemory(func, dev_type) + assert tvm.tir.ir_pass.VerifyMemory(func, dev_type) if __name__ == "__main__": diff --git a/tests/python/unittest/test_pass_virtual_thread.py b/tests/python/unittest/test_pass_virtual_thread.py index 48a769f..2d96696 100644 --- a/tests/python/unittest/test_pass_virtual_thread.py +++ b/tests/python/unittest/test_pass_virtual_thread.py @@ -15,29 +15,30 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te def test_virtual_thread(): - m = tvm.var('m') - A = tvm.placeholder((m, ), name='A') - A1 = tvm.compute((m,), lambda i: A[i], name='A1') - A2 = tvm.compute((m,), lambda i: A1[i] + 3, name='A2') + m = te.var('m') + A = te.placeholder((m, ), name='A') + A1 = te.compute((m,), lambda i: A[i], name='A1') + A2 = te.compute((m,), lambda i: A1[i] + 3, name='A2') - s = tvm.create_schedule(A2.op) - vx = tvm.thread_axis("vthread", name="vx") + s = te.create_schedule(A2.op) + vx = te.thread_axis("vthread", name="vx") xo, xi = s[A2].split(A2.op.axis[0], nparts=2) s[A2].bind(xo, vx) xo, xi = s[A2].split(xi, 8) s[A1].compute_at(s[A2], xo) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) - Ab = tvm.decl_buffer(A.shape, A.dtype, name='A') - A2b = tvm.decl_buffer(A2.shape, A2.dtype, name='A2') - stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64) - stmt = tvm.ir_pass.Simplify(stmt) - stmt = tvm.ir_pass.InjectVirtualThread(stmt) + Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name='A') + A2b = tvm.tir.decl_buffer(A2.shape, A2.dtype, name='A2') + stmt = tvm.tir.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64) + stmt = tvm.tir.ir_pass.Simplify(stmt) + stmt = tvm.tir.ir_pass.InjectVirtualThread(stmt) print(stmt) if __name__ == "__main__": diff --git a/tests/python/unittest/test_runtime_error.py b/tests/python/unittest/test_runtime_error.py index ac019a0..70166b3 100644 --- a/tests/python/unittest/test_runtime_error.py +++ b/tests/python/unittest/test_runtime_error.py @@ -16,6 +16,7 @@ # under the License. 
"""Test runtime error handling""" import tvm +from tvm import te import tvm.testing def test_op_translation(): diff --git a/tests/python/unittest/test_runtime_extension.py b/tests/python/unittest/test_runtime_extension.py index 5207b09..375b99b 100644 --- a/tests/python/unittest/test_runtime_extension.py +++ b/tests/python/unittest/test_runtime_extension.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np @tvm.register_extension @@ -29,16 +30,16 @@ class MyTensorView(object): def test_dltensor_compatible(): dtype = 'int64' - n = tvm.var('n') - Ab = tvm.decl_buffer((n,), dtype) - i = tvm.var('i') - ib = tvm.ir_builder.create() + n = te.var('n') + Ab = tvm.tir.decl_buffer((n,), dtype) + i = te.var('i') + ib = tvm.tir.ir_builder.create() A = ib.buffer_ptr(Ab) with ib.for_range(0, n - 1, "i") as i: A[i + 1] = A[i] + 1 stmt = ib.get() - fapi = tvm.ir_pass.MakeAPI(stmt, "arange", [Ab], 0, True) - fapi = tvm.ir_pass.LowerTVMBuiltin(fapi) + fapi = tvm.tir.ir_pass.MakeAPI(stmt, "arange", [Ab], 0, True) + fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi) f = tvm.target.codegen.build_module(fapi, "stackvm") a = tvm.nd.array(np.zeros(10, dtype=dtype)) aview = MyTensorView(a) diff --git a/tests/python/unittest/test_runtime_graph.py b/tests/python/unittest/test_runtime_graph.py index da5bea1..ee2cd71 100644 --- a/tests/python/unittest/test_runtime_graph.py +++ b/tests/python/unittest/test_runtime_graph.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te import numpy as np import json from tvm import rpc @@ -22,9 +23,9 @@ from tvm.contrib import util, graph_runtime def test_graph_simple(): n = 4 - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') - s = tvm.create_schedule(B.op) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + s = te.create_schedule(B.op) node0 = {"op": "null", "name": "x", "inputs": []} node1 = {"op": "tvm_op", "name": "add", diff --git a/tests/python/unittest/test_runtime_graph_debug.py b/tests/python/unittest/test_runtime_graph_debug.py index aeb4809..658d9eb 100644 --- a/tests/python/unittest/test_runtime_graph_debug.py +++ b/tests/python/unittest/test_runtime_graph_debug.py @@ -16,6 +16,7 @@ # under the License. import os import tvm +from tvm import te import numpy as np import json from tvm import rpc @@ -24,9 +25,9 @@ from tvm.contrib.debugger import debug_runtime as graph_runtime def test_graph_simple(): n = 4 - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') - s = tvm.create_schedule(B.op) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + s = te.create_schedule(B.op) node0 = {"op": "null", "name": "x", "inputs": []} node1 = {"op": "tvm_op", "name": "add", diff --git a/tests/python/unittest/test_runtime_heterogeneous.py b/tests/python/unittest/test_runtime_heterogeneous.py index a718ed8..8ca61c1 100644 --- a/tests/python/unittest/test_runtime_heterogeneous.py +++ b/tests/python/unittest/test_runtime_heterogeneous.py @@ -20,6 +20,7 @@ import json import numpy as np import tvm +from tvm import te from tvm.contrib import graph_runtime, util import topi @@ -132,9 +133,9 @@ def test_simplex_data_transferring(): shape = (4,) # Create module for add whose target is the device. 
- tensor_a = tvm.placeholder(shape, name="A") - tensor_b = tvm.placeholder(shape, name="B") - elemwise_add = tvm.compute(shape, lambda *i: tensor_a(*i) + tensor_a = te.placeholder(shape, name="A") + tensor_b = te.placeholder(shape, name="B") + elemwise_add = te.compute(shape, lambda *i: tensor_a(*i) + tensor_b(*i), name="elemwise_add") target = topi.cpp.TEST_create_target(device) schedule_add = topi.cpp.cuda.schedule_injective(target, [elemwise_add]) @@ -144,13 +145,13 @@ def test_simplex_data_transferring(): # Insert copy. Neither compute nor schedule is required for the copy # node. The compute will be performed at runtime which is just data # copy from the input to the output. - tensor_copy = tvm.placeholder(shape, name="__copy") + tensor_copy = te.placeholder(shape, name="__copy") # Create module for sub whose target is the host. - tensor_c = tvm.placeholder(shape, name="C") - elemwise_sub = tvm.compute(shape, lambda *i: tensor_copy(*i) + tensor_c = te.placeholder(shape, name="C") + elemwise_sub = te.compute(shape, lambda *i: tensor_copy(*i) - tensor_c(*i), name="elemwise_sub") - schedule_sub = tvm.create_schedule(elemwise_sub.op) + schedule_sub = te.create_schedule(elemwise_sub.op) lower_sub = tvm.lower(schedule_sub, [tensor_copy, tensor_c, elemwise_sub], name="elemwise_sub") @@ -321,17 +322,17 @@ def test_duplex_data_transferring(): # Insert copy nodes for data transferring between add and sub nodes. # Transfers data from gpu to cpu. - copy_add_sub = tvm.placeholder(shape, name="__copy0") + copy_add_sub = te.placeholder(shape, name="__copy0") # Transfers data from cpu to gpu. - copy_sub_add = tvm.placeholder(shape, name="__copy1") + copy_sub_add = te.placeholder(shape, name="__copy1") # Create a module containing adds on the device. 
- tensor_a = tvm.placeholder(shape, name="A") - tensor_b = tvm.placeholder(shape, name="B") - tensor_d = tvm.placeholder(shape, name="D") - elemwise_add0 = tvm.compute(shape, lambda *i: tensor_a(*i) + tensor_a = te.placeholder(shape, name="A") + tensor_b = te.placeholder(shape, name="B") + tensor_d = te.placeholder(shape, name="D") + elemwise_add0 = te.compute(shape, lambda *i: tensor_a(*i) + tensor_b(*i), name="elemwise_add0") - elemwise_add1 = tvm.compute(shape, lambda *i: copy_sub_add(*i) + elemwise_add1 = te.compute(shape, lambda *i: copy_sub_add(*i) + tensor_d(*i), name="elemwise_add1") target = topi.cpp.TEST_create_target(device) add_schedule0 = topi.cpp.cuda.schedule_injective( @@ -345,10 +346,10 @@ def test_duplex_data_transferring(): add_schedule1, [tensor_d, copy_sub_add, elemwise_add1], name="elemwise_add1") # Create module for sub whose target is the host. - tensor_c = tvm.placeholder(shape, name="C") - elemwise_sub = tvm.compute(shape, lambda *i: copy_add_sub(*i) + tensor_c = te.placeholder(shape, name="C") + elemwise_sub = te.compute(shape, lambda *i: copy_add_sub(*i) - tensor_c(*i), name="elemwise_sub") - sub_schedule = tvm.create_schedule(elemwise_sub.op) + sub_schedule = te.create_schedule(elemwise_sub.op) lower_sub = tvm.lower(sub_schedule, [copy_add_sub, tensor_c, elemwise_sub], name="elemwise_sub") diff --git a/tests/python/unittest/test_runtime_measure.py b/tests/python/unittest/test_runtime_measure.py index 7413a37..25361a1 100644 --- a/tests/python/unittest/test_runtime_measure.py +++ b/tests/python/unittest/test_runtime_measure.py @@ -18,6 +18,7 @@ import time import ctypes import tvm +from tvm import te from tvm.contrib.util import tempdir @@ -32,8 +33,8 @@ def test_min_repeat_ms(): with open(filename, "a") as fout: fout.write("c") - X = tvm.compute((), lambda : tvm.call_packed("my_debug", filename)) - s = tvm.create_schedule(X.op) + X = te.compute((), lambda : tvm.tir.call_packed("my_debug", filename)) + s = te.create_schedule(X.op) func = 
tvm.build(s, [X]) x = tvm.nd.empty((), dtype="int32") diff --git a/tests/python/unittest/test_runtime_micro.py b/tests/python/unittest/test_runtime_micro.py index f6114da..28fdb11 100644 --- a/tests/python/unittest/test_runtime_micro.py +++ b/tests/python/unittest/test_runtime_micro.py @@ -18,6 +18,7 @@ import os import numpy as np import tvm +from tvm import te from tvm.contrib import graph_runtime, util from tvm import relay import tvm.micro as micro @@ -46,7 +47,7 @@ def relay_micro_build(func, dev_config, params=None): mod : tvm.runtime.Module graph runtime module for the target device """ - with tvm.build_config(disable_vectorize=True): + with tvm.target.build_config(disable_vectorize=True): graph, c_mod, params = relay.build(func, target="c", params=params) micro_mod = create_micro_mod(c_mod, dev_config) ctx = tvm.micro_dev(0) @@ -76,11 +77,11 @@ def test_add(): dtype = "float32" # Construct TVM expression. - tvm_shape = tvm.convert(shape) - A = tvm.placeholder(tvm_shape, name="A", dtype=dtype) - B = tvm.placeholder(tvm_shape, name="B", dtype=dtype) - C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name="C") - s = tvm.create_schedule(C.op) + tvm_shape = tvm.runtime.convert(shape) + A = te.placeholder(tvm_shape, name="A", dtype=dtype) + B = te.placeholder(tvm_shape, name="B", dtype=dtype) + C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name="C") + s = te.create_schedule(C.op) func_name = "fadd" c_mod = tvm.build(s, [A, B, C], target="c", name=func_name) @@ -105,12 +106,12 @@ def test_workspace_add(): dtype = "float32" # Construct TVM expression. 
- tvm_shape = tvm.convert(shape) - A = tvm.placeholder(tvm_shape, name="A", dtype=dtype) - B = tvm.placeholder(tvm_shape, name="B", dtype=dtype) - B = tvm.compute(A.shape, lambda *i: A(*i) + 1, name="B") - C = tvm.compute(A.shape, lambda *i: B(*i) + 1, name="C") - s = tvm.create_schedule(C.op) + tvm_shape = tvm.runtime.convert(shape) + A = te.placeholder(tvm_shape, name="A", dtype=dtype) + B = te.placeholder(tvm_shape, name="B", dtype=dtype) + B = te.compute(A.shape, lambda *i: A(*i) + 1, name="B") + C = te.compute(A.shape, lambda *i: B(*i) + 1, name="C") + s = te.create_schedule(C.op) func_name = "fadd_two_workspace" c_mod = tvm.build(s, [A, C], target="c", name=func_name) diff --git a/tests/python/unittest/test_runtime_module_export.py b/tests/python/unittest/test_runtime_module_export.py index ee82da6..35bafb4 100644 --- a/tests/python/unittest/test_runtime_module_export.py +++ b/tests/python/unittest/test_runtime_module_export.py @@ -17,6 +17,7 @@ from tvm import relay from tvm.relay import testing import tvm +from tvm import te from tvm.contrib import util header_file_dir_path = util.tempdir() @@ -95,9 +96,9 @@ def test_mod_export(): with relay.build_config(opt_level=3): _, resnet18_cpu_lib, _ = relay.build_module.build(resnet18_mod, "llvm", params=resnet18_params) - A = tvm.placeholder((1024,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') - s = tvm.create_schedule(B.op) + A = te.placeholder((1024,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + s = te.create_schedule(B.op) f = tvm.build(s, [A, B], "llvm", name="myadd") from tvm.contrib import util temp = util.tempdir() @@ -144,9 +145,9 @@ def test_mod_export(): f.write(subgraph_json) # Get Json and module. 
- A = tvm.placeholder((1024,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') - s = tvm.create_schedule(B.op) + A = te.placeholder((1024,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + s = te.create_schedule(B.op) f = tvm.build(s, [A, B], "llvm", name="myadd") try: ext_lib = tvm.runtime.load_module(subgraph_path, "examplejson") @@ -179,9 +180,9 @@ def test_mod_export(): with relay.build_config(opt_level=3): _, resnet18_cpu_lib, _ = relay.build_module.build(resnet18_mod, "llvm", params=resnet18_params) - A = tvm.placeholder((1024,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') - s = tvm.create_schedule(B.op) + A = te.placeholder((1024,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + s = te.create_schedule(B.op) f = tvm.build(s, [A, B], "c", name="myadd") engine_module = generate_engine_module() from tvm.contrib import util diff --git a/tests/python/unittest/test_runtime_module_load.py b/tests/python/unittest/test_runtime_module_load.py index 1cbc157..e7771e3 100644 --- a/tests/python/unittest/test_runtime_module_load.py +++ b/tests/python/unittest/test_runtime_module_load.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te from tvm.contrib import cc, util import ctypes import os @@ -29,6 +30,7 @@ import sys os.environ["TVM_USE_RUNTIME_LIB"] = "1" os.environ["TVM_FFI"] = "ctypes" import tvm +from tvm import te import numpy as np path_dso = sys.argv[1] dtype = sys.argv[2] @@ -46,17 +48,17 @@ def test_dso_module_load(): temp = util.tempdir() def save_object(names): - n = tvm.size_var('n') - Ab = tvm.decl_buffer((n, ), dtype) - i = tvm.var('i') + n = te.size_var('n') + Ab = tvm.tir.decl_buffer((n, ), dtype) + i = te.var('i') # for i in 0 to n-1: stmt = tvm.tir.For( i, 0, n - 1, 0, 0, tvm.tir.Store(Ab.data, tvm.tir.Load(dtype, Ab.data, i) + 1, i + 1)) - fapi = tvm.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True) - fapi = tvm.ir_pass.LowerTVMBuiltin(fapi) + fapi = tvm.tir.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True) + fapi = tvm.tir.ir_pass.LowerTVMBuiltin(fapi) m = tvm.target.codegen.build_module(fapi, "llvm") for name in names: m.save(name) @@ -88,15 +90,15 @@ def test_dso_module_load(): def test_device_module_dump(): # graph - n = tvm.convert(1024) - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') - s = tvm.create_schedule(B.op) + n = tvm.runtime.convert(1024) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + s = te.create_schedule(B.op) # create iter var and assign them tags. 
num_thread = 8 bx, tx = s[B].split(B.op.axis[0], factor=num_thread) - s[B].bind(bx, tvm.thread_axis("blockIdx.x")) - s[B].bind(tx, tvm.thread_axis("threadIdx.x")) + s[B].bind(bx, te.thread_axis("blockIdx.x")) + s[B].bind(tx, te.thread_axis("threadIdx.x")) def check_device(device): ctx = tvm.context(device, 0) @@ -150,10 +152,10 @@ def test_combine_module_llvm(): """Test combine multiple module into one shared lib.""" # graph nn = 12 - n = tvm.convert(nn) - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') - s = tvm.create_schedule(B.op) + n = tvm.runtime.convert(nn) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + s = te.create_schedule(B.op) def check_llvm(): ctx = tvm.cpu(0) diff --git a/tests/python/unittest/test_runtime_ndarray.py b/tests/python/unittest/test_runtime_ndarray.py index ed23a0b..e314379 100644 --- a/tests/python/unittest/test_runtime_ndarray.py +++ b/tests/python/unittest/test_runtime_ndarray.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te import numpy as np def enabled_ctx_list(): @@ -55,10 +56,10 @@ def test_fp16_conversion(): n = 100 for (src, dst) in [('float32', 'float16'), ('float16', 'float32')]: - A = tvm.placeholder((n,), dtype=src) - B = tvm.compute((n,), lambda i: A[i].astype(dst)) + A = te.placeholder((n,), dtype=src) + B = te.compute((n,), lambda i: A[i].astype(dst)) - s = tvm.create_schedule([B.op]) + s = te.create_schedule([B.op]) func = tvm.build(s, [A, B], 'llvm') x_tvm = tvm.nd.array(100 * np.random.randn(n).astype(src) - 50) diff --git a/tests/python/unittest/test_runtime_packed_func.py b/tests/python/unittest/test_runtime_packed_func.py index 4f73770..3570fe1 100644 --- a/tests/python/unittest/test_runtime_packed_func.py +++ b/tests/python/unittest/test_runtime_packed_func.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import tvm.testing import numpy as np @@ -32,12 +33,12 @@ def test_get_global(): assert y == 10 def test_get_callback_with_node(): - x = tvm.convert(10) + x = tvm.runtime.convert(10) def test(y): assert y.handle != x.handle return y - f2 = tvm.convert(test) + f2 = tvm.runtime.convert(test) # register into global function table @tvm.register_func def my_callback_with_node(y, f): @@ -54,9 +55,9 @@ def test_get_callback_with_node(): def test_return_func(): def addy(y): def add(x): - return tvm.convert(x + y) + return tvm.runtime.convert(x + y) return add - myf = tvm.convert(addy) + myf = tvm.runtime.convert(addy) f = myf(10) assert f(11).value == 21 @@ -67,7 +68,7 @@ def test_convert(): def myfunc(*args): assert(tuple(args) == targs) - f = tvm.convert(myfunc) + f = tvm.runtime.convert(myfunc) assert isinstance(f, tvm.runtime.PackedFunc) def test_byte_array(): @@ -76,15 +77,15 @@ def test_byte_array(): def myfunc(ss): assert ss == a - f = tvm.convert(myfunc) + f = tvm.runtime.convert(myfunc) f(a) def test_empty_array(): def myfunc(ss): assert tuple(ss) == 
() - x = tvm.convert(()) - tvm.convert(myfunc)(x) + x = tvm.runtime.convert(()) + tvm.runtime.convert(myfunc)(x) def test_ctx(): @@ -99,25 +100,25 @@ def test_ctx(): def test_trace_default_action(): n = 2 - x = tvm.placeholder((n,n,n), name="X", dtype="float32") - y = tvm.compute(x.shape, lambda i, j, k: tvm.trace([i, j, k, x[i][j][k]])) - s = tvm.create_schedule(y.op) + x = te.placeholder((n,n,n), name="X", dtype="float32") + y = te.compute(x.shape, lambda i, j, k: tvm.tir.trace([i, j, k, x[i][j][k]])) + s = te.create_schedule(y.op) f = tvm.build(s, [x, y], target="llvm") xnd = tvm.nd.array(np.ones((n,n,n), dtype=x.dtype)) ynd = tvm.nd.array(np.zeros((n,n,n), dtype=y.dtype)) f(xnd, ynd) def test_trace_expr_assign(): - @tvm.register_func("tvm.trace_callback2") + @tvm.register_func("tvm.tir.trace_callback2") def trace_buffer(x): return def check_assign(dtype): n = 4 - x = tvm.placeholder((n,n,n), name="X", dtype=dtype) - y = tvm.compute(x.shape, lambda i, j, k: tvm.trace([x[i][j][k]], "tvm.trace_callback2")) - z = tvm.compute(x.shape, lambda i, j, k: tvm.trace([y[i][j][k]], "tvm.trace_callback2")) - s = tvm.create_schedule(z.op) + x = te.placeholder((n,n,n), name="X", dtype=dtype) + y = te.compute(x.shape, lambda i, j, k: tvm.tir.trace([x[i][j][k]], "tvm.tir.trace_callback2")) + z = te.compute(x.shape, lambda i, j, k: tvm.tir.trace([y[i][j][k]], "tvm.tir.trace_callback2")) + s = te.create_schedule(z.op) f = tvm.build(s, [x, y, z], "llvm") xnd = tvm.nd.array(np.ones((n,n,n), dtype=x.dtype)) @@ -133,17 +134,17 @@ def test_trace_expr_assign(): check_assign(t) def test_trace_expr_sum_generated(): - @tvm.register_func("tvm.trace_callback3") + @tvm.register_func("tvm.tir.trace_callback3") def trace_buffer(x): return def check_expr_sum(dtype): n = 4 - a = tvm.placeholder((n,n,n), name="a", dtype=dtype) - b = tvm.placeholder((n,n,n), name="b", dtype=dtype) - c = tvm.compute(a.shape, lambda i, j, k: tvm.trace([a[i][j][k]],"tvm.trace_callback3") - + 
tvm.trace([b[i][j][k]],"tvm.trace_callback3")) - s = tvm.create_schedule(c.op) + a = te.placeholder((n,n,n), name="a", dtype=dtype) + b = te.placeholder((n,n,n), name="b", dtype=dtype) + c = te.compute(a.shape, lambda i, j, k: tvm.tir.trace([a[i][j][k]],"tvm.tir.trace_callback3") + + tvm.tir.trace([b[i][j][k]],"tvm.tir.trace_callback3")) + s = te.create_schedule(c.op) f = tvm.build(s, [a, b, c]) xnd = tvm.nd.array(np.array(np.ones((n,n,n), dtype=a.dtype))) ynd = tvm.nd.array(np.array(np.ones((n,n,n), dtype=b.dtype))) @@ -155,22 +156,22 @@ def test_trace_expr_sum_generated(): check_expr_sum(t) def test_trace_expr_sum_args(): - @tvm.register_func("tvm.trace_silent") + @tvm.register_func("tvm.tir.trace_silent") def silent(*args): return def check_expr_sum(dtype): n = 4 - a = tvm.placeholder((n,n,n), name="a", dtype=dtype) - b = tvm.placeholder((n,n,n), name="b", dtype=dtype) - e = tvm.placeholder((n,n,n), name="e", dtype=dtype) - d = tvm.placeholder((n,n,n), name="d", dtype=dtype) - - c = tvm.compute(a.shape, lambda i, j, k: tvm.trace([i, j, k, a[i][j][k]], "tvm.trace_silent") - + tvm.trace([i, j, k, b[i][j][k]], "tvm.trace_silent") - + tvm.trace([i, j, k, d[i][j][k]], "tvm.trace_silent") - + tvm.trace([i, j, k, e[i][j][k]], "tvm.trace_silent")) - s = tvm.create_schedule(c.op) + a = te.placeholder((n,n,n), name="a", dtype=dtype) + b = te.placeholder((n,n,n), name="b", dtype=dtype) + e = te.placeholder((n,n,n), name="e", dtype=dtype) + d = te.placeholder((n,n,n), name="d", dtype=dtype) + + c = te.compute(a.shape, lambda i, j, k: tvm.tir.trace([i, j, k, a[i][j][k]], "tvm.tir.trace_silent") + + tvm.tir.trace([i, j, k, b[i][j][k]], "tvm.tir.trace_silent") + + tvm.tir.trace([i, j, k, d[i][j][k]], "tvm.tir.trace_silent") + + tvm.tir.trace([i, j, k, e[i][j][k]], "tvm.tir.trace_silent")) + s = te.create_schedule(c.op) f = tvm.build(s, [a, b, d, e, c]) a_nd = tvm.nd.array(np.array(np.ones((n,n,n), dtype=a.dtype))) b_nd = tvm.nd.array(np.array(np.ones((n,n,n), dtype=b.dtype))) 
@@ -187,17 +188,17 @@ def test_trace_expr_sum_args(): check_expr_sum(t) def test_trace_expr_sum_custom(): - @tvm.register_func("tvm.trace_callback4") + @tvm.register_func("tvm.tir.trace_callback4") def trace_buffer(x): return def check_expr_sum_custom(dtype): n = 4 - a = tvm.placeholder((n,n), name="a", dtype=dtype) - b = tvm.placeholder((n,n), name="b", dtype=dtype) - c = tvm.compute(a.shape, lambda i,j: tvm.trace([a[i][j]], "tvm.trace_callback4") - + tvm.trace([b[i][j]], "tvm.trace_callback4")) - s = tvm.create_schedule(c.op) + a = te.placeholder((n,n), name="a", dtype=dtype) + b = te.placeholder((n,n), name="b", dtype=dtype) + c = te.compute(a.shape, lambda i,j: tvm.tir.trace([a[i][j]], "tvm.tir.trace_callback4") + + tvm.tir.trace([b[i][j]], "tvm.tir.trace_callback4")) + s = te.create_schedule(c.op) f = tvm.build(s, [a, b, c]) npa = np.array([[1,0,0,0], [0,1,0,0],[0,0,1,0],[0,0,0,1]], dtype=a.dtype) npb = np.array([[1,0,0,0], [0,1,0,0],[0,0,1,0],[0,0,0,1]], dtype=a.dtype) @@ -211,20 +212,20 @@ def test_trace_expr_sum_custom(): check_expr_sum_custom(t) def test_trace_can_change_traced_value_int(): - @tvm.register_func("tvm.trace_change_int_first") + @tvm.register_func("tvm.tir.trace_change_int_first") def trace_buffer(x): return 13 - @tvm.register_func("tvm.trace_change_int_second") + @tvm.register_func("tvm.tir.trace_change_int_second") def trace_buffer(x): return 14 def check_assign(dtype): n = 4 - x = tvm.placeholder((n,), name="X", dtype=dtype) - y = tvm.compute(x.shape, lambda i: tvm.trace([x[i]], "tvm.trace_change_int_first")) - z = tvm.compute(x.shape, lambda i: tvm.trace([y[i]], "tvm.trace_change_int_second")) - s = tvm.create_schedule(z.op) + x = te.placeholder((n,), name="X", dtype=dtype) + y = te.compute(x.shape, lambda i: tvm.tir.trace([x[i]], "tvm.tir.trace_change_int_first")) + z = te.compute(x.shape, lambda i: tvm.tir.trace([y[i]], "tvm.tir.trace_change_int_second")) + s = te.create_schedule(z.op) f = tvm.build(s, [x, y, z], "llvm") xnd = 
tvm.nd.array(np.ones((n,), dtype=x.dtype)) @@ -240,20 +241,20 @@ def test_trace_can_change_traced_value_int(): check_assign(t) def test_trace_can_change_traced_value_float(): - @tvm.register_func("tvm.trace_change_float_first") + @tvm.register_func("tvm.tir.trace_change_float_first") def trace_buffer(x): return 13.0 - @tvm.register_func("tvm.trace_change_float_second") + @tvm.register_func("tvm.tir.trace_change_float_second") def trace_buffer(x): return 14.0 def check_assign(dtype): n = 4 - x = tvm.placeholder((n,), name="X", dtype=dtype) - y = tvm.compute(x.shape, lambda i: tvm.trace([x[i]], "tvm.trace_change_float_first")) - z = tvm.compute(x.shape, lambda i: tvm.trace([y[i]], "tvm.trace_change_float_second")) - s = tvm.create_schedule(z.op) + x = te.placeholder((n,), name="X", dtype=dtype) + y = te.compute(x.shape, lambda i: tvm.tir.trace([x[i]], "tvm.tir.trace_change_float_first")) + z = te.compute(x.shape, lambda i: tvm.tir.trace([y[i]], "tvm.tir.trace_change_float_second")) + s = te.create_schedule(z.op) f = tvm.build(s, [x, y, z], "llvm") xnd = tvm.nd.array(np.ones((n,), dtype=x.dtype)) diff --git a/tests/python/unittest/test_runtime_rpc.py b/tests/python/unittest/test_runtime_rpc.py index 75169da..1d9b79e 100644 --- a/tests/python/unittest/test_runtime_rpc.py +++ b/tests/python/unittest/test_runtime_rpc.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te import tvm.testing import os import logging @@ -34,9 +35,9 @@ def test_bigendian_rpc(): if host is None: return def verify_rpc(remote, target, shape, dtype): - A = tvm.placeholder(shape, dtype=dtype) - B = tvm.compute(A.shape, lambda i: A[i]+tvm.const(1, A.dtype)) - s = tvm.create_schedule(B.op) + A = te.placeholder(shape, dtype=dtype) + B = te.compute(A.shape, lambda i: A[i]+tvm.tir.const(1, A.dtype)) + s = te.create_schedule(B.op) f = tvm.build(s, [A, B], target, name="myadd") ctx = remote.cpu(0) @@ -116,10 +117,10 @@ def test_rpc_remote_module(): server = rpc.Server("localhost") client = rpc.connect(server.host, server.port) # graph - n = tvm.convert(1024) - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') - s = tvm.create_schedule(B.op) + n = tvm.runtime.convert(1024) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + s = te.create_schedule(B.op) def check_remote(remote): if not tvm.runtime.enabled("llvm"): @@ -155,10 +156,10 @@ def test_rpc_remote_module(): return temp = util.tempdir() ctx = remote.cl(0) - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=32) - s[B].bind(xo, tvm.thread_axis("blockIdx.x")) - s[B].bind(xi, tvm.thread_axis("threadIdx.x")) + s[B].bind(xo, te.thread_axis("blockIdx.x")) + s[B].bind(xi, te.thread_axis("threadIdx.x")) f = tvm.build(s, [A, B], "opencl", target_host="llvm", name="myadd") # Option 1: save modules separately and rely on remote compiler path_o = temp.relpath("myadd.o") diff --git a/tests/python/unittest/test_runtime_vm_profiler.py b/tests/python/unittest/test_runtime_vm_profiler.py index 849a9ef..064b733 100644 --- a/tests/python/unittest/test_runtime_vm_profiler.py +++ b/tests/python/unittest/test_runtime_vm_profiler.py @@ -17,6 +17,7 @@ import numpy as np import tvm +from tvm import te from tvm.runtime import profiler_vm from tvm import relay 
from tvm.relay.testing import resnet diff --git a/tests/python/unittest/test_schedule_bound_inference.py b/tests/python/unittest/test_schedule_bound_inference.py index 9c3d1df..484aa50 100644 --- a/tests/python/unittest/test_schedule_bound_inference.py +++ b/tests/python/unittest/test_schedule_bound_inference.py @@ -15,81 +15,82 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_bound1(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1') - A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') + m = te.var('m') + l = te.var('l') + A = te.placeholder((m, l), name='A') + A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1') + A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') - s = tvm.create_schedule([A2.op]) + s = te.create_schedule([A2.op]) xo, xi = s[A2].split(s[A2].op.axis[0], 8) s[A1].compute_at(s[A2], xo) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) assert(bounds[A1.op.axis[0]].extent.value == 8) def test_bound2(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1') - A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') - s = tvm.create_schedule(A2.op) + m = te.var('m') + l = te.var('l') + A = te.placeholder((m, l), name='A') + A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1') + A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') + s = te.create_schedule(A2.op) xo, yo, xi, yi = s[A2].tile(A2.op.axis[0], A2.op.axis[1], 8, 8) # test normalize not affecting schedule _ = s.normalize() s[A1].compute_at(s[A2], yo) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) assert(bounds[A1.op.axis[0]].extent.value == 8) 
assert(bounds[A1.op.axis[1]].extent.value == 8) def test_bound3(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1') - A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') + m = te.var('m') + l = te.var('l') + A = te.placeholder((m, l), name='A') + A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1') + A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') - s = tvm.create_schedule(A2.op) + s = te.create_schedule(A2.op) s[A1].set_scope("shared") xo, xi = s[A2].split(A2.op.axis[0], 32) xi0, xi1 = s[A2].split(xi, nparts=16) - s[A2].bind(xi0, tvm.thread_axis("threadIdx.x")) + s[A2].bind(xi0, te.thread_axis("threadIdx.x")) yo, yi = s[A2].split(A2.op.axis[1], 16) # test normalize not affecting schedule _ = s.normalize() s[A2].reorder(xo, xi0, yo, xi1, yi) s[A1].compute_at(s[A2], yo) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) assert(bounds[A1.op.axis[0]].extent.value==32) assert(bounds[A1.op.axis[1]].extent.value==16) def test_bound_split_divisible(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((8 * m, l), name='A') - B = tvm.compute((8 * m, l), lambda i, j: A[i, j], name='B') - s = tvm.create_schedule(B.op) + m = te.var('m') + l = te.var('l') + A = te.placeholder((8 * m, l), name='A') + B = te.compute((8 * m, l), lambda i, j: A[i, j], name='B') + s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], 8) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) assert bounds[xo].extent == m assert bounds[xi].extent.value == 8 def test_bound_tile_divisible(): - m = tvm.var('m') - l = tvm.var('l') + m = te.var('m') + l = te.var('l') shape = (8 * m, 32 * l) - A = tvm.placeholder(shape, name='A') - B = tvm.compute(shape, lambda i, j: A[i, j], name='B') - s = tvm.create_schedule(B.op) + A = 
te.placeholder(shape, name='A') + B = te.compute(shape, lambda i, j: A[i, j], name='B') + s = te.create_schedule(B.op) xo, yo, xi, yi = s[B].tile(B.op.axis[0], B.op.axis[1], 8, 32) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) assert bounds[xo].extent == m assert bounds[xi].extent.value == 8 @@ -97,165 +98,165 @@ def test_bound_tile_divisible(): assert bounds[yi].extent.value == 32 def test_bound_fusesplit1(): - m = tvm.var('m') - l = tvm.var('l') - split1 = tvm.var('s') - A = tvm.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1') - A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') - - s = tvm.create_schedule(A2.op) + m = te.var('m') + l = te.var('l') + split1 = te.var('s') + A = te.placeholder((m, l), name='A') + A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1') + A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') + + s = te.create_schedule(A2.op) fused_axes = s[A2].fuse(A2.op.axis[0], A2.op.axis[1]) xo, xi = s[A2].split(fused_axes, split1) s[A1].compute_at(s[A2], xo) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - idxdiv = tvm.indexdiv - assert(tvm.ir_pass.Simplify( + idxdiv = tvm.tir.indexdiv + assert(tvm.tir.ir_pass.Simplify( bounds[A1.op.axis[0]].min - idxdiv(xo * split1, l)).value == 0) expected_extent = (idxdiv((xo + 1) * split1 - 1, l) - idxdiv(xo * split1, l) + 1) for i in range(1, 6): for j in range(1, 6): for k in range(1, 6): - vars = tvm.convert({split1: tvm.const(i, "int32"), l: tvm.const(j, "int32"), xo.var: tvm.const(k, "int32")}) - comp_ext = tvm.ir_pass.Simplify(tvm.ir_pass.Substitute(bounds[A1.op.axis[0]].extent, vars)).value - exp_ext = tvm.ir_pass.Simplify(tvm.ir_pass.Substitute(expected_extent, vars)).value + vars = tvm.runtime.convert({split1: tvm.tir.const(i, "int32"), l: tvm.tir.const(j, "int32"), xo.var: 
tvm.tir.const(k, "int32")}) + comp_ext = tvm.tir.ir_pass.Simplify(tvm.tir.ir_pass.Substitute(bounds[A1.op.axis[0]].extent, vars)).value + exp_ext = tvm.tir.ir_pass.Simplify(tvm.tir.ir_pass.Substitute(expected_extent, vars)).value assert(comp_ext == exp_ext) - assert(tvm.ir_pass.Simplify(bounds[A1.op.axis[1]].extent - l).value == 0) + assert(tvm.tir.ir_pass.Simplify(bounds[A1.op.axis[1]].extent - l).value == 0) def test_bound_fusesplit2(): - m = tvm.var("m") - l = tvm.convert(6) - split = tvm.convert(3) - A = tvm.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1') - A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') - - s = tvm.create_schedule(A2.op) + m = te.var("m") + l = tvm.runtime.convert(6) + split = tvm.runtime.convert(3) + A = te.placeholder((m, l), name='A') + A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1') + A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') + + s = te.create_schedule(A2.op) fused_axes = s[A2].fuse(A2.op.axis[0], A2.op.axis[1]) xo, xi = s[A2].split(fused_axes, split) s[A1].compute_at(s[A2], xo) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - vars = tvm.convert({xo.var: tvm.const(5, "int32")}) - assert(tvm.ir_pass.Simplify(tvm.ir_pass.Substitute(bounds[A1.op.axis[0]].min, vars)).value == 2) - assert(tvm.ir_pass.Simplify(tvm.ir_pass.Substitute(bounds[A1.op.axis[1]].min, vars)).value == 3) - assert(tvm.ir_pass.Simplify(tvm.ir_pass.Substitute(bounds[A1.op.axis[0]].extent, vars)).value == 1) - assert(tvm.ir_pass.Simplify(tvm.ir_pass.Substitute(bounds[A1.op.axis[1]].extent, vars)).value == 3) + vars = tvm.runtime.convert({xo.var: tvm.tir.const(5, "int32")}) + assert(tvm.tir.ir_pass.Simplify(tvm.tir.ir_pass.Substitute(bounds[A1.op.axis[0]].min, vars)).value == 2) + assert(tvm.tir.ir_pass.Simplify(tvm.tir.ir_pass.Substitute(bounds[A1.op.axis[1]].min, vars)).value == 3) + 
assert(tvm.tir.ir_pass.Simplify(tvm.tir.ir_pass.Substitute(bounds[A1.op.axis[0]].extent, vars)).value == 1) + assert(tvm.tir.ir_pass.Simplify(tvm.tir.ir_pass.Substitute(bounds[A1.op.axis[1]].extent, vars)).value == 3) def test_bound_warp(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1') - A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') + m = te.var('m') + l = te.var('l') + A = te.placeholder((m, l), name='A') + A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1') + A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') - s = tvm.create_schedule(A2.op) + s = te.create_schedule(A2.op) s[A1].set_scope("warp") xo, xi = s[A2].split(A2.op.axis[0], 32) xi0, xi1 = s[A2].split(xi, factor=16) - tx = tvm.thread_axis("threadIdx.x") + tx = te.thread_axis("threadIdx.x") s[A2].bind(xi1, tx) - s[A2].bind(xi0, tvm.thread_axis("threadIdx.y")) + s[A2].bind(xi0, te.thread_axis("threadIdx.y")) y = s[A2].op.axis[1] s[A1].compute_at(s[A2], y) xo, xi = s[A1].split(s[A1].op.axis[0], factor=16) s[A1].bind(xi, tx) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) assert(bounds[A1.op.axis[0]].extent.value==16) def test_bound_scan(): - m = tvm.var("m") - n = tvm.var("n") - X = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x") - s_state = tvm.placeholder((m, n)) - s_init = tvm.compute((1, n), lambda _, i: X[0, i]) - s_update = tvm.compute((m, n), lambda t, i: s_state[t-1, i] + X[t, i]) - s_scan = tvm.scan(s_init, s_update, s_state) + m = te.var("m") + n = te.var("n") + X = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x") + s_state = te.placeholder((m, n)) + s_init = te.compute((1, n), lambda _, i: X[0, i]) + s_update = te.compute((m, n), lambda t, i: s_state[t-1, i] + X[t, i]) + s_scan = tvm.te.scan(s_init, s_update, s_state) assert tuple(s_scan.shape) == (m, n) - s = 
tvm.create_schedule(s_scan.op) + s = te.create_schedule(s_scan.op) XX = s.cache_read(X, "local", s_update) xo, xi = s[s_update].split(s_update.op.axis[1], factor=4) s[XX].compute_at(s[s_update], xo) s = s.normalize() - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) assert bounds[XX.op.axis[1]].extent.value == 4 def test_bound_conv1d(): - n = tvm.var('n') - A = tvm.compute((n+2), lambda i: 1, name='A') + n = te.var('n') + A = te.compute((n+2), lambda i: 1, name='A') def computeB(ii): i = ii + 1 return A[i-1] + A[i] + A[i+1] - B = tvm.compute(n, computeB, name='B') - s = tvm.create_schedule(B.op) + B = te.compute(n, computeB, name='B') + s = te.create_schedule(B.op) s[A].compute_at(s[B], B.op.axis[0]) s = s.normalize() - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert(bounds[A.op.axis[0]].extent.value == 3) def test_bound_blur(): - n = tvm.convert(12) - A = tvm.compute((n, n), lambda i, j: 1, name='A') + n = tvm.runtime.convert(12) + A = te.compute((n, n), lambda i, j: 1, name='A') def computeB(ii, jj): # set the correct center i = ii + 1 j = jj + 1 return A[i][j] + A[i-1][j] + A[i+1][j] + A[i][j+1] + A[i][j-1] - B = tvm.compute((n-2, n-2), computeB, name='B') - s = tvm.create_schedule(B.op) + B = te.compute((n-2, n-2), computeB, name='B') + s = te.create_schedule(B.op) s[A].compute_at(s[B], B.op.axis[1]) s = s.normalize() - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert(bounds[A.op.axis[0]].extent.value == 3) assert(bounds[A.op.axis[1]].extent.value == 3) def test_bound_rfactor(): - n = tvm.var('n') - A = tvm.placeholder((n,), name='A') - k = tvm.reduce_axis((0, n)) - B = tvm.compute((1,), lambda i: tvm.sum(A[k], axis=k, where=(i>1)), name='B') + n = te.var('n') + A = te.placeholder((n,), name='A') + k = te.reduce_axis((0, n)) + B = te.compute((1,), lambda i: te.sum(A[k], 
axis=k, where=(i>1)), name='B') # schedule - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) kf, ki = s[B].split(k, nparts=4) BF = s.rfactor(B, kf) s = s.normalize() - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert(bounds[BF.op.axis[0]].extent.value == 4) assert(bounds[BF.op.axis[1]].extent.value == 1) def test_bound_group_schedule(): - m = tvm.var("m") - n = tvm.var("n") - x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x") - x1 = tvm.compute(x.shape, lambda *i: x(*i) + 1, name="x1") - x2 = tvm.compute(x.shape, lambda *i: x1(*i) + 2, name="x2") - s = tvm.create_schedule(x2.op) + m = te.var("m") + n = te.var("n") + x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x") + x1 = te.compute(x.shape, lambda *i: x(*i) + 1, name="x1") + x2 = te.compute(x.shape, lambda *i: x1(*i) + 2, name="x2") + s = te.create_schedule(x2.op) g = s.create_group(outputs=x1, inputs=x, include_inputs=True) g.compute_at(s[x2], x2.op.axis[0]) assert s[x1].group == g assert s[x].group == g s = s.normalize() - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert bounds[x.op.axis[0]].extent.value == 1 assert bounds[x.op.axis[1]].extent == n def test_bound_nest_group(): - m = tvm.var("m") - n = tvm.var("n") - x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x") - x1 = tvm.compute(x.shape, lambda *i: x(*i) + 1, name="x1") - x2 = tvm.compute(x.shape, lambda *i: x1(*i) + 2, name="x2") - s = tvm.create_schedule(x2.op) + m = te.var("m") + n = te.var("n") + x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x") + x1 = te.compute(x.shape, lambda *i: x(*i) + 1, name="x1") + x2 = te.compute(x.shape, lambda *i: x1(*i) + 2, name="x2") + s = te.create_schedule(x2.op) g1 = s.create_group(outputs=x, inputs=x, include_inputs=True) g2 = s.create_group(outputs=x1, inputs=x, include_inputs=True) assert s[x].group == g1 @@ -263,7 +264,7 @@ def 
test_bound_nest_group(): g2.compute_at(s[x2], x2.op.axis[0]) g1.compute_at(s[x1], s[x1].op.axis[1]) s = s.normalize() - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert bounds[x.op.axis[0]].extent.value == 1 assert bounds[x.op.axis[1]].extent.value == 1 assert bounds[x1.op.axis[0]].extent.value == 1 @@ -271,18 +272,18 @@ def test_bound_nest_group(): def test_bound_nest_thread(): - m = tvm.var('m') - A = tvm.placeholder((m), name='A') - A1 = tvm.compute((m,), lambda i: A[i], name='A1') - A2 = tvm.compute((m,), lambda i: A1[i] + 2, name='A2') - A3 = tvm.compute((m,), lambda i: A2[i] + 3, name='A3') + m = te.var('m') + A = te.placeholder((m), name='A') + A1 = te.compute((m,), lambda i: A[i], name='A1') + A2 = te.compute((m,), lambda i: A1[i] + 2, name='A2') + A3 = te.compute((m,), lambda i: A2[i] + 3, name='A3') - s = tvm.create_schedule(A3.op) + s = te.create_schedule(A3.op) s[A2].set_scope("shared") s[A1].set_scope("local") - block_x = tvm.thread_axis("blockIdx.x") - thread_x = tvm.thread_axis("threadIdx.x") + block_x = te.thread_axis("blockIdx.x") + thread_x = te.thread_axis("threadIdx.x") bx, tx = s[A3].split(A3.op.axis[0], factor=32) s[A3].bind(bx, block_x) s[A3].bind(tx, thread_x) @@ -291,31 +292,31 @@ def test_bound_nest_thread(): s[A2].bind(xi, thread_x) s[A1].compute_at(s[A3], tx) s = s.normalize() - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert(bounds[A1.op.axis[0]].extent.value==1) assert(bounds[A2.op.axis[0]].extent.value==32) assert(bounds[A3.op.axis[0]].extent == m) def test_gemm_bound(): nn = 1024 - n = tvm.convert(nn) - A = tvm.placeholder((n, n), name='A') - B = tvm.placeholder((n, n), name='B') - k = tvm.reduce_axis((0, n), name='k') - C = tvm.compute( + n = tvm.runtime.convert(nn) + A = te.placeholder((n, n), name='A') + B = te.placeholder((n, n), name='B') + k = te.reduce_axis((0, n), name='k') + C = te.compute( (n, n), - lambda ii, jj: tvm.sum(A[ii, k] * B[jj, k], axis=k), + 
lambda ii, jj: te.sum(A[ii, k] * B[jj, k], axis=k), name='CC') # schedule - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) xtile, ytile = 32, 32 scale = 8 num_thread = 8 block_factor = scale * num_thread - block_x = tvm.thread_axis("blockIdx.x") - thread_x = tvm.thread_axis("threadIdx.x") - block_y = tvm.thread_axis("blockIdx.y") - thread_y = tvm.thread_axis("threadIdx.y") + block_x = te.thread_axis("blockIdx.x") + thread_x = te.thread_axis("threadIdx.x") + block_y = te.thread_axis("blockIdx.y") + thread_y = te.thread_axis("threadIdx.y") CC = s.cache_write(C, "local") AA = s.cache_read(A, "shared", [CC]) @@ -347,7 +348,7 @@ def test_gemm_bound(): s[BB].bind(ty, thread_y) s[BB].bind(tx, thread_x) s = s.normalize() - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert(bounds[BB.op.axis[0]].extent.value==64) assert(bounds[AA.op.axis[0]].extent.value==64) assert(bounds[CC.op.axis[0]].extent.value == 8) @@ -356,54 +357,54 @@ def test_gemm_bound(): def test_bound_tensor_compute_op(): def intrin_test(): - m1 = tvm.var("m1") - n1 = tvm.var("n1") - a = tvm.placeholder((m1, n1), name='a') - c = tvm.compute((1, n1), lambda i, j : a[0, j] + a[1, j] + a[2, j], name='c') + m1 = te.var("m1") + n1 = te.var("n1") + a = te.placeholder((m1, n1), name='a') + c = te.compute((1, n1), lambda i, j : a[0, j] + a[1, j] + a[2, j], name='c') - Ab = tvm.decl_buffer(a.shape, name="Abuf", offset_factor=1) - Cb = tvm.decl_buffer(c.shape, name="Cbuf", offset_factor=1) + Ab = tvm.tir.decl_buffer(a.shape, name="Abuf", offset_factor=1) + Cb = tvm.tir.decl_buffer(c.shape, name="Cbuf", offset_factor=1) def intrin_func(ins, outs): aa = ins[0] cc = outs[0] def _body(): - ib = tvm.ir_builder.create() - ib.emit(tvm.call_extern("int32", "test", cc.access_ptr("w"), aa.access_ptr("r"))) + ib = tvm.tir.ir_builder.create() + ib.emit(tvm.tir.call_extern("int32", "test", cc.access_ptr("w"), aa.access_ptr("r"))) return ib.get() return _body() - with 
tvm.build_config(offset_factor=1): - return tvm.decl_tensor_intrin(c.op, intrin_func, binds={a : Ab, c : Cb}) + with tvm.target.build_config(offset_factor=1): + return te.decl_tensor_intrin(c.op, intrin_func, binds={a : Ab, c : Cb}) test_func = intrin_test() - A = tvm.placeholder((20,20), name='A') - B = tvm.compute(A.shape, lambda i,j : A[i,j], name='B') - C = tvm.compute((10, 20), lambda i : test_func(B[i:10, 0:20]), name='C') - s = tvm.create_schedule(C.op) - bounds = tvm.schedule.InferBound(s) + A = te.placeholder((20,20), name='A') + B = te.compute(A.shape, lambda i,j : A[i,j], name='B') + C = te.compute((10, 20), lambda i : test_func(B[i:10, 0:20]), name='C') + s = te.create_schedule(C.op) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) assert(bounds[B.op.axis[0]].extent.value == 10) def test_bound_simplification_failure(): # Check that the bounds are not expanded - A = tvm.compute((2,), lambda j: j, "A") + A = te.compute((2,), lambda j: j, "A") def _check(B, A=A): - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) s = s.normalize() - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) stmt = tvm.lower(s, [B, A], simple_mode=True) if not bounds[A.op.axis[0]].extent.value <= 2: print(stmt) assert bounds[A.op.axis[0]].extent.value <= 2 - tdiv = tvm.truncdiv + tdiv = tvm.tir.truncdiv # These are hard to simplify, moreover we don't simplify them - _check(tvm.compute((10,), lambda i: A[tvm.min(3*i, 4*i) + tvm.min(-3*i, -2*i)])) - _check(tvm.compute((10,), lambda i: A[tvm.min(3*i, 4*i) + tvm.max(-3*i, -4*i)])) - _check(tvm.compute((10,), lambda i: A[-2*tdiv(i,2) - tvm.min(i, 0-i)])) - _check(tvm.compute((10,), lambda i: A[i + (0 - i)])) + _check(te.compute((10,), lambda i: A[tvm.te.min(3*i, 4*i) + tvm.te.min(-3*i, -2*i)])) + _check(te.compute((10,), lambda i: A[tvm.te.min(3*i, 4*i) + tvm.te.max(-3*i, -4*i)])) + _check(te.compute((10,), lambda i: A[-2*tdiv(i,2) - tvm.te.min(i, 0-i)])) + 
_check(te.compute((10,), lambda i: A[i + (0 - i)])) # This would cause out of bounds, but we nevertheless include it - _check(tvm.compute((10,), lambda i: A[i])) + _check(te.compute((10,), lambda i: A[i])) if __name__ == "__main__": test_bound_nest_thread() diff --git a/tests/python/unittest/test_schedule_graph.py b/tests/python/unittest/test_schedule_graph.py index d77c1d4..d6d38e5 100644 --- a/tests/python/unittest/test_schedule_graph.py +++ b/tests/python/unittest/test_schedule_graph.py @@ -15,96 +15,97 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_scan(): - m = tvm.var("m") - n = tvm.var("n") - x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x") - s_state = tvm.placeholder((m, n)) - s_init = tvm.compute((1, n), lambda _, i: x[0, i], name="s_init") - x_trans = tvm.compute((m, n), lambda i, j: x[i, j] + 1, name="x_trans") - s_up1 = tvm.compute((m, n), lambda t, i: s_state[t - 1, i] + 1, name="up1") - s_update = tvm.compute((m, n), lambda t, i: s_up1[t, i] + x_trans[t, i], name="update") - s_scan = tvm.scan(s_init, s_update, s_state) + m = te.var("m") + n = te.var("n") + x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x") + s_state = te.placeholder((m, n)) + s_init = te.compute((1, n), lambda _, i: x[0, i], name="s_init") + x_trans = te.compute((m, n), lambda i, j: x[i, j] + 1, name="x_trans") + s_up1 = te.compute((m, n), lambda t, i: s_state[t - 1, i] + 1, name="up1") + s_update = te.compute((m, n), lambda t, i: s_up1[t, i] + x_trans[t, i], name="update") + s_scan = tvm.te.scan(s_init, s_update, s_state) def test_getbody(): - body = tvm.schedule.ScanGetBody(s_scan.op) + body = tvm.te.schedule.ScanGetBody(s_scan.op) assert set(body) == set([s_scan.op, s_update.op, s_up1.op]) def test_attach_path(): - s = tvm.create_schedule(s_scan.op) + s = te.create_schedule(s_scan.op) s[x_trans].compute_at(s[s_update], s_update.op.axis[0]) - apath = 
tvm.schedule.CreateAttachPath(s) + apath = tvm.te.schedule.CreateAttachPath(s) assert(tuple(apath[s_update.op]) == tuple([s_scan.op.scan_axis])) assert(tuple(apath[x_trans.op]) == tuple([s_update.op.axis[0], s_scan.op.scan_axis])) def test_fix_pt(): - body = tvm.schedule.ScanGetBody(s_scan.op) - fxpt = tvm.schedule.ScanFixPointAnalysis(s_scan.op, body) + body = tvm.te.schedule.ScanGetBody(s_scan.op) + fxpt = tvm.te.schedule.ScanFixPointAnalysis(s_scan.op, body) assert(fxpt[s_scan.spatial_axis_[0]].value != 0) def test_scan_fix_point(): - m = tvm.var("m") - n = tvm.var("n") - l = tvm.var("l") - x = tvm.compute((l, m, n), lambda *i: tvm.const(1, "float32"), name="x") - s_state = tvm.placeholder((l, m, n)) - s_init = tvm.compute((1, m, n), lambda _, i, j: x[0, i, j], name="s_init") + m = te.var("m") + n = te.var("n") + l = te.var("l") + x = te.compute((l, m, n), lambda *i: tvm.tir.const(1, "float32"), name="x") + s_state = te.placeholder((l, m, n)) + s_init = te.compute((1, m, n), lambda _, i, j: x[0, i, j], name="s_init") def test_scan0(): - s_update = tvm.compute((l, m, n), + s_update = te.compute((l, m, n), lambda t, i, j: x[t, j, i] + s_state[t-1, i, j], name="update") - s_scan = tvm.scan(s_init, s_update, s_state) - body = tvm.schedule.ScanGetBody(s_scan.op) - fxpt = tvm.schedule.ScanFixPointAnalysis(s_scan.op, body) + s_scan = tvm.te.scan(s_init, s_update, s_state) + body = tvm.te.schedule.ScanGetBody(s_scan.op) + fxpt = tvm.te.schedule.ScanFixPointAnalysis(s_scan.op, body) assert(fxpt[s_scan.op.spatial_axis_[0]].value == 1) assert(fxpt[s_scan.op.spatial_axis_[1]].value == 1) def test_scan1(): - s_update = tvm.compute((l, m, n), + s_update = te.compute((l, m, n), lambda t, i, j: x[t, j, i] + s_state[t-1, j, i], name="update") - s_scan = tvm.scan(s_init, s_update, s_state) - body = tvm.schedule.ScanGetBody(s_scan.op) - fxpt = tvm.schedule.ScanFixPointAnalysis(s_scan.op, body) + s_scan = tvm.te.scan(s_init, s_update, s_state) + body = 
tvm.te.schedule.ScanGetBody(s_scan.op) + fxpt = tvm.te.schedule.ScanFixPointAnalysis(s_scan.op, body) assert(fxpt[s_scan.op.spatial_axis_[0]].value == 0) assert(fxpt[s_scan.op.spatial_axis_[1]].value == 0) def test_scan3_not_exact_reach(): - s_h1 = tvm.compute((l, n, m), lambda t, j, i: s_state[t-1, i, j], name="h1") - s_h2 = tvm.compute((l, m, n), lambda t, i, j: s_state[t-1, i, 10] * 2, name="h1") - s_update = tvm.compute((l, m, n), lambda t, i, j: s_h1[t, j, i] + s_h2[t, i, j], name="update") - s_scan = tvm.scan(s_init, s_update, s_state) - body = tvm.schedule.ScanGetBody(s_scan.op) - fxpt = tvm.schedule.ScanFixPointAnalysis(s_scan.op) + s_h1 = te.compute((l, n, m), lambda t, j, i: s_state[t-1, i, j], name="h1") + s_h2 = te.compute((l, m, n), lambda t, i, j: s_state[t-1, i, 10] * 2, name="h1") + s_update = te.compute((l, m, n), lambda t, i, j: s_h1[t, j, i] + s_h2[t, i, j], name="update") + s_scan = tvm.te.scan(s_init, s_update, s_state) + body = tvm.te.schedule.ScanGetBody(s_scan.op) + fxpt = tvm.te.schedule.ScanFixPointAnalysis(s_scan.op) assert(fxpt[s_scan.op.spatial_axis_[0]].value == 1) assert(fxpt[s_scan.op.spatial_axis_[1]].value == 0) def test_scan4_reach_other(): - s_h1 = tvm.compute((l, n, m), lambda t, j, i: s_state[t-1, j, j], name="h1") - s_h2 = tvm.compute((l, m, n), lambda t, i, j: s_state[t-1, i, j] * 2, name="h1") - s_update = tvm.compute((l, m, n), + s_h1 = te.compute((l, n, m), lambda t, j, i: s_state[t-1, j, j], name="h1") + s_h2 = te.compute((l, m, n), lambda t, i, j: s_state[t-1, i, j] * 2, name="h1") + s_update = te.compute((l, m, n), lambda t, i, j: s_h1[t, j, i] + s_h2[t, i, j], name="update") - s_scan = tvm.scan(s_init, s_update, s_state) - fxpt = tvm.schedule.ScanFixPointAnalysis(s_scan.op) + s_scan = tvm.te.scan(s_init, s_update, s_state) + fxpt = tvm.te.schedule.ScanFixPointAnalysis(s_scan.op) assert(fxpt[s_scan.op.spatial_axis_[0]].value == 0) assert(fxpt[s_scan.op.spatial_axis_[1]].value == 0) def test_scan5_multi_output(): - m = 
tvm.var("m") - n = tvm.var("n") - x1 = tvm.placeholder((m, n)) - s1 = tvm.placeholder((m, n)) - x2 = tvm.placeholder((m, n)) - s2 = tvm.placeholder((m, n)) - s1_init = tvm.compute((1, n), lambda _, i: x1[0, i]) - s2_init = tvm.compute((1, n), lambda _, i: x2[0, i]) - s1_update = tvm.compute((m, n), lambda t, i: s1[t-1, i] + x1[t, i]) - s2_update = tvm.compute((m, n), lambda t, i: x2[t, i] + s2[t-1,i]) - r0, r1 = tvm.scan([s1_init, s2_init], + m = te.var("m") + n = te.var("n") + x1 = te.placeholder((m, n)) + s1 = te.placeholder((m, n)) + x2 = te.placeholder((m, n)) + s2 = te.placeholder((m, n)) + s1_init = te.compute((1, n), lambda _, i: x1[0, i]) + s2_init = te.compute((1, n), lambda _, i: x2[0, i]) + s1_update = te.compute((m, n), lambda t, i: s1[t-1, i] + x1[t, i]) + s2_update = te.compute((m, n), lambda t, i: x2[t, i] + s2[t-1,i]) + r0, r1 = tvm.te.scan([s1_init, s2_init], [s1_update, s2_update], [s1, s2]) - body = tvm.schedule.ScanGetBody(r0.op) - fxpt = tvm.schedule.ScanFixPointAnalysis(r0.op) + body = tvm.te.schedule.ScanGetBody(r0.op) + fxpt = tvm.te.schedule.ScanFixPointAnalysis(r0.op) assert(fxpt[r1.op.spatial_axis_[0]].value == 1) test_scan0() @@ -114,17 +115,17 @@ def test_scan_fix_point(): test_scan5_multi_output() def test_create_read_graph(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j]) - A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3) + m = te.var('m') + l = te.var('l') + A = te.placeholder((m, l), name='A') + A1 = te.compute((m, l), lambda i, j: A[i, j]) + A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3) - g = tvm.schedule.CreateReadGraph([A2.op]) + g = tvm.te.schedule.CreateReadGraph([A2.op]) assert g[A2.op][0] == A1 assert g[A1.op][0] == A - post_order = tvm.schedule.PostDFSOrder([A2.op], g) + post_order = tvm.te.schedule.PostDFSOrder([A2.op], g) assert(post_order[0] == A.op) assert(post_order[1] == A1.op) diff --git 
a/tests/python/unittest/test_schedule_lstm.py b/tests/python/unittest/test_schedule_lstm.py index 21cf8e8..23c7486 100644 --- a/tests/python/unittest/test_schedule_lstm.py +++ b/tests/python/unittest/test_schedule_lstm.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te def test_lstm_cell_inline(): num_step = 128 @@ -22,52 +23,52 @@ def test_lstm_cell_inline(): num_hidden = 1152 batch_size = 4 # Global transition matrix - X = tvm.placeholder((num_step - 1, batch_size, num_input), name="X") - Wi2h = tvm.placeholder((4, num_hidden, num_input), name="Wi2h") - Wh2h = tvm.placeholder((4, num_hidden, num_hidden), name="Wh2h") + X = te.placeholder((num_step - 1, batch_size, num_input), name="X") + Wi2h = te.placeholder((4, num_hidden, num_input), name="Wi2h") + Wh2h = te.placeholder((4, num_hidden, num_hidden), name="Wh2h") # h: output hidden state, c: cell state. - s_state_h = tvm.placeholder((num_step, batch_size, num_hidden)) - s_state_c = tvm.placeholder((num_step, batch_size, num_hidden)) - s_init_c = tvm.compute((1, batch_size, num_hidden), + s_state_h = te.placeholder((num_step, batch_size, num_hidden)) + s_state_c = te.placeholder((num_step, batch_size, num_hidden)) + s_init_c = te.compute((1, batch_size, num_hidden), lambda *i: 0.0, name="init_c") - s_init_h = tvm.compute((1, batch_size, num_hidden), + s_init_h = te.compute((1, batch_size, num_hidden), lambda *i: 0.0, name="init_h") # LSTM transition - k = tvm.reduce_axis((0, num_input), name="ki2h") - s_i2h = tvm.compute( + k = te.reduce_axis((0, num_input), name="ki2h") + s_i2h = te.compute( (num_step, 4, batch_size, num_hidden), - lambda t, x, i, j: tvm.sum(X[t - 1, i, k] * Wi2h[x, j, k], axis=k), + lambda t, x, i, j: te.sum(X[t - 1, i, k] * Wi2h[x, j, k], axis=k), name="s_i2h") - k = tvm.reduce_axis((0, num_hidden), name="ki2h") - s_h2h = tvm.compute( + k = te.reduce_axis((0, num_hidden), name="ki2h") + s_h2h = te.compute( 
(num_step, 4, batch_size, num_hidden), - lambda t, x, i, j: tvm.sum(s_state_h[t - 1, i, k] * Wh2h[x, j, k], axis=k), + lambda t, x, i, j: te.sum(s_state_h[t - 1, i, k] * Wh2h[x, j, k], axis=k), name="s_h2h") # Gate rules - gates = tvm.compute(s_i2h.shape, lambda *i: + gates = te.compute(s_i2h.shape, lambda *i: s_i2h(*i) + s_h2h(*i), name="gates") gshape = (num_step, batch_size, num_hidden) - in_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, 0, i, j]), name="in_gate") - in_transform = tvm.compute(gshape, lambda t, i, j: tvm.tanh(gates[t, 1, i, j]), name="in_transform") - forget_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, 2, i, j]), name="forget_gate") - out_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, 3, i, j]), name="out_gate") - next_c = tvm.compute(gshape, + in_gate = te.compute(gshape, lambda t, i, j: te.sigmoid(gates[t, 0, i, j]), name="in_gate") + in_transform = te.compute(gshape, lambda t, i, j: te.tanh(gates[t, 1, i, j]), name="in_transform") + forget_gate = te.compute(gshape, lambda t, i, j: te.sigmoid(gates[t, 2, i, j]), name="forget_gate") + out_gate = te.compute(gshape, lambda t, i, j: te.sigmoid(gates[t, 3, i, j]), name="out_gate") + next_c = te.compute(gshape, lambda t, i, j: forget_gate[t, i, j] * s_state_c[t - 1, i, j] + in_gate[t, i, j] * in_transform[t, i, j], name="next_c") - next_h = tvm.compute(gshape, - lambda t, i, j: out_gate[t, i, j] * tvm.tanh(next_c[t, i, j]), name="next_h") - update_c = tvm.compute(gshape, lambda *i: next_c(*i), name="update_c") - update_h = tvm.compute(gshape, lambda *i: next_h(*i), name="update_h") + next_h = te.compute(gshape, + lambda t, i, j: out_gate[t, i, j] * te.tanh(next_c[t, i, j]), name="next_h") + update_c = te.compute(gshape, lambda *i: next_c(*i), name="update_c") + update_h = te.compute(gshape, lambda *i: next_h(*i), name="update_h") # schedule - scan_h, scan_c = tvm.scan( + scan_h, scan_c = tvm.te.scan( [s_init_h, s_init_c], [update_h, update_c], 
[s_state_h, s_state_c], inputs=[X], name="lstm_scan") # schedule - s = tvm.create_schedule(scan_h.op) + s = te.create_schedule(scan_h.op) # Inline gate computations s[gates].compute_inline() s[in_gate].compute_inline() diff --git a/tests/python/unittest/test_schedule_schedule_ops.py b/tests/python/unittest/test_schedule_schedule_ops.py index 2fc84bb..8d10cee 100644 --- a/tests/python/unittest/test_schedule_schedule_ops.py +++ b/tests/python/unittest/test_schedule_schedule_ops.py @@ -15,66 +15,67 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np def test_schedule0(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1') - s = tvm.create_schedule(A1.op) + m = te.var('m') + l = te.var('l') + A = te.placeholder((m, l), name='A') + A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1') + s = te.create_schedule(A1.op) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_schedule1(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1') + m = te.var('m') + l = te.var('l') + A = te.placeholder((m, l), name='A') + A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1') - s = tvm.create_schedule(A1.op) + s = te.create_schedule(A1.op) xo, xi = s[A1].split(A1.op.axis[0], 8) s[A1].pragma(xo, "auto_unroll_max_step", 10) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_schedule2(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((m, l), name='A') - A1 = tvm.compute((m, l), 
lambda i, j: A[i, j], name='A1') - A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') + m = te.var('m') + l = te.var('l') + A = te.placeholder((m, l), name='A') + A1 = te.compute((m, l), lambda i, j: A[i, j], name='A1') + A2 = te.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2') - s = tvm.create_schedule(A2.op) + s = te.create_schedule(A2.op) xo, xi = s[A2].split(A2.op.axis[0], 8) s[A1].compute_at(s[A2], xo) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_schedule_scan(): - m = tvm.var("m") - n = tvm.var("n") - x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x") - s_state = tvm.placeholder((m, n)) - s_init = tvm.compute((1, n), lambda _, i: x[0, i]) - s_update = tvm.compute((m, n), lambda t, i: s_state[t-1, i] + x[t, i]) - res = tvm.scan(s_init, s_update, s_state) + m = te.var("m") + n = te.var("n") + x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x") + s_state = te.placeholder((m, n)) + s_init = te.compute((1, n), lambda _, i: x[0, i]) + s_update = te.compute((m, n), lambda t, i: s_state[t-1, i] + x[t, i]) + res = tvm.te.scan(s_init, s_update, s_state) assert tuple(res.shape) == (m, n) - s = tvm.create_schedule(res.op) + s = te.create_schedule(res.op) s = s.normalize() ir = tvm.lower(s, [s_state], simple_mode=True) assert not hasattr(ir.body.body.body.body[1].body.body[1].body, "condition") - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert(bounds[res.op.scan_axis].min.value == 1) - stmt = tvm.schedule.ScheduleOps(s, bounds) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_inline_multi_reduce(): @@ -83,107 +84,107 @@ def test_inline_multi_reduce(): val = tvm.tir.Select((x[1] >= y[1]), x[1], y[1]) return idx, val def argmax_init(idx_typ, val_typ): - return tvm.const(-1, idx_typ), 
tvm.min_value(val_typ) - - argmax = tvm.comm_reducer(argmax_comp, argmax_init, name='argmax') - m = tvm.var('m') - n = tvm.var('n') - val = tvm.placeholder((m, n), name='val', dtype='float32') - val1 = tvm.compute((m, n), lambda i, j: val[i, j]+1, name='val1') - val2 = tvm.compute((m, n), lambda i, j: tvm.exp(val1[i, j]), name='val2') - k = tvm.reduce_axis((0, n), 'k') - T_idx, T_val = tvm.compute((m, ), lambda i: argmax((k.var, val2[i, k]), axis=k), name='T') - s = tvm.create_schedule(T_idx.op) + return tvm.tir.const(-1, idx_typ), tvm.te.min_value(val_typ) + + argmax = te.comm_reducer(argmax_comp, argmax_init, name='argmax') + m = te.var('m') + n = te.var('n') + val = te.placeholder((m, n), name='val', dtype='float32') + val1 = te.compute((m, n), lambda i, j: val[i, j]+1, name='val1') + val2 = te.compute((m, n), lambda i, j: te.exp(val1[i, j]), name='val2') + k = te.reduce_axis((0, n), 'k') + T_idx, T_val = te.compute((m, ), lambda i: argmax((k.var, val2[i, k]), axis=k), name='T') + s = te.create_schedule(T_idx.op) s[val1].compute_inline() s = s.normalize() - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_auto_inline(): - m = tvm.var('m') - n = tvm.var('n') - A = tvm.placeholder((m, n), name='A') - B = tvm.placeholder((m, n), name='B') - C = tvm.placeholder((m, n), name='C') - T1 = tvm.compute((m, n), lambda i, j: A(i, j) * B(i, j), name='T1') - T2 = tvm.compute((m, n), lambda i, j: T1(i, j) + C(i, j), name='T2') - - s = tvm.create_schedule(T2.op) - tvm.schedule.AutoInlineElemWise(s) + m = te.var('m') + n = te.var('n') + A = te.placeholder((m, n), name='A') + B = te.placeholder((m, n), name='B') + C = te.placeholder((m, n), name='C') + T1 = te.compute((m, n), lambda i, j: A(i, j) * B(i, j), name='T1') + T2 = te.compute((m, n), lambda i, j: T1(i, j) + C(i, j), name='T2') + + s = te.create_schedule(T2.op) + 
tvm.te.schedule.AutoInlineElemWise(s) s = s.normalize() - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_schedule_const_bound(): n = 128 - A = tvm.placeholder((n,), name='A') - A1 = tvm.compute((n,), lambda i: A[i] + 1, name='A1') - s = tvm.create_schedule(A1.op) + A = te.placeholder((n,), name='A') + A1 = te.compute((n,), lambda i: A[i] + 1, name='A1') + s = te.create_schedule(A1.op) xo, xi = s[A1].split(A1.op.axis[0], 8) - bounds = tvm.schedule.InferBound(s) + bounds = tvm.te.schedule.InferBound(s) assert isinstance(bounds, tvm.container.Map) - stmt = tvm.schedule.ScheduleOps(s, bounds) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_inline_mixed(): - n = tvm.var('n') - A = tvm.placeholder((n, ), name='A') - A1 = tvm.compute(A.shape, lambda *i: A(*i) + 1, name='A1') - A2 = tvm.compute(A.shape, lambda *i: A1(*i) + 2, name='A2') - C = tvm.compute((n,), lambda i: A2[i] + A1[i], name='C') + n = te.var('n') + A = te.placeholder((n, ), name='A') + A1 = te.compute(A.shape, lambda *i: A(*i) + 1, name='A1') + A2 = te.compute(A.shape, lambda *i: A1(*i) + 2, name='A2') + C = te.compute((n,), lambda i: A2[i] + A1[i], name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], factor=8) s[A1].compute_at(s[C], xo) s[A2].compute_inline() s = s.normalize() - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def check(x): if isinstance(x, tvm.tir.Call): assert x.func != A2 - tvm.ir_pass.PostOrderVisit(s[C].op.body[0], check) + tvm.tir.ir_pass.PostOrderVisit(s[C].op.body[0], check) def test_scan_inline1(): - m = tvm.var("m") - n = tvm.var("n") - x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x") - s_state1 = tvm.placeholder((m, n)) - s_state2 = 
tvm.placeholder((m, n)) - s_init1 = tvm.compute((1, n), lambda _, i: x[0, i]) - s_init2 = tvm.compute((1, n), lambda _, i: x[0, i]) - s_x1 = tvm.compute((m, n), lambda t, i: s_state1[t-1, i] + x[t, i], name="x1") - s_x2 = tvm.compute((m, n), lambda t, i: s_state2[t-1, i] + 1 , name="x2") - s_update1 = tvm.compute((m, n), lambda t, i: s_x1[t, i], "u1") - s_update2 = tvm.compute((m, n), lambda t, i: s_x2[t, i], "u2") - res1, res2 = tvm.scan([s_init1, s_init2], + m = te.var("m") + n = te.var("n") + x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x") + s_state1 = te.placeholder((m, n)) + s_state2 = te.placeholder((m, n)) + s_init1 = te.compute((1, n), lambda _, i: x[0, i]) + s_init2 = te.compute((1, n), lambda _, i: x[0, i]) + s_x1 = te.compute((m, n), lambda t, i: s_state1[t-1, i] + x[t, i], name="x1") + s_x2 = te.compute((m, n), lambda t, i: s_state2[t-1, i] + 1 , name="x2") + s_update1 = te.compute((m, n), lambda t, i: s_x1[t, i], "u1") + s_update2 = te.compute((m, n), lambda t, i: s_x2[t, i], "u2") + res1, res2 = tvm.te.scan([s_init1, s_init2], [s_update1, s_update2], [s_state1, s_state2]) - s = tvm.create_schedule(res1.op) + s = te.create_schedule(res1.op) s[s_x1].compute_inline() stmt = tvm.lower(s, [x, res1, res2]) def test_scan_inline2(): - m = tvm.var("m") - n = tvm.var("n") - x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x") - s_state1 = tvm.placeholder((m, n)) - s_state2 = tvm.placeholder((m, n)) - s_init1 = tvm.compute((1, n), lambda _, i: x[0, i]) - s_init2 = tvm.compute((1, n), lambda _, i: x[0, i]) - s_xx = tvm.compute((m, n), lambda t, i: s_state1[t-1, i] + x[t, i], name="xx") - s_x1 = tvm.compute((m, n), lambda t, i: s_xx[t, i] + 1, name="x1") - s_x2 = tvm.compute((m, n), lambda t, i: s_xx[t, i] + s_state2[t-1, 2], name="x2") - s_update1 = tvm.compute((m, n), lambda t, i: s_x1[t, i], "u1") - s_update2 = tvm.compute((m, n), lambda t, i: s_x2[t, i], "u2") - res1, res2 = tvm.scan([s_init1, s_init2], + m = 
te.var("m") + n = te.var("n") + x = te.compute((m, n), lambda i, j: tvm.tir.const(1, "float32"), name="x") + s_state1 = te.placeholder((m, n)) + s_state2 = te.placeholder((m, n)) + s_init1 = te.compute((1, n), lambda _, i: x[0, i]) + s_init2 = te.compute((1, n), lambda _, i: x[0, i]) + s_xx = te.compute((m, n), lambda t, i: s_state1[t-1, i] + x[t, i], name="xx") + s_x1 = te.compute((m, n), lambda t, i: s_xx[t, i] + 1, name="x1") + s_x2 = te.compute((m, n), lambda t, i: s_xx[t, i] + s_state2[t-1, 2], name="x2") + s_update1 = te.compute((m, n), lambda t, i: s_x1[t, i], "u1") + s_update2 = te.compute((m, n), lambda t, i: s_x2[t, i], "u2") + res1, res2 = tvm.te.scan([s_init1, s_init2], [s_update1, s_update2], [s_state1, s_state2]) - s = tvm.create_schedule(res1.op) + s = te.create_schedule(res1.op) s[s_xx].compute_inline() s[s_x1].compute_inline() s[s_x2].compute_inline() @@ -191,128 +192,128 @@ def test_scan_inline2(): def test_schedule_cache(): - m = tvm.var('m') - n = tvm.var('n') - A = tvm.placeholder((m, n), name='A') - B = tvm.placeholder((m, n), name='B') - C = tvm.compute((m, n), lambda i, j: A(i, j) * B(i, j), name='C') + m = te.var('m') + n = te.var('n') + A = te.placeholder((m, n), name='A') + B = te.placeholder((m, n), name='B') + C = te.compute((m, n), lambda i, j: A(i, j) * B(i, j), name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) AA = s.cache_read(A, "shared", readers=[C]) CC = s.cache_write(C, "shared") s[AA].compute_at(s[CC], CC.op.axis[0]) - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_schedule_middle_cache(): - m = tvm.var('m') - n = tvm.var('n') - A = tvm.placeholder((m, n), name='A') - B = tvm.placeholder((m, n), name='B') + m = te.var('m') + n = te.var('n') + A = te.placeholder((m, n), name='A') + B = te.placeholder((m, n), name='B') - C = tvm.compute((m, n), lambda i, j: A(i, j) * B(i, j), 
name='C') - D = tvm.compute((m, n), lambda i, j: C(i , j) , name='D') + C = te.compute((m, n), lambda i, j: A(i, j) * B(i, j), name='C') + D = te.compute((m, n), lambda i, j: C(i , j) , name='D') - s = tvm.create_schedule(D.op) + s = te.create_schedule(D.op) AA = s.cache_read(A, "local", readers=[C]) BB = s.cache_read(B, "local", readers=[C]) CC = s.cache_read(C, "local", readers=[D]) DD = s.cache_write(D, "local") #s[AA].compute_at(s[CC], CC.op.axis[0]) - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_schedule_cache_relayout1(): - m = tvm.var('m') - n = tvm.var('n') - A = tvm.placeholder((m, n), name='A') - B = tvm.placeholder((m, n), name='B') - C = tvm.compute((m, n), lambda i, j: A(i, j) * B(i, j), name='C') + m = te.var('m') + n = te.var('n') + A = te.placeholder((m, n), name='A') + B = te.placeholder((m, n), name='B') + C = te.compute((m, n), lambda i, j: A(i, j) * B(i, j), name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) s[C].reorder(C.op.axis[1], C.op.axis[0]) CC = s.cache_write(C, "global") - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_schedule_cache_relayout2(): - m = tvm.var('m') - n = tvm.var('n') - A = tvm.placeholder((m*4, n), name='A') - B = tvm.placeholder((m*4, n), name='B') - C = tvm.compute(A.shape, lambda i, j: A(i, j) * B(i, j), name='C') - s = tvm.create_schedule(C.op) + m = te.var('m') + n = te.var('n') + A = te.placeholder((m*4, n), name='A') + B = te.placeholder((m*4, n), name='B') + C = te.compute(A.shape, lambda i, j: A(i, j) * B(i, j), name='C') + s = te.create_schedule(C.op) x, y = C.op.axis xo, xi = s[C].split(x, factor=4) s[C].reorder(xo, y, xi) CC = s.cache_write(C, "global") s = s.normalize() - bounds = tvm.schedule.InferBound(s) - stmt = 
tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_schedule_cache_relayout3(): - m = tvm.var('m') - n = tvm.var('n') - A = tvm.placeholder((m*4, n), name='A') - B = tvm.placeholder((m*4, n), name='B') - k = tvm.reduce_axis((0, n), "k") - C = tvm.compute((A.shape[0],), - lambda i: tvm.sum(A(i, k) * B(i, k), axis=k), name='C') - s = tvm.create_schedule(C.op) + m = te.var('m') + n = te.var('n') + A = te.placeholder((m*4, n), name='A') + B = te.placeholder((m*4, n), name='B') + k = te.reduce_axis((0, n), "k") + C = te.compute((A.shape[0],), + lambda i: te.sum(A(i, k) * B(i, k), axis=k), name='C') + s = te.create_schedule(C.op) x = C.op.axis[0] xo, xi = s[C].split(x, factor=4) CC = s.cache_write(C, "global") s = s.normalize() - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_schedule_cache_relayout4(): def _compute(*indice): return A(*indice) + 1, B(*indice) / 2 - m = tvm.var('m') - n = tvm.var('n') - A = tvm.placeholder((m*4, n), name='A') - B = tvm.placeholder((m*4, n), name='B') - C1, C2 = tvm.compute(A.shape, _compute, name='C') - s = tvm.create_schedule([C1.op, C2.op]) + m = te.var('m') + n = te.var('n') + A = te.placeholder((m*4, n), name='A') + B = te.placeholder((m*4, n), name='B') + C1, C2 = te.compute(A.shape, _compute, name='C') + s = te.create_schedule([C1.op, C2.op]) C1_cache, C2_cache = s.cache_write([C1, C2], "local") s = s.normalize() - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def intrin_gemv(m, n): - w = tvm.placeholder((m, n), name='w') - x = tvm.placeholder((n,), name='x') - k = tvm.reduce_axis((0, n), name='k') - z = tvm.compute((m,), lambda i: - tvm.sum(w[i, k] * x[k], axis=k), name='z') - Wb = 
tvm.decl_buffer(w.shape, w.dtype, + w = te.placeholder((m, n), name='w') + x = te.placeholder((n,), name='x') + k = te.reduce_axis((0, n), name='k') + z = te.compute((m,), lambda i: + te.sum(w[i, k] * x[k], axis=k), name='z') + Wb = tvm.tir.decl_buffer(w.shape, w.dtype, name="W", offset_factor=16, - strides=[tvm.var('ldw'), 1]) + strides=[te.var('ldw'), 1]) def intrin_func(ins, outs): ww, xx = ins zz = outs[0] ww_ptr = ww.access_ptr("r") xx_ptr = xx.access_ptr("r") zz_ptr = zz.access_ptr("w") - body = tvm.call_packed( + body = tvm.tir.call_packed( "gemm", ww_ptr, xx_ptr, zz_ptr, n, ww.strides[0]) - reset = tvm.call_packed( + reset = tvm.tir.call_packed( "fill_zero", zz_ptr, n) - update = tvm.call_packed( + update = tvm.tir.call_packed( "gemv_add", ww_ptr, xx_ptr, zz_ptr, n, ww.strides[0]) return body, reset, update - with tvm.build_config(data_alignment=16, + with tvm.target.build_config(data_alignment=16, offset_factor=16): - return tvm.decl_tensor_intrin(z.op, intrin_func, + return te.decl_tensor_intrin(z.op, intrin_func, binds={w: Wb}) @@ -320,36 +321,36 @@ def test_schedule_tensor_compute1(): # basic: split, reorder, tile M, N, L = 2048, 1024, 512 factor, rfactor = 16, 16 - A = tvm.placeholder((N//factor, L//rfactor, factor, rfactor), name='A') - B = tvm.placeholder((M, L//rfactor, rfactor), name='B') - k = tvm.reduce_axis((0, L//rfactor), name='k') + A = te.placeholder((N//factor, L//rfactor, factor, rfactor), name='A') + B = te.placeholder((M, L//rfactor, rfactor), name='B') + k = te.reduce_axis((0, L//rfactor), name='k') gemv = intrin_gemv(factor, rfactor) - C = tvm.compute((N, M//factor, factor), + C = te.compute((N, M//factor, factor), lambda i, j: gemv(A[i, k, 0:factor, 0:factor], B[j, k, 0:rfactor], reduce_axis=k), name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) ai, aj, ax = s[C].op.axis aio, aii = s[C].split(ai, 16) s[C].reorder(aio, aj, aii) aioo, ajo, aioi, aji = s[C].tile(aio, aj, 16, 4) s = s.normalize() - bounds = 
tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def intrin_vadd(n, cache_read=False, cache_write=False): scope_ubuf = 'local' dtype = 'float32' - x = tvm.placeholder((n,), dtype=dtype, name='vx') - y = tvm.placeholder((n,), dtype=dtype, name='vy') - z = tvm.compute(x.shape, lambda i: x[i] + y[i], name='z') - s = tvm.create_schedule(z.op) + x = te.placeholder((n,), dtype=dtype, name='vx') + y = te.placeholder((n,), dtype=dtype, name='vy') + z = te.compute(x.shape, lambda i: x[i] + y[i], name='z') + s = te.create_schedule(z.op) def create_buffer(t): - return tvm.decl_buffer(t.shape, t.dtype, + return tvm.tir.decl_buffer(t.shape, t.dtype, name='W'+t.name, scope=scope_ubuf, offset_factor=16) @@ -362,12 +363,12 @@ def intrin_vadd(n, cache_read=False, cache_write=False): binds[z] = create_buffer(z) def intrin_func(ins, outs): - ib = tvm.ir_builder.create() - ib.emit(tvm.call_extern(outs[0].dtype, 'vadd', ins[0].access_ptr("r"), ins[1].access_ptr('r'), outs[0].access_ptr('wr'))) + ib = tvm.tir.ir_builder.create() + ib.emit(tvm.tir.call_extern(outs[0].dtype, 'vadd', ins[0].access_ptr("r"), ins[1].access_ptr('r'), outs[0].access_ptr('wr'))) return ib.get() - with tvm.build_config(offset_factor=16): - return tvm.decl_tensor_intrin(z.op, intrin_func, binds=binds) + with tvm.target.build_config(offset_factor=16): + return te.decl_tensor_intrin(z.op, intrin_func, binds=binds) def test_schedule_tensor_compute2(): @@ -377,20 +378,20 @@ def test_schedule_tensor_compute2(): dtype = 'float32' scope_ubuf = 'local' - A = tvm.placeholder((M//factor, factor), name="A", dtype=dtype) - B = tvm.placeholder((M//factor, factor), name="B", dtype=dtype) + A = te.placeholder((M//factor, factor), name="A", dtype=dtype) + B = te.placeholder((M//factor, factor), name="B", dtype=dtype) vadd = intrin_vadd(factor, True, True) - C = tvm.compute((M//factor, factor), + C = 
te.compute((M//factor, factor), lambda i: vadd(A[i, 0:factor], B[i, 0:factor]), name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) AL = s.cache_read(A, scope_ubuf, C) BL = s.cache_read(B, scope_ubuf, C) CL = s.cache_write(C, scope_ubuf) s = s.normalize() - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_schedule_tensor_compute3(): @@ -398,48 +399,48 @@ def test_schedule_tensor_compute3(): M = 1024 factor = 16 dtype = 'float32' - A = tvm.placeholder((M//factor, factor), name="A", dtype=dtype) - B = tvm.placeholder((M//factor, factor), name="B", dtype=dtype) - Bi = tvm.compute((M//factor, factor), lambda i, j: B[i, j] + 5, name="Bi") + A = te.placeholder((M//factor, factor), name="A", dtype=dtype) + B = te.placeholder((M//factor, factor), name="B", dtype=dtype) + Bi = te.compute((M//factor, factor), lambda i, j: B[i, j] + 5, name="Bi") vadd = intrin_vadd(factor) - C = tvm.compute((M//factor, factor), + C = te.compute((M//factor, factor), lambda i: vadd(A[i, 0:factor], Bi[i, 0:factor]), name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) s[Bi].compute_at(s[C], C.op.axis[0]) s = s.normalize() - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) def test_loop_dep_reduce(): - X = tvm.placeholder(shape=(10,), name="x") + X = te.placeholder(shape=(10,), name="x") def f(n): - rv = tvm.reduce_axis((0, n)) - return tvm.sum(X[rv], axis=rv) - Y = tvm.compute(X.shape, f, name="y") - s = tvm.create_schedule([Y.op]) + rv = te.reduce_axis((0, n)) + return te.sum(X[rv], axis=rv) + Y = te.compute(X.shape, f, name="y") + s = te.create_schedule([Y.op]) f = tvm.build(s, [X, Y]) def test_loop_dep_reduce_cache_write(): - X = tvm.placeholder(shape=(10,), name="x") + X = te.placeholder(shape=(10,), 
name="x") def f(n): - rv = tvm.reduce_axis((0, n)) - init = lambda dtype: tvm.tir.Select(n > 1, tvm.const(0, dtype), n.astype(dtype)) - sum = tvm.comm_reducer(lambda x, y: tvm.max(x + y, n.astype('float32')), init, name='sum') + rv = te.reduce_axis((0, n)) + init = lambda dtype: tvm.tir.Select(n > 1, tvm.tir.const(0, dtype), n.astype(dtype)) + sum = te.comm_reducer(lambda x, y: tvm.te.max(x + y, n.astype('float32')), init, name='sum') return sum(X[rv], axis=rv) - Y = tvm.compute(X.shape, f, name="y") - s = tvm.create_schedule([Y.op]) + Y = te.compute(X.shape, f, name="y") + s = te.create_schedule([Y.op]) s.cache_write(Y, 'local') f = tvm.build(s, [X, Y]) def test_reduction_and_dummy_fuse_split(): n = 10 - X = tvm.placeholder(shape=(n,), dtype='int32', name="X") - k = tvm.reduce_axis((0, n)) - Y = tvm.compute((), lambda: tvm.sum(X[k], k), name="Y") - s = tvm.create_schedule([Y.op]) + X = te.placeholder(shape=(n,), dtype='int32', name="X") + k = te.reduce_axis((0, n)) + Y = te.compute((), lambda: te.sum(X[k], k), name="Y") + s = te.create_schedule([Y.op]) ax = s[Y.op].fuse(*Y.op.axis) axo, axi = s[Y.op].split(ax, nparts=20) f = tvm.build(s, [Y, X]) @@ -449,10 +450,10 @@ def test_reduction_and_dummy_fuse_split(): assert args[0].asnumpy() == n n = 10 - X = tvm.placeholder(shape=(n,), dtype='int32', name="X") - k = tvm.reduce_axis((0, n)) - Y = tvm.compute((n,), lambda i: tvm.sum(X[k], k), name="Y") - s = tvm.create_schedule([Y.op]) + X = te.placeholder(shape=(n,), dtype='int32', name="X") + k = te.reduce_axis((0, n)) + Y = te.compute((n,), lambda i: te.sum(X[k], k), name="Y") + s = te.create_schedule([Y.op]) ax = s[Y.op].fuse(*(list(Y.op.axis) + list(Y.op.reduce_axis))) f = tvm.build(s, [Y, X]) @@ -463,14 +464,14 @@ def test_reduction_and_dummy_fuse_split(): def test_schedule_compute_inline(): shape = [10, 1024] - A = tvm.placeholder(shape, name="A") - B = tvm.placeholder(shape, name="B") - C = tvm.compute(shape, lambda *index:A(*index)+ B(*index), name = "C") + A = 
te.placeholder(shape, name="A") + B = te.placeholder(shape, name="B") + C = te.compute(shape, lambda *index:A(*index)+ B(*index), name = "C") def _compute(*index) : return C(*index) , C(*index) * B(*index) - F,E = tvm.compute(shape, _compute, name = "F") + F,E = te.compute(shape, _compute, name = "F") - s = tvm.create_schedule([F.op, E.op]) + s = te.create_schedule([F.op, E.op]) AL = s.cache_read(A, "local", [C]) BL = s.cache_read(B, "local", [C,E]) CL = s.cache_write(C, "local") @@ -478,8 +479,8 @@ def test_schedule_compute_inline(): s[C].compute_inline() s = s.normalize() - bounds = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, bounds) + bounds = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, bounds) if __name__ == "__main__": test_loop_dep_reduce() diff --git a/tests/python/unittest/test_schedule_tensor_core.py b/tests/python/unittest/test_schedule_tensor_core.py index cd9e062..ae2301c 100644 --- a/tests/python/unittest/test_schedule_tensor_core.py +++ b/tests/python/unittest/test_schedule_tensor_core.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te import numpy as np from topi.testing import conv2d_nhwc_python from tvm.contrib import nvcc @@ -28,49 +29,49 @@ def intrin_wmma_load_matrix(shape, scope): row, col = n, l elif scope == "wmma.matrix_b": row, col = l, m - A = tvm.placeholder((row, col), name='A', dtype='float16') - BA = tvm.decl_buffer(A.shape, A.dtype, scope='shared', data_alignment=32, offset_factor=row * col) - C = tvm.compute((row, col), lambda i, j: A[i, j], name='C') - BC = tvm.decl_buffer(C.shape, C.dtype, scope=scope, data_alignment=32, offset_factor=row * col) + A = te.placeholder((row, col), name='A', dtype='float16') + BA = tvm.tir.decl_buffer(A.shape, A.dtype, scope='shared', data_alignment=32, offset_factor=row * col) + C = te.compute((row, col), lambda i, j: A[i, j], name='C') + BC = tvm.tir.decl_buffer(C.shape, C.dtype, scope=scope, data_alignment=32, offset_factor=row * col) def intrin_func(ins, outs): - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() BA = ins[0] BC = outs[0] - ib.emit(tvm.call_intrin('handle', 'tvm_load_matrix_sync', + ib.emit(tvm.tir.call_intrin('handle', 'tvm_load_matrix_sync', BC.data, n, m, l, BC.elem_offset // (row * col), BA.access_ptr('r'), col, 'row_major')) return ib.get() - return tvm.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, C: BC}) + return te.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, C: BC}) def intrin_wmma_gemm(shape): n, m, l = shape - A = tvm.placeholder((n, l), name='A', dtype='float16') - B = tvm.placeholder((l, m), name='B', dtype='float16') - k = tvm.reduce_axis((0, l), name="k") - C = tvm.compute((n, m), + A = te.placeholder((n, l), name='A', dtype='float16') + B = te.placeholder((l, m), name='B', dtype='float16') + k = te.reduce_axis((0, l), name="k") + C = te.compute((n, m), lambda ii, jj: - tvm.sum(A[ii, k].astype('float') * B[k, jj].astype('float'), axis=k), + te.sum(A[ii, k].astype('float') * B[k, jj].astype('float'), axis=k), name='C') - BA = tvm.decl_buffer(A.shape, 
A.dtype, name='BA', scope='wmma.matrix_a', data_alignment=32, offset_factor=n * l) - BB = tvm.decl_buffer(B.shape, B.dtype, name='BB', scope='wmma.matrix_b', data_alignment=32, offset_factor=l * m) - BC = tvm.decl_buffer(C.shape, C.dtype, name='BC', scope='wmma.accumulator', data_alignment=32, offset_factor=n * m) + BA = tvm.tir.decl_buffer(A.shape, A.dtype, name='BA', scope='wmma.matrix_a', data_alignment=32, offset_factor=n * l) + BB = tvm.tir.decl_buffer(B.shape, B.dtype, name='BB', scope='wmma.matrix_b', data_alignment=32, offset_factor=l * m) + BC = tvm.tir.decl_buffer(C.shape, C.dtype, name='BC', scope='wmma.accumulator', data_alignment=32, offset_factor=n * m) def intrin_func(ins, outs): BA, BB = ins BC, = outs def init(): - ib = tvm.ir_builder.create() - ib.emit(tvm.call_intrin('handle', 'tvm_fill_fragment', BC.data, n, m, l, BC.elem_offset // (n * m), 0.0)) + ib = tvm.tir.ir_builder.create() + ib.emit(tvm.tir.call_intrin('handle', 'tvm_fill_fragment', BC.data, n, m, l, BC.elem_offset // (n * m), 0.0)) return ib.get() def update(): - ib = tvm.ir_builder.create() - ib.emit(tvm.call_intrin('handle', 'tvm_mma_sync', + ib = tvm.tir.ir_builder.create() + ib.emit(tvm.tir.call_intrin('handle', 'tvm_mma_sync', BC.data, BC.elem_offset // (n * m), BA.data, BA.elem_offset // (n * l), BB.data, BB.elem_offset // (l * m), @@ -79,27 +80,27 @@ def intrin_wmma_gemm(shape): return update(), init(), update() - return tvm.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, B: BB, C: BC}) + return te.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, B: BB, C: BC}) def intrin_wmma_store_matrix(shape): n, m, l = shape - A = tvm.placeholder((n, m), name='A', dtype='float32') - BA = tvm.decl_buffer(A.shape, A.dtype, scope='wmma.accumulator', data_alignment=32, offset_factor=n * m) - C = tvm.compute((n, m), lambda i, j: A[i, j], name='C') - BC = tvm.decl_buffer(C.shape, C.dtype, scope='global', data_alignment=32, offset_factor=n * m) + A = te.placeholder((n, m), name='A', 
dtype='float32') + BA = tvm.tir.decl_buffer(A.shape, A.dtype, scope='wmma.accumulator', data_alignment=32, offset_factor=n * m) + C = te.compute((n, m), lambda i, j: A[i, j], name='C') + BC = tvm.tir.decl_buffer(C.shape, C.dtype, scope='global', data_alignment=32, offset_factor=n * m) def intrin_func(ins, outs): - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() BA = ins[0] BC = outs[0] - ib.emit(tvm.call_intrin('handle', 'tvm_store_matrix_sync', + ib.emit(tvm.tir.call_intrin('handle', 'tvm_store_matrix_sync', BA.data, n, m, l, BA.elem_offset // (n * m), BC.access_ptr('w'), m, 'row_major')) return ib.get() - return tvm.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, C: BC}) + return te.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, C: BC}) def test_tensor_core_batch_matmal(): @@ -117,15 +118,15 @@ def test_tensor_core_batch_matmal(): assert (m % 8 == 0) assert (l % 16 == 0) nn, mm, ll = n // 32, m // 8, l // 16 - A = tvm.placeholder((batch_size, nn, ll, 32, 16), name='A', dtype='float16') - B = tvm.placeholder((batch_size, ll, mm, 16, 8), name='B', dtype='float16') - k1 = tvm.reduce_axis((0, ll), name='k1') - k2 = tvm.reduce_axis((0, 16), name='k2') - C = tvm.compute((batch_size, nn, mm, 32, 8), + A = te.placeholder((batch_size, nn, ll, 32, 16), name='A', dtype='float16') + B = te.placeholder((batch_size, ll, mm, 16, 8), name='B', dtype='float16') + k1 = te.reduce_axis((0, ll), name='k1') + k2 = te.reduce_axis((0, 16), name='k2') + C = te.compute((batch_size, nn, mm, 32, 8), lambda b, i, j, ii, jj: - tvm.sum(A[b, i, k1, ii, k2].astype('float') * B[b, k1, j, k2, jj].astype('float'), axis=[k1, k2]), + te.sum(A[b, i, k1, ii, k2].astype('float') * B[b, k1, j, k2, jj].astype('float'), axis=[k1, k2]), name='Fragment_C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) warp_size = 32 kernel_size = 16 @@ -135,12 +136,12 @@ def test_tensor_core_batch_matmal(): warp_col_tiles = 2 chunk = 4 - block_x = tvm.thread_axis('blockIdx.x') - 
block_y = tvm.thread_axis('blockIdx.y') - block_z = tvm.thread_axis('blockIdx.z') - thread_x = tvm.thread_axis('threadIdx.x') - thread_y = tvm.thread_axis('threadIdx.y') - thread_z = tvm.thread_axis('threadIdx.z') + block_x = te.thread_axis('blockIdx.x') + block_y = te.thread_axis('blockIdx.y') + block_z = te.thread_axis('blockIdx.z') + thread_x = te.thread_axis('threadIdx.x') + thread_y = te.thread_axis('threadIdx.y') + thread_z = te.thread_axis('threadIdx.z') AS = s.cache_read(A, 'shared', [C]) BS = s.cache_read(B, 'shared', [C]) @@ -271,30 +272,30 @@ def test_tensor_core_batch_conv(): assert (in_channels % block_size == 0) assert (out_channels % block_size == 0) - kh = tvm.reduce_axis((0, kernel_h), name='kh') - kw = tvm.reduce_axis((0, kernel_w), name='kw') - ic = tvm.reduce_axis((0, in_channels // block_size), name='ic') - ii = tvm.reduce_axis((0, block_size), name='ii') + kh = te.reduce_axis((0, kernel_h), name='kh') + kw = te.reduce_axis((0, kernel_w), name='kw') + ic = te.reduce_axis((0, in_channels // block_size), name='ic') + ii = te.reduce_axis((0, block_size), name='ii') # Algorithm - A = tvm.placeholder(data_shape, name='A', dtype="float16") - W = tvm.placeholder(kernel_shape, name='W', dtype="float16") - Apad = tvm.compute( + A = te.placeholder(data_shape, name='A', dtype="float16") + W = te.placeholder(kernel_shape, name='W', dtype="float16") + Apad = te.compute( (batch_size // block_size, height + 2 * pad_h, width + 2 * pad_w, in_channels // block_size, block_size, block_size), - lambda n, h, w, i, nn, ii: tvm.if_then_else( - tvm.all(h >= pad_h, h - pad_h < height, + lambda n, h, w, i, nn, ii: tvm.tir.if_then_else( + tvm.tir.all(h >= pad_h, h - pad_h < height, w >= pad_w, w - pad_w < width), - A[n, h - pad_h, w - pad_w, i, nn, ii], tvm.const(0., "float16")), + A[n, h - pad_h, w - pad_w, i, nn, ii], tvm.tir.const(0., "float16")), name='Apad') - Conv = tvm.compute(output_shape, - lambda n, h, w, o, nn, oo: tvm.sum( + Conv = te.compute(output_shape, + 
lambda n, h, w, o, nn, oo: te.sum( Apad[n, h * stride_h + kh, w * stride_w + kw, ic, nn, ii].astype("float32") * W[kh, kw, ic, o, ii, oo].astype("float32"), axis=[ic, kh, kw, ii]), name="Conv") - s = tvm.create_schedule(Conv.op) + s = te.create_schedule(Conv.op) s[Apad].compute_inline() AS = s.cache_read(Apad, 'shared', [Conv]) @@ -303,12 +304,12 @@ def test_tensor_core_batch_conv(): WF = s.cache_read(WS, 'wmma.matrix_b', [Conv]) ConvF = s.cache_write(Conv, 'wmma.accumulator') - block_x = tvm.thread_axis('blockIdx.x') - block_y = tvm.thread_axis('blockIdx.y') - block_z = tvm.thread_axis('blockIdx.z') - thread_x = tvm.thread_axis('threadIdx.x') - thread_y = tvm.thread_axis('threadIdx.y') - thread_z = tvm.thread_axis('threadIdx.z') + block_x = te.thread_axis('blockIdx.x') + block_y = te.thread_axis('blockIdx.y') + block_z = te.thread_axis('blockIdx.z') + thread_x = te.thread_axis('threadIdx.x') + thread_y = te.thread_axis('threadIdx.y') + thread_z = te.thread_axis('threadIdx.z') nc, hc, wc, oc, nnc, ooc = Conv.op.axis block_k = s[Conv].fuse(hc, wc) diff --git a/tests/python/unittest/test_schedule_tensorize.py b/tests/python/unittest/test_schedule_tensorize.py index ac60c2d..28a3ae8 100644 --- a/tests/python/unittest/test_schedule_tensorize.py +++ b/tests/python/unittest/test_schedule_tensorize.py @@ -15,98 +15,99 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te def intrin_vadd(n): - x = tvm.placeholder((n,), name='vx') - y = tvm.placeholder((n,), name='vy') - z = tvm.compute(x.shape, lambda i: x[i] + y[i], name='z') + x = te.placeholder((n,), name='vx') + y = te.placeholder((n,), name='vy') + z = te.compute(x.shape, lambda i: x[i] + y[i], name='z') def intrin_func(ins, outs): xx, yy = ins zz = outs[0] - return tvm.call_packed("vadd", xx, yy, zz) - with tvm.build_config(offset_factor=16): - return tvm.decl_tensor_intrin(z.op, intrin_func) + return tvm.tir.call_packed("vadd", xx, yy, zz) + with tvm.target.build_config(offset_factor=16): + return te.decl_tensor_intrin(z.op, intrin_func) def intrin_gemv(m, n): - w = tvm.placeholder((m, n), name='w') - x = tvm.placeholder((n,), name='x') - k = tvm.reduce_axis((0, n), name='k') - z = tvm.compute((m,), lambda i: - tvm.sum(w[i, k] * x[k], axis=k), name='z') - Wb = tvm.decl_buffer(w.shape, w.dtype, + w = te.placeholder((m, n), name='w') + x = te.placeholder((n,), name='x') + k = te.reduce_axis((0, n), name='k') + z = te.compute((m,), lambda i: + te.sum(w[i, k] * x[k], axis=k), name='z') + Wb = tvm.tir.decl_buffer(w.shape, w.dtype, name="W", offset_factor=16, - strides=[tvm.var('ldw'), 1]) + strides=[te.var('ldw'), 1]) def intrin_func(ins, outs): ww, xx = ins zz = outs[0] ww_ptr = ww.access_ptr("r") xx_ptr = xx.access_ptr("r") zz_ptr = zz.access_ptr("w") - body = tvm.call_packed( + body = tvm.tir.call_packed( "gemv", ww_ptr, xx_ptr, zz_ptr, n, ww.strides[0]) - reset = tvm.call_packed( + reset = tvm.tir.call_packed( "fill_zero", zz_ptr, n) - update = tvm.call_packed( + update = tvm.tir.call_packed( "gemv_add", ww_ptr, xx_ptr, zz_ptr, n, ww.strides[0]) return body, reset, update - with tvm.build_config(data_alignment=16, + with tvm.target.build_config(data_alignment=16, offset_factor=16): - return tvm.decl_tensor_intrin(z.op, intrin_func, + return te.decl_tensor_intrin(z.op, intrin_func, binds={w: Wb}) def intrin_gemv_no_reset(m, n): - w = 
tvm.placeholder((m, n), name='w') - x = tvm.placeholder((n,), name='x') - k = tvm.reduce_axis((0, n), name='k') - z = tvm.compute((m,), lambda i: - tvm.sum(w[i, k] * x[k], axis=k), name='z') - Wb = tvm.decl_buffer(w.shape, w.dtype, + w = te.placeholder((m, n), name='w') + x = te.placeholder((n,), name='x') + k = te.reduce_axis((0, n), name='k') + z = te.compute((m,), lambda i: + te.sum(w[i, k] * x[k], axis=k), name='z') + Wb = tvm.tir.decl_buffer(w.shape, w.dtype, name="W", offset_factor=16, - strides=[tvm.var('ldw'), 1]) + strides=[te.var('ldw'), 1]) def intrin_func(ins, outs): ww, xx = ins zz = outs[0] ww_ptr = ww.access_ptr("r") xx_ptr = xx.access_ptr("r") zz_ptr = zz.access_ptr("w") - body = tvm.call_packed( + body = tvm.tir.call_packed( "gemv", ww_ptr, xx_ptr, zz_ptr, n, ww.strides[0]) - update = tvm.call_packed( + update = tvm.tir.call_packed( "gemv_add", ww_ptr, xx_ptr, zz_ptr, n, ww.strides[0]) return body, None, update - with tvm.build_config(data_alignment=16, + with tvm.target.build_config(data_alignment=16, offset_factor=16): - return tvm.decl_tensor_intrin(z.op, intrin_func, + return te.decl_tensor_intrin(z.op, intrin_func, binds={w: Wb}) def test_tensorize_vadd(): m = 128 - x = tvm.placeholder((m,), name='x') - y = tvm.placeholder((m,), name='y') - z = tvm.compute(x.shape, lambda i: x[i] + y[i], name='z') + x = te.placeholder((m,), name='x') + y = te.placeholder((m,), name='y') + z = te.compute(x.shape, lambda i: x[i] + y[i], name='z') def check(factor): - s = tvm.create_schedule(z.op) + s = te.create_schedule(z.op) xo, xi = s[z].split(z.op.axis[0], factor=factor) vadd = intrin_vadd(factor) s[z].tensorize(xi, vadd) s = s.normalize() - dom_map = tvm.schedule.InferBound(s) + dom_map = tvm.te.schedule.InferBound(s) finfer = tvm.get_global_func("test.op.InferTensorizeRegion") out_dom, in_dom = finfer(s[z], dom_map) - assert tvm.ir_pass.Equal(out_dom[z.op.axis[0]].extent, factor) - assert tvm.ir_pass.Equal(out_dom[z.op.axis[0]].min, xo * factor) - assert 
tvm.ir_pass.Equal(in_dom.items()[0][1][0].extent, factor) + assert tvm.tir.ir_pass.Equal(out_dom[z.op.axis[0]].extent, factor) + assert tvm.tir.ir_pass.Equal(out_dom[z.op.axis[0]].min, xo * factor) + assert tvm.tir.ir_pass.Equal(in_dom.items()[0][1][0].extent, factor) fmatch = tvm.get_global_func("test.op.MatchTensorizeBody") body = fmatch(s[z], out_dom, in_dom, vadd) - assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(body[0]), - tvm.ir_pass.CanonicalSimplify(vadd.op.body[0])) - stmt = tvm.schedule.ScheduleOps(s, dom_map) + assert tvm.tir.ir_pass.Equal(tvm.tir.ir_pass.CanonicalSimplify(body[0]), + tvm.tir.ir_pass.CanonicalSimplify(vadd.op.body[0])) + stmt = tvm.te.schedule.ScheduleOps(s, dom_map) tvm.lower(s, [x, y, z]) check(16) @@ -116,35 +117,35 @@ def test_tensorize_matmul(): n = 1024 m = n l = n - A = tvm.placeholder((n, l), name='A') - B = tvm.placeholder((m, l), name='B') - k = tvm.reduce_axis((0, l), name='k') - C = tvm.compute((n, m), lambda i, j: - tvm.sum(B[j, k] * A[i, k], axis=k), name='C') + A = te.placeholder((n, l), name='A') + B = te.placeholder((m, l), name='B') + k = te.reduce_axis((0, l), name='k') + C = te.compute((n, m), lambda i, j: + te.sum(B[j, k] * A[i, k], axis=k), name='C') def check(factor): - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) x, y = C.op.axis yo, yi = s[C].split(y, factor=factor) gemv = intrin_gemv(factor, l) s[C].tensorize(yi, gemv) s = s.normalize() - dom_map = tvm.schedule.InferBound(s) + dom_map = tvm.te.schedule.InferBound(s) finfer = tvm.get_global_func("test.op.InferTensorizeRegion") out_dom, in_dom = finfer(s[C], dom_map) - assert tvm.ir_pass.Equal(out_dom[x].extent, 1) - assert tvm.ir_pass.Equal(out_dom[y].extent, factor) - assert tvm.ir_pass.Equal(out_dom[y].min, yo * factor) + assert tvm.tir.ir_pass.Equal(out_dom[x].extent, 1) + assert tvm.tir.ir_pass.Equal(out_dom[y].extent, factor) + assert tvm.tir.ir_pass.Equal(out_dom[y].min, yo * factor) fmatch = 
tvm.get_global_func("test.op.MatchTensorizeBody") body = fmatch(s[C], out_dom, in_dom, gemv) - assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(body[0]), - tvm.ir_pass.CanonicalSimplify(gemv.op.body[0])) - stmt = tvm.schedule.ScheduleOps(s, dom_map) + assert tvm.tir.ir_pass.Equal(tvm.tir.ir_pass.CanonicalSimplify(body[0]), + tvm.tir.ir_pass.CanonicalSimplify(gemv.op.body[0])) + stmt = tvm.te.schedule.ScheduleOps(s, dom_map) tvm.lower(s, [A, B, C]) def check_rfactor(factor, rfactor): - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) x, y = C.op.axis rk = C.op.reduce_axis[0] yo, yi = s[C].split(y, factor=factor) @@ -153,21 +154,21 @@ def test_tensorize_matmul(): gemv = intrin_gemv(factor, rfactor) s[C].tensorize(yi, gemv) s = s.normalize() - dom_map = tvm.schedule.InferBound(s) + dom_map = tvm.te.schedule.InferBound(s) finfer = tvm.get_global_func("test.op.InferTensorizeRegion") out_dom, in_dom = finfer(s[C], dom_map) - assert tvm.ir_pass.Equal(out_dom[x].extent, 1) - assert tvm.ir_pass.Equal(out_dom[y].extent, factor) - assert tvm.ir_pass.Equal(out_dom[y].min, yo * factor) + assert tvm.tir.ir_pass.Equal(out_dom[x].extent, 1) + assert tvm.tir.ir_pass.Equal(out_dom[y].extent, factor) + assert tvm.tir.ir_pass.Equal(out_dom[y].min, yo * factor) fmatch = tvm.get_global_func("test.op.MatchTensorizeBody") body = fmatch(s[C], out_dom, in_dom, gemv) - assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(body[0]), - tvm.ir_pass.CanonicalSimplify(gemv.op.body[0])) - stmt = tvm.schedule.ScheduleOps(s, dom_map) + assert tvm.tir.ir_pass.Equal(tvm.tir.ir_pass.CanonicalSimplify(body[0]), + tvm.tir.ir_pass.CanonicalSimplify(gemv.op.body[0])) + stmt = tvm.te.schedule.ScheduleOps(s, dom_map) tvm.lower(s, [A, B, C]) def check_rfactor_no_reset(factor, rfactor): - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) x, y = C.op.axis rk = C.op.reduce_axis[0] yo, yi = s[C].split(y, factor=factor) @@ -176,21 +177,21 @@ def test_tensorize_matmul(): gemv = 
intrin_gemv_no_reset(factor, rfactor) s[C].tensorize(yi, gemv) s = s.normalize() - dom_map = tvm.schedule.InferBound(s) + dom_map = tvm.te.schedule.InferBound(s) finfer = tvm.get_global_func("test.op.InferTensorizeRegion") out_dom, in_dom = finfer(s[C], dom_map) - assert tvm.ir_pass.Equal(out_dom[x].extent, 1) - assert tvm.ir_pass.Equal(out_dom[y].extent, factor) - assert tvm.ir_pass.Equal(out_dom[y].min, yo * factor) + assert tvm.tir.ir_pass.Equal(out_dom[x].extent, 1) + assert tvm.tir.ir_pass.Equal(out_dom[y].extent, factor) + assert tvm.tir.ir_pass.Equal(out_dom[y].min, yo * factor) fmatch = tvm.get_global_func("test.op.MatchTensorizeBody") body = fmatch(s[C], out_dom, in_dom, gemv) - assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(body[0]), - tvm.ir_pass.CanonicalSimplify(gemv.op.body[0])) - stmt = tvm.schedule.ScheduleOps(s, dom_map) + assert tvm.tir.ir_pass.Equal(tvm.tir.ir_pass.CanonicalSimplify(body[0]), + tvm.tir.ir_pass.CanonicalSimplify(gemv.op.body[0])) + stmt = tvm.te.schedule.ScheduleOps(s, dom_map) tvm.lower(s, [A, B, C]) def check_rfactor_no_reset_multi_reduction(factor, rfactor): - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) x, y = C.op.axis rk = C.op.reduce_axis[0] yo, yi = s[C].split(y, factor=factor) @@ -200,17 +201,17 @@ def test_tensorize_matmul(): gemv = intrin_gemv_no_reset(factor, rfactor) s[C].tensorize(yi, gemv) s = s.normalize() - dom_map = tvm.schedule.InferBound(s) + dom_map = tvm.te.schedule.InferBound(s) finfer = tvm.get_global_func("test.op.InferTensorizeRegion") out_dom, in_dom = finfer(s[C], dom_map) - assert tvm.ir_pass.Equal(out_dom[x].extent, 1) - assert tvm.ir_pass.Equal(out_dom[y].extent, factor) - assert tvm.ir_pass.Equal(out_dom[y].min, yo * factor) + assert tvm.tir.ir_pass.Equal(out_dom[x].extent, 1) + assert tvm.tir.ir_pass.Equal(out_dom[y].extent, factor) + assert tvm.tir.ir_pass.Equal(out_dom[y].min, yo * factor) fmatch = tvm.get_global_func("test.op.MatchTensorizeBody") body = fmatch(s[C], 
out_dom, in_dom, gemv) - assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(body[0]), - tvm.ir_pass.CanonicalSimplify(gemv.op.body[0])) - stmt = tvm.schedule.ScheduleOps(s, dom_map) + assert tvm.tir.ir_pass.Equal(tvm.tir.ir_pass.CanonicalSimplify(body[0]), + tvm.tir.ir_pass.CanonicalSimplify(gemv.op.body[0])) + stmt = tvm.te.schedule.ScheduleOps(s, dom_map) tvm.lower(s, [A, B, C]) check(16) @@ -221,28 +222,28 @@ def test_tensorize_matmul(): # This tests whether algorithm and intrinsics expressions are simplified # as much as possible first and then checked for equality. See Issue #696 def test_tensorize_op(): - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod def op_intrin(): bh = 9 bw = 9 - x = tvm.placeholder((5, 5), name='A') - y = tvm.compute((bh, bw), + x = te.placeholder((5, 5), name='A') + y = te.compute((bh, bw), lambda i, j: x[idxd(j,3) + idxm(i,3), idxm(j,3)+ idxd(i,3)]) def intrin_func(ins, outs): xx, = ins zz = outs[0] - return tvm.call_packed("op", xx, zz) + return tvm.tir.call_packed("op", xx, zz) - with tvm.build_config(offset_factor=2): - return tvm.decl_tensor_intrin(y.op, intrin_func) + with tvm.target.build_config(offset_factor=2): + return te.decl_tensor_intrin(y.op, intrin_func) - A = tvm.placeholder((5, 5), name='A') - B = tvm.compute((9,9), lambda i, j: A[idxd(j,3) + idxm(i,3), idxm(j,3) + idxd(i,3)]) + A = te.placeholder((5, 5), name='A') + B = te.compute((9,9), lambda i, j: A[idxd(j,3) + idxm(i,3), idxm(j,3) + idxd(i,3)]) bt = op_intrin() - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) x,y = B.op.axis s[B].tensorize(x, bt) @@ -255,16 +256,16 @@ def test_tensorize_tensor_compute_op(): # an intrinsic called "multivadd" whose definition (pattern) # is a loop of another intrinsic called "vadd" def intrin_multivadd(n): - n_a = tvm.var("n_a") - Ab = tvm.decl_buffer((n, ), tvm.float32, strides=[n_a]) + n_a = te.var("n_a") + Ab = tvm.tir.decl_buffer((n, ), "float32", strides=[n_a]) - 
n_b = tvm.var("n_b") - Bb = tvm.decl_buffer((n, ), tvm.float32, strides=[n_b]) + n_b = te.var("n_b") + Bb = tvm.tir.decl_buffer((n, ), "float32", strides=[n_b]) - n_c = tvm.var("n_c") - Cb = tvm.decl_buffer((n, ), tvm.float32, strides=[n_c]) + n_c = te.var("n_c") + Cb = tvm.tir.decl_buffer((n, ), "float32", strides=[n_c]) - z = tvm.compute((n,), lambda i: tvm.call_extern("float32", 'vadd', + z = te.compute((n,), lambda i: tvm.tir.call_extern("float32", 'vadd', Ab.access_ptr("w", offset=n_a*i), Bb.access_ptr("r", offset=n_b*i), Cb.access_ptr("r", offset=n_c*i))) @@ -272,32 +273,32 @@ def test_tensorize_tensor_compute_op(): # replace the pattern with the multivadd call. I need to figure out # how to pass it the right parameters. def intrin_func(ins, outs): - return tvm.call_packed("multivadd") + return tvm.tir.call_packed("multivadd") - with tvm.build_config(): - return tvm.decl_tensor_intrin(z.op, intrin_func, name="multivadd") + with tvm.target.build_config(): + return te.decl_tensor_intrin(z.op, intrin_func, name="multivadd") def intrin_vadd(n): dtype = 'float32' - x = tvm.placeholder((n,), dtype=dtype, name='vx') - y = tvm.placeholder((n,), dtype=dtype, name='vy') - z = tvm.compute(x.shape, lambda i: x[i] + y[i], name='z') - s = tvm.create_schedule(z.op) + x = te.placeholder((n,), dtype=dtype, name='vx') + y = te.placeholder((n,), dtype=dtype, name='vy') + z = te.compute(x.shape, lambda i: x[i] + y[i], name='z') + s = te.create_schedule(z.op) def create_buffer(t): - return tvm.decl_buffer(t.shape, t.dtype, + return tvm.tir.decl_buffer(t.shape, t.dtype, name='W'+t.name, offset_factor=16) def intrin_func(ins, outs): - ib = tvm.ir_builder.create() - ib.emit(tvm.call_extern("float32", 'vadd', + ib = tvm.tir.ir_builder.create() + ib.emit(tvm.tir.call_extern("float32", 'vadd', ins[0].access_ptr("r"), ins[1].access_ptr('r'), outs[0].access_ptr('wr'))) return ib.get() - with tvm.build_config(offset_factor=16): - return tvm.decl_tensor_intrin(z.op, intrin_func, binds={x: 
create_buffer(x), + with tvm.target.build_config(offset_factor=16): + return te.decl_tensor_intrin(z.op, intrin_func, binds={x: create_buffer(x), y: create_buffer(y), z: create_buffer(z)}) @@ -306,19 +307,19 @@ def test_tensorize_tensor_compute_op(): factor = 16 dtype = 'float32' - A = tvm.placeholder((M//factor, factor), name="A", dtype=dtype) - B = tvm.placeholder((M//factor, factor), name="B", dtype=dtype) + A = te.placeholder((M//factor, factor), name="A", dtype=dtype) + B = te.placeholder((M//factor, factor), name="B", dtype=dtype) vadd = intrin_vadd(factor) - C = tvm.compute((M//factor, factor), + C = te.compute((M//factor, factor), lambda i: vadd(A[i, 0:factor], B[i, 0:factor]), name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) multivadd = intrin_multivadd(64) s[C].tensorize(C.op.axis[0], multivadd) s = s.normalize() - dom_map = tvm.schedule.InferBound(s) - stmt = tvm.schedule.ScheduleOps(s, dom_map) + dom_map = tvm.te.schedule.InferBound(s) + stmt = tvm.te.schedule.ScheduleOps(s, dom_map) # The loop that we tried to tensorize still exists in the code # That means tensorize didn't work as expected assert isinstance(stmt.body.body.body, tvm.tir.For) diff --git a/tests/python/unittest/test_testing.py b/tests/python/unittest/test_testing.py index b17d889..ecf520d 100644 --- a/tests/python/unittest/test_testing.py +++ b/tests/python/unittest/test_testing.py @@ -16,6 +16,7 @@ # under the License. import numpy as np import tvm +from tvm import te from tvm.testing import check_numerical_grads def test_check_numerical_grads(): diff --git a/tests/python/unittest/test_tvm_intrin.py b/tests/python/unittest/test_tvm_intrin.py index 23e921d..5bb1c65 100644 --- a/tests/python/unittest/test_tvm_intrin.py +++ b/tests/python/unittest/test_tvm_intrin.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te import topi from tvm.contrib import util, clang import numpy as np @@ -23,10 +24,10 @@ import math def test_nearbyint(): - m = tvm.var("m",) - A = tvm.placeholder((m,), name='A') - A_rounded = tvm.compute((m,), lambda *i: tvm.nearbyint(A(*i)), name='A') - s = tvm.create_schedule(A_rounded.op) + m = te.var("m",) + A = te.placeholder((m,), name='A') + A_rounded = te.compute((m,), lambda *i: tvm.tir.nearbyint(A(*i)), name='A') + s = te.create_schedule(A_rounded.op) f = tvm.build(s, [A, A_rounded], "llvm") ctx = tvm.cpu(0) n = 10 diff --git a/tests/web/prepare_test_libs.py b/tests/web/prepare_test_libs.py index ada40e6..a0e2c13 100644 --- a/tests/web/prepare_test_libs.py +++ b/tests/web/prepare_test_libs.py @@ -16,6 +16,7 @@ # under the License. # Prepare test library for js. import tvm +from tvm import te from tvm.contrib import emscripten import os @@ -23,10 +24,10 @@ def prepare_test_libs(base_path): target = "llvm -target=asmjs-unknown-emscripten -system-lib" if not tvm.runtime.enabled(target): raise RuntimeError("Target %s is not enbaled" % target) - n = tvm.var("n") - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') - s = tvm.create_schedule(B.op) + n = te.var("n") + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + s = te.create_schedule(B.op) fadd1 = tvm.build(s, [A, B], target, name="add_one") obj_path = os.path.join(base_path, "test_add_one.bc") fadd1.save(obj_path) diff --git a/tests/web/websock_rpc_test.py b/tests/web/websock_rpc_test.py index 92b0ad3..8be8ce0 100644 --- a/tests/web/websock_rpc_test.py +++ b/tests/web/websock_rpc_test.py @@ -21,6 +21,7 @@ Connect javascript end to the websocket port and connect to the RPC. 
""" import tvm +from tvm import te import os from tvm import rpc from tvm.contrib import util, emscripten @@ -33,10 +34,10 @@ def test_rpc_array(): if not tvm.runtime.enabled("rpc"): return # graph - n = tvm.convert(1024) - A = tvm.placeholder((n,), name='A') - B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') - s = tvm.create_schedule(B.op) + n = tvm.runtime.convert(1024) + A = te.placeholder((n,), name='A') + B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') + s = te.create_schedule(B.op) remote = rpc.connect(proxy_host, proxy_port, key="js") target = "llvm -target=asmjs-unknown-emscripten -system-lib" def check_remote(): diff --git a/tests/webgl/test_local_gemm.py b/tests/webgl/test_local_gemm.py index ff3c1a7..6bd22bf 100644 --- a/tests/webgl/test_local_gemm.py +++ b/tests/webgl/test_local_gemm.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np def test_local_gemm(): @@ -24,17 +25,17 @@ def test_local_gemm(): return nn = 1024 - n = tvm.var('n') - n = tvm.convert(nn) + n = te.var('n') + n = tvm.runtime.convert(nn) m = n l = n - A = tvm.placeholder((n, l), name='A', dtype='int32') - B = tvm.placeholder((m, l), name='B', dtype='int32') - k = tvm.reduce_axis((0, l), name='k') - C = tvm.compute((n, m), lambda ii, jj: tvm.sum(A[ii, k] * B[jj, k], axis=k), + A = te.placeholder((n, l), name='A', dtype='int32') + B = te.placeholder((m, l), name='B', dtype='int32') + k = te.reduce_axis((0, l), name='k') + C = te.compute((n, m), lambda ii, jj: te.sum(A[ii, k] * B[jj, k], axis=k), name='CC') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) s[C].opengl() print(tvm.lower(s, [A, B, C], simple_mode=True)) diff --git a/tests/webgl/test_local_multi_stage.py b/tests/webgl/test_local_multi_stage.py index 5786399..54a554b 100644 --- a/tests/webgl/test_local_multi_stage.py +++ b/tests/webgl/test_local_multi_stage.py @@ -15,6 +15,7 @@ # specific language 
governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np def test_local_multi_stage(): @@ -23,12 +24,12 @@ def test_local_multi_stage(): if not tvm.runtime.enabled("llvm"): return - n = tvm.var("n") - A = tvm.placeholder((n,), name='A', dtype="int32") - B = tvm.compute((n,), lambda i: A[i] + 1, name="B") - C = tvm.compute((n,), lambda i: B[i] * 2, name="C") + n = te.var("n") + A = te.placeholder((n,), name='A', dtype="int32") + B = te.compute((n,), lambda i: A[i] + 1, name="B") + C = te.compute((n,), lambda i: B[i] * 2, name="C") - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) s[B].opengl() s[C].opengl() diff --git a/tests/webgl/test_local_save_load.py b/tests/webgl/test_local_save_load.py index 0a63a77..cca6802 100644 --- a/tests/webgl/test_local_save_load.py +++ b/tests/webgl/test_local_save_load.py @@ -16,6 +16,7 @@ # under the License. import numpy as np import tvm +from tvm import te from tvm import rpc from tvm.contrib import util, emscripten @@ -25,11 +26,11 @@ def test_local_save_load(): if not tvm.runtime.enabled("llvm"): return - n = tvm.var("n") - A = tvm.placeholder((n,), name='A', dtype='int32') - B = tvm.placeholder((n,), name='B', dtype='int32') - C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C") - s = tvm.create_schedule(C.op) + n = te.var("n") + A = te.placeholder((n,), name='A', dtype='int32') + B = te.placeholder((n,), name='B', dtype='int32') + C = te.compute(A.shape, lambda i: A[i] + B[i], name="C") + s = te.create_schedule(C.op) s[C].opengl() f = tvm.build(s, [A, B, C], "opengl", target_host="llvm", name="myadd") diff --git a/tests/webgl/test_local_topi_conv2d_nchw.py b/tests/webgl/test_local_topi_conv2d_nchw.py index c03d9dc..0d9b777 100644 --- a/tests/webgl/test_local_topi_conv2d_nchw.py +++ b/tests/webgl/test_local_topi_conv2d_nchw.py @@ -20,6 +20,7 @@ Should be removed once we fix OpenGL testing on Jenkins.""" import os import numpy as np import tvm +from tvm import 
te import topi from tvm.contrib.pickle_memoize import memoize from topi.util import get_const_tuple @@ -27,8 +28,8 @@ from topi.util import get_const_tuple def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, padding): in_height = in_width = in_size - A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A') - W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W') + A = te.placeholder((batch, in_channel, in_height, in_width), name='A') + W = te.placeholder((num_filter, in_channel, kernel, kernel), name='W') B = topi.nn.conv2d_nchw(A, W, stride, padding) C = topi.nn.relu(B) @@ -59,7 +60,7 @@ def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, p w = tvm.nd.array(w_np, ctx) b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx) c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx) - with tvm.build_config(auto_unroll_max_step=1400, + with tvm.target.build_config(auto_unroll_max_step=1400, unroll_explicit=(device != "cuda")): func1 = tvm.build(s1, [A, W, B], device, name="conv2d_%d_%d_%d_%d_%d_%d_%d" % (batch, in_channel, in_size, num_filter, kernel, stride, padding)) func2 = tvm.build(s2, [A, W, C], device, name="relu_%d_%d_%d_%d_%d_%d_%d" % (batch, in_channel, in_size, num_filter, kernel, stride, padding)) diff --git a/tests/webgl/test_local_topi_dense.py b/tests/webgl/test_local_topi_dense.py index d57bfd2..60dfe1f 100644 --- a/tests/webgl/test_local_topi_dense.py +++ b/tests/webgl/test_local_topi_dense.py @@ -20,15 +20,16 @@ Should be removed once we fix OpenGL testing on Jenkins. 
""" import numpy as np import tvm +from tvm import te import topi from topi.util import get_const_tuple from tvm.contrib.pickle_memoize import memoize def verify_dense(batch, in_dim, out_dim, use_bias=True): - A = tvm.placeholder((batch, in_dim), name='A') - B = tvm.placeholder((out_dim, in_dim), name='B') - C = tvm.placeholder((out_dim,), name='C') + A = te.placeholder((batch, in_dim), name='A') + B = te.placeholder((out_dim, in_dim), name='B') + C = te.placeholder((out_dim,), name='C') D = topi.nn.dense(A, B, C if use_bias else None) D = topi.nn.relu(D) dtype = A.dtype diff --git a/tests/webgl/test_local_topi_pooling.py b/tests/webgl/test_local_topi_pooling.py index c1b6660..3adae7b 100644 --- a/tests/webgl/test_local_topi_pooling.py +++ b/tests/webgl/test_local_topi_pooling.py @@ -20,6 +20,7 @@ Should be removed once we fix OpenGL testing on Jenkins. """ import numpy as np import tvm +from tvm import te import topi import math from topi.util import get_const_tuple @@ -29,7 +30,7 @@ def verify_pool(n, ic, ih, kh, sh, padding, pool_type, ceil_mode): kw = kh sw = sh ph, pw = padding - A = tvm.placeholder((n, ic, ih, iw), name='A') + A = te.placeholder((n, ic, ih, iw), name='A') B = topi.nn.pool(A, kernel=[kh, kw], stride=[sh, sw], padding=padding, pool_type=pool_type, ceil_mode=ceil_mode) B = topi.nn.relu(B) @@ -91,7 +92,7 @@ def test_pool(): def verify_global_pool(n, c, h, w, pool_type): - A = tvm.placeholder((n, c, h, w), name='A') + A = te.placeholder((n, c, h, w), name='A') B = topi.nn.global_pool(A, pool_type=pool_type) B = topi.nn.relu(B) diff --git a/tests/webgl/test_local_topi_softmax.py b/tests/webgl/test_local_topi_softmax.py index 5d9ed93..c0ddbf2 100644 --- a/tests/webgl/test_local_topi_softmax.py +++ b/tests/webgl/test_local_topi_softmax.py @@ -22,15 +22,16 @@ Should be removed once we fix OpenGL testing on Jenkins. 
import os import numpy as np import tvm +from tvm import te import topi import logging from topi.util import get_const_tuple def verify_softmax(m, n): - A = tvm.placeholder((m, n), name='A') + A = te.placeholder((m, n), name='A') B = topi.nn.softmax(A) # confirm lower works - s = tvm.create_schedule([B.op]) + s = te.create_schedule([B.op]) tvm.lower(s, [A, B], simple_mode=True) a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype) @@ -59,10 +60,10 @@ def test_softmax(): def verify_log_softmax(m, n): - A = tvm.placeholder((m, n), name='A') + A = te.placeholder((m, n), name='A') B = topi.nn.log_softmax(A) # confirm lower works - s = tvm.create_schedule([B.op]) + s = te.create_schedule([B.op]) tvm.lower(s, [A, B], simple_mode=True) a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype) b_np = topi.testing.log_softmax_python(a_np) diff --git a/tests/webgl/test_remote_save_load.py b/tests/webgl/test_remote_save_load.py index 1e2ca0f..34bbb3f 100644 --- a/tests/webgl/test_remote_save_load.py +++ b/tests/webgl/test_remote_save_load.py @@ -30,6 +30,7 @@ $ python tests/webgl/test_remote_save_load.py import numpy as np import tvm +from tvm import te from tvm import rpc from tvm.contrib import util, emscripten @@ -45,11 +46,11 @@ def try_remote_save_load(): return # Build the module. 
- n = tvm.var("n") - A = tvm.placeholder((n,), name='A') - B = tvm.placeholder((n,), name='B') - C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C") - s = tvm.create_schedule(C.op) + n = te.var("n") + A = te.placeholder((n,), name='A') + B = te.placeholder((n,), name='B') + C = te.compute(A.shape, lambda i: A[i] + B[i], name="C") + s = te.create_schedule(C.op) s[C].opengl() target_host = "llvm -target=asmjs-unknown-emscripten -system-lib" f = tvm.build(s, [A, B, C], "opengl", target_host=target_host, name="myadd") diff --git a/tests/webgl/test_static_webgl_library.py b/tests/webgl/test_static_webgl_library.py index 365f821..929da4c 100644 --- a/tests/webgl/test_static_webgl_library.py +++ b/tests/webgl/test_static_webgl_library.py @@ -20,6 +20,7 @@ from __future__ import absolute_import, print_function import os, shutil, SimpleHTTPServer, SocketServer import tvm +from tvm import te from tvm.contrib import emscripten, util import numpy as np @@ -30,11 +31,11 @@ def try_static_webgl_library(): os.chdir(os.path.join(curr_path, "../../lib")) # Create OpenGL module. - n = tvm.var("n") - A = tvm.placeholder((n,), name='A', dtype="float") - B = tvm.compute((n,), lambda *i: A[i], name="B") + n = te.var("n") + A = te.placeholder((n,), name='A', dtype="float") + B = te.compute((n,), lambda *i: A[i], name="B") - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) s[B].opengl() target_host = "llvm -target=asmjs-unknown-emscripten -system-lib" diff --git a/topi/python/topi/argwhere.py b/topi/python/topi/argwhere.py index c2a9ade..7d8429b 100644 --- a/topi/python/topi/argwhere.py +++ b/topi/python/topi/argwhere.py @@ -24,12 +24,12 @@ def hybrid_argwhere_1d(output_shape, condition): Parameters ---------- - condition : tvm.Tensor + condition : tvm.te.Tensor 1-D tensor with boolean values. Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor Indices of non-zero elements. 
""" a = output_tensor(output_shape, "int32") @@ -47,12 +47,12 @@ def hybrid_argwhere_2d(output_shape, condition): Parameters ---------- - condition : tvm.Tensor + condition : tvm.te.Tensor 2-D tensor with boolean values. Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor Indices of non-zero elements. """ a = output_tensor(output_shape, "int32") @@ -73,12 +73,12 @@ def hybrid_argwhere_3d(output_shape, condition): Parameters ---------- - condition : tvm.Tensor + condition : tvm.te.Tensor 3-D tensor with boolean values. Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor Indices of non-zero elements. """ a = output_tensor(output_shape, "int32") @@ -102,12 +102,12 @@ def hybrid_argwhere_4d(output_shape, condition): Parameters ---------- - condition : tvm.Tensor + condition : tvm.te.Tensor 4-D tensor with boolean values. Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor Indices of non-zero elements. """ a = output_tensor(output_shape, "int32") @@ -134,12 +134,12 @@ def hybrid_argwhere_5d(output_shape, condition): Parameters ---------- - condition : tvm.Tensor + condition : tvm.te.Tensor 5-D tensor with boolean values. Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor Indices of non-zero elements. """ a = output_tensor(output_shape, "int32") @@ -168,12 +168,12 @@ def argwhere(output_shape, condition): Parameters ---------- - condition : tvm.Tensor + condition : tvm.te.Tensor Tensor with boolean values. Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor Indices of non-zero elements. """ if len(condition.shape) == 1: diff --git a/topi/python/topi/arm_cpu/bitserial_conv2d.py b/topi/python/topi/arm_cpu/bitserial_conv2d.py index d28ec09..bdda496 100644 --- a/topi/python/topi/arm_cpu/bitserial_conv2d.py +++ b/topi/python/topi/arm_cpu/bitserial_conv2d.py @@ -18,6 +18,7 @@ """Bitserial conv2d schedule on arm cpu""" from __future__ import absolute_import as _abs import tvm +from tvm import te from tvm import autotvm from tvm import relay from .. 
import tag @@ -34,8 +35,8 @@ def _kernel_vec_spatial_pack_nhwc(kernel, kernel_bits, VC, use_bitpack=True): kernel_q = kernel KH, KW, KB, CI, CO = kernel_q.shape kvshape = (CO//VC, KH, KW, KB, VC, CI) - return tvm.compute(kvshape, lambda co, dh, dw, b, vc, ci: \ - kernel_q[dh][dw][b][ci][co*VC+vc], name='kernel_vec') + return te.compute(kvshape, lambda co, dh, dw, b, vc, ci: \ + kernel_q[dh][dw][b][ci][co*VC+vc], name='kernel_vec') @autotvm.register_topi_compute("bitserial_conv2d_nhwc.arm_cpu") def bitserial_conv2d_nhwc(cfg, data, kernel, stride, padding, activation_bits, weight_bits, @@ -69,8 +70,8 @@ def bitserial_conv2d_nhwc(cfg, data, kernel, stride, padding, activation_bits, w OW = (PAD_W - KW) // WSTR + 1 oshape = (1, OH, OW, CO) - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod # Pad input channels of weights and data when it is not a multiple of 8 if CI_packed % 8 != 0: @@ -108,7 +109,7 @@ def bitserial_conv2d_nhwc(cfg, data, kernel, stride, padding, activation_bits, w data_q = bitpack(data, activation_bits, pack_axis=3, bit_axis=3, pack_type='uint8') kernel_vec = _kernel_vec_spatial_pack_nhwc(kernel, weight_bits, VC, len(kernel.shape) == 4) - idxm = tvm.indexmod + idxm = tvm.tir.indexmod if idxm(kernel_vec.shape[-1], 8) != 0 and CI_PAD != 0: kernel_vec = pad(kernel_vec, [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, CI_PAD]) @@ -125,78 +126,79 @@ def bitserial_conv2d_nhwc(cfg, data, kernel, stride, padding, activation_bits, w else: data_pad = data_q - data_vec = tvm.compute(dvshape, lambda n, h, w, vh, vw, b, ci: \ - data_pad[n][h*VH*HSTR+vh][w*VW*WSTR+vw][b][ci], name='data_vec') - ci = tvm.reduce_axis((0, CI), name='ci') - dh = tvm.reduce_axis((0, KH), name='dh') - dw = tvm.reduce_axis((0, KW), name='dw') - ib = tvm.reduce_axis((0, IB), name='ib') - kb = tvm.reduce_axis((0, KB), name='kb') + data_vec = te.compute(dvshape, lambda n, h, w, vh, vw, b, ci: \ + data_pad[n][h*VH*HSTR+vh][w*VW*WSTR+vw][b][ci], name='data_vec') 
+ ci = te.reduce_axis((0, CI), name='ci') + dh = te.reduce_axis((0, KH), name='dh') + dw = te.reduce_axis((0, KW), name='dw') + ib = te.reduce_axis((0, IB), name='ib') + kb = te.reduce_axis((0, KB), name='kb') def _bipolar_conv(n, h, w, co, vh, vw, vc): - return tvm.sum((tvm.popcount( + return te.sum((tvm.tir.popcount( kernel_vec[co, dh, dw, kb, vc, ci].astype('uint16') & data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ib, ci].astype('uint16')) - << (kb + ib).astype('uint16')), axis=[dh, dw, kb, ib, ci]) + << (kb + ib).astype('uint16')), axis=[dh, dw, kb, ib, ci]) def _unipolar_conv(n, h, w, co, vh, vw, vc): - return tvm.sum( - ((tvm.popcount(kernel_vec[co, dh, dw, kb, vc, ci].astype('int16') & - data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ib, ci].astype('int16')) - - tvm.popcount(~kernel_vec[co, dh, dw, kb, vc, ci].astype('int16') & - data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ib, ci]).astype('int16')) + return te.sum( + ((tvm.tir.popcount(kernel_vec[co, dh, dw, kb, vc, ci].astype('int16') & + data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ib, ci].astype('int16')) - + tvm.tir.popcount(~kernel_vec[co, dh, dw, kb, vc, ci].astype('int16') & + data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ib, ci]).astype('int16')) << (kb + ib).astype('int16')), axis=[dh, dw, kb, ib, ci]) if unipolar: - conv_vec = tvm.compute(ovshape, _unipolar_conv, name='conv_vec', tag='unipolar') + conv_vec = te.compute(ovshape, _unipolar_conv, name='conv_vec', tag='unipolar') else: - conv_vec = tvm.compute(ovshape, _bipolar_conv, name='conv_vec', tag='bipolar') + conv_vec = te.compute(ovshape, _bipolar_conv, name='conv_vec', tag='bipolar') - conv = tvm.compute(oshape, - lambda n, h, w, co: - conv_vec[n, - idxd(h, VH), idxd(w, VW), idxd(co, VC), - idxm(h, VH), idxm(w, VW), idxm(co, VC)].astype(out_dtype), - name='conv', tag='spatial_bitserial_conv_nhwc') + conv = te.compute(oshape, + lambda n, h, w, co: + conv_vec[n, + idxd(h, VH), idxd(w, VW), idxd(co, VC), + idxm(h, VH), idxm(w, VW), idxm(co, 
VC)].astype(out_dtype), + name='conv', tag='spatial_bitserial_conv_nhwc') return conv def _intrin_popcount(m, k_i, w_b, x_b, unipolar): pack_dtype = 'uint8' - w = tvm.placeholder((w_b, m, k_i), dtype=pack_dtype, name='w') - x = tvm.placeholder((x_b, k_i,), dtype=pack_dtype, name='x') - k = tvm.reduce_axis((0, k_i), name='k') - bw = tvm.reduce_axis((0, w_b), name='bw') - bx = tvm.reduce_axis((0, x_b), name='bx') + w = te.placeholder((w_b, m, k_i), dtype=pack_dtype, name='w') + x = te.placeholder((x_b, k_i,), dtype=pack_dtype, name='x') + k = te.reduce_axis((0, k_i), name='k') + bw = te.reduce_axis((0, w_b), name='bw') + bx = te.reduce_axis((0, x_b), name='bx') if unipolar: dtype = 'int16' - z = tvm.compute((m,), lambda i: - tvm.sum((tvm.popcount(w[bw, i, k].astype(dtype) & x[bx, k].astype(dtype)) - - tvm.popcount(~w[bw, i, k].astype(dtype) & x[bx, k].astype(dtype))) - << (bw+bx).astype(dtype), axis=[bw, bx, k]), name='z') + z = te.compute( + (m,), lambda i: + te.sum((tvm.tir.popcount(w[bw, i, k].astype(dtype) & x[bx, k].astype(dtype)) - + tvm.tir.popcount(~w[bw, i, k].astype(dtype) & x[bx, k].astype(dtype))) + << (bw+bx).astype(dtype), axis=[bw, bx, k]), name='z') else: dtype = 'uint16' - z = tvm.compute((m,), lambda i: - tvm.sum(tvm.popcount(w[bw, i, k].astype(dtype) & x[bx, k].astype(dtype)) - << (bw+bx).astype(dtype), axis=[bw, bx, k]), name='z') - Wb = tvm.decl_buffer(w.shape, w.dtype, - name="W", - offset_factor=k_i, - strides=[tvm.var('ldw'), tvm.var('ldw'), 1]) # stride can be inferred - Xb = tvm.decl_buffer(x.shape, x.dtype, - name="X", - offset_factor=k_i, - strides=[tvm.var('ldw'), 1]) - Zb = tvm.decl_buffer(z.shape, z.dtype, - name="Z", - offset_factor=1, - strides=[1]) + z = te.compute((m,), lambda i: + te.sum(tvm.tir.popcount(w[bw, i, k].astype(dtype) & x[bx, k].astype(dtype)) + << (bw+bx).astype(dtype), axis=[bw, bx, k]), name='z') + Wb = tvm.tir.decl_buffer(w.shape, w.dtype, + name="W", + offset_factor=k_i, + strides=[te.var('ldw'), te.var('ldw'), 1]) 
# stride can be inferred + Xb = tvm.tir.decl_buffer(x.shape, x.dtype, + name="X", + offset_factor=k_i, + strides=[te.var('ldw'), 1]) + Zb = tvm.tir.decl_buffer(z.shape, z.dtype, + name="Z", + offset_factor=1, + strides=[1]) def _intrin_func(ins, outs): ww, xx = ins zz = outs[0] - args_1 = tvm.const(1, 'uint32') - args_2 = tvm.const(2, 'uint32') + args_1 = tvm.tir.const(1, 'uint32') + args_2 = tvm.tir.const(2, 'uint32') if unipolar: vpadd = "llvm.arm.neon.vpadd.v8i8" @@ -212,9 +214,9 @@ def _intrin_popcount(m, k_i, w_b, x_b, unipolar): return_dtype = 'uint16x8' def _instr(index): - irb = tvm.ir_builder.create() + irb = tvm.tir.ir_builder.create() if index == 1: # reduce reset - irb.emit(zz.vstore(0, tvm.const(0, return_dtype))) + irb.emit(zz.vstore(0, tvm.tir.const(0, return_dtype))) return irb.get() # body and reduce update cnts8 = [None] * 8 @@ -227,46 +229,50 @@ def _intrin_popcount(m, k_i, w_b, x_b, unipolar): w_ = ww.vload([bw, i, 0], 'uint8x16').astype(full_dtype) x_ = xx.vload([bx, 0], 'uint8x16').astype(full_dtype) if unipolar: - cnts = tvm.popcount(w_ & x_) - tvm.popcount(~w_ & x_) + cnts = tvm.tir.popcount(w_ & x_) - tvm.tir.popcount(~w_ & x_) else: - cnts = tvm.popcount(w_ & x_) - upper_half = tvm.call_pure_intrin(half_dtype, 'vectorhigh', cnts) - lower_half = tvm.call_pure_intrin(half_dtype, 'vectorlow', cnts) + cnts = tvm.tir.popcount(w_ & x_) + upper_half = tvm.tir.call_pure_intrin(half_dtype, 'vectorhigh', cnts) + lower_half = tvm.tir.call_pure_intrin(half_dtype, 'vectorlow', cnts) cnts8[i] = upper_half + lower_half for i in range(m//2): - cnts4[i] = tvm.call_llvm_intrin(half_dtype, vpadd, - args_1, cnts8[i*2], cnts8[i*2+1]) + cnts4[i] = tvm.tir.call_llvm_intrin(half_dtype, vpadd, + args_1, cnts8[i*2], cnts8[i*2+1]) for i in range(m//4): - cnts2[i] = tvm.call_llvm_intrin(half_dtype, vpadd, - args_1, cnts4[i*2], cnts4[i*2+1]) - cnts = tvm.call_pure_intrin(full_dtype, 'vectorcombine', cnts2[0], cnts2[1]) - shifted_cnts = cnts << tvm.const(bw+bx, 
pack_dtype) - out = tvm.call_llvm_intrin(return_dtype, vpadalu, - args_2, zz.vload(0, return_dtype), shifted_cnts) + cnts2[i] = tvm.tir.call_llvm_intrin(half_dtype, vpadd, + args_1, cnts4[i*2], cnts4[i*2+1]) + cnts = tvm.tir.call_pure_intrin( + full_dtype, 'vectorcombine', cnts2[0], cnts2[1]) + shifted_cnts = cnts << tvm.tir.const(bw+bx, pack_dtype) + out = tvm.tir.call_llvm_intrin( + return_dtype, vpadalu, + args_2, zz.vload(0, return_dtype), shifted_cnts) else: # ki == 8 for i in range(m): w_ = ww.vload([bw, i, 0], 'uint8x8').astype(half_dtype) x_ = xx.vload([bx, 0], 'uint8x8').astype(half_dtype) if unipolar: - cnts8[i] = tvm.popcount(w_ & x_) - tvm.popcount(~w_ & x_) + cnts8[i] = tvm.tir.popcount(w_ & x_) - tvm.tir.popcount(~w_ & x_) else: - cnts8[i] = tvm.popcount(w_ & x_) + cnts8[i] = tvm.tir.popcount(w_ & x_) for i in range(m//2): - cnts4[i] = tvm.call_llvm_intrin(half_dtype, vpadd, - args_1, cnts8[i*2], cnts8[i*2+1]) + cnts4[i] = tvm.tir.call_llvm_intrin(half_dtype, vpadd, + args_1, cnts8[i*2], cnts8[i*2+1]) for i in range(m//4): - cnts2[i] = tvm.call_llvm_intrin(half_dtype, vpadd, - args_1, cnts4[i*2], cnts4[i*2+1]) - cnts = tvm.call_pure_intrin(full_dtype, 'vectorcombine', cnts2[0], cnts2[1]) - shifted_cnts = cnts << tvm.const(bw+bx, pack_dtype) - out = tvm.call_llvm_intrin(return_dtype, vpadalu, - args_2, zz.vload(0, return_dtype), shifted_cnts) + cnts2[i] = tvm.tir.call_llvm_intrin(half_dtype, vpadd, + args_1, cnts4[i*2], cnts4[i*2+1]) + cnts = tvm.tir.call_pure_intrin( + full_dtype, 'vectorcombine', cnts2[0], cnts2[1]) + shifted_cnts = cnts << tvm.tir.const(bw+bx, pack_dtype) + out = tvm.tir.call_llvm_intrin( + return_dtype, vpadalu, + args_2, zz.vload(0, return_dtype), shifted_cnts) irb.emit(zz.vstore(0, out)) return irb.get() # body, reset, update return _instr(0), _instr(1), _instr(2) - with tvm.build_config(offset_factor=1, partition_const_loop=True): - return tvm.decl_tensor_intrin(z.op, _intrin_func, binds={w: Wb, x:Xb, z:Zb}) + with 
tvm.target.build_config(offset_factor=1, partition_const_loop=True): + return te.decl_tensor_intrin(z.op, _intrin_func, binds={w: Wb, x:Xb, z:Zb}) # ARM specific schedule that using custom microkernel def _schedule_spatial_conv2d_nhwc(cfg, s, data_pad, data_vec, kernel_vec, @@ -325,7 +331,7 @@ def _schedule_spatial_conv2d_nhwc(cfg, s, data_pad, data_vec, kernel_vec, @autotvm.register_topi_schedule("bitserial_conv2d_nhwc.arm_cpu") def schedule_bitserial_conv2d_nhwc(cfg, outs): """Arm cpu schedule for bitserial conv2d""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def traverse(op): @@ -335,7 +341,7 @@ def schedule_bitserial_conv2d_nhwc(cfg, outs): if op not in s.outputs: s[op].compute_inline() for tensor in op.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) if 'spatial_bitserial_conv_nhwc' in op.tag: @@ -347,7 +353,7 @@ def schedule_bitserial_conv2d_nhwc(cfg, outs): data_q = data_vec.op.input_tensors[0] data = data_q.op.input_tensors[0] data_pad = None - if isinstance(data_q.op, tvm.tensor.ComputeOp) and "pad" in data_q.op.tag: + if isinstance(data_q.op, te.tensor.ComputeOp) and "pad" in data_q.op.tag: data_pad = data_q data_q = data data = data.op.input_tensors[0] diff --git a/topi/python/topi/arm_cpu/bitserial_dense.py b/topi/python/topi/arm_cpu/bitserial_dense.py index 3f1889c..beed79d 100644 --- a/topi/python/topi/arm_cpu/bitserial_dense.py +++ b/topi/python/topi/arm_cpu/bitserial_dense.py @@ -18,6 +18,7 @@ """Schedule for bitserial dense operator.""" from __future__ import absolute_import as _abs import tvm +from tvm import te from tvm import autotvm from topi.util import get_const_tuple from .. 
import tag @@ -32,15 +33,15 @@ def bitserial_dense(cfg, data, weight, data_bits, weight_bits, pack_dtype, out_d Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 2-D with shape [batch, in_dim] - weight : tvm.Tensor + weight : tvm.te.Tensor 2-D with shape [out_dim, in_dim] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [batch, out_dim] """ data_packed = bitpack(data, data_bits, pack_axis=1, bit_axis=1, pack_type=pack_dtype) @@ -83,23 +84,23 @@ def bitserial_dense(cfg, data, weight, data_bits, weight_bits, pack_dtype, out_d wvshape = (out_dim//VY, in_dim//VK, WB, VY, VK) oshape = (batch, out_dim) - k = tvm.reduce_axis((0, in_dim), name='k') - db = tvm.reduce_axis((0, DB), name='db') - wb = tvm.reduce_axis((0, WB), name='wb') + k = te.reduce_axis((0, in_dim), name='k') + db = te.reduce_axis((0, DB), name='db') + wb = te.reduce_axis((0, WB), name='wb') # Tile data and weights - weight_vec = tvm.compute(wvshape, lambda yo, ko, wb, vy, vk: - weight_packed[yo*VY+vy][wb][ko*VK+vk], name='weight_vec') - matmul_unipolar = tvm.compute(oshape, lambda x, y: tvm.sum( - (tvm.popcount(weight_vec[y//VY, k//VK, wb, y%VY, k%VK].astype(out_dtype) & - data_packed[x, db, k].astype(out_dtype)) - - tvm.popcount(~weight_vec[y//VY, k//VK, wb, y%VY, k%VK].astype(out_dtype) & - data_packed[x, db, k].astype(out_dtype))) + weight_vec = te.compute(wvshape, lambda yo, ko, wb, vy, vk: + weight_packed[yo*VY+vy][wb][ko*VK+vk], name='weight_vec') + matmul_unipolar = te.compute(oshape, lambda x, y: te.sum( + (tvm.tir.popcount(weight_vec[y//VY, k//VK, wb, y%VY, k%VK].astype(out_dtype) & + data_packed[x, db, k].astype(out_dtype)) - + tvm.tir.popcount(~weight_vec[y//VY, k//VK, wb, y%VY, k%VK].astype(out_dtype) & + data_packed[x, db, k].astype(out_dtype))) << (wb+db).astype(out_dtype), axis=[wb, db, k]), tag='bitserial_dense_unipolar') - matmul = tvm.compute(oshape, lambda x, y: tvm.sum( - tvm.popcount(weight_vec[y//VY, k//VK, wb, y%VY, k%VK].astype(out_dtype) 
& - data_packed[x, db, k].astype(out_dtype)) + matmul = te.compute(oshape, lambda x, y: te.sum( + tvm.tir.popcount(weight_vec[y//VY, k//VK, wb, y%VY, k%VK].astype(out_dtype) & + data_packed[x, db, k].astype(out_dtype)) << (wb+db).astype(out_dtype), axis=[wb, db, k]), tag='bitserial_dense') cfg.add_flop(batch * out_dim * in_dim * binary_op_multiplier(pack_dtype)) @@ -124,8 +125,8 @@ def schedule_bitserial_dense(cfg, outs): s: Schedule The computation schedule for bitserial_dense. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(cfg, s, data_vec, weight_vec, output, unipolar): @@ -162,7 +163,7 @@ def schedule_bitserial_dense(cfg, outs): if op not in s.outputs: s[op].compute_inline() for tensor in op.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp): + if isinstance(tensor.op, tvm.te.ComputeOp): traverse(tensor.op) elif op.tag == 'bitserial_dense' or 'bitserial_dense_unipolar': diff --git a/topi/python/topi/arm_cpu/conv2d.py b/topi/python/topi/arm_cpu/conv2d.py index 2144d26..25b338e 100644 --- a/topi/python/topi/arm_cpu/conv2d.py +++ b/topi/python/topi/arm_cpu/conv2d.py @@ -19,6 +19,7 @@ from __future__ import absolute_import as _abs import tvm +from tvm import te from tvm import autotvm import tvm.contrib.nnpack @@ -27,9 +28,9 @@ from .. 
import nn from ..nn.util import get_const_int, get_pad_tuple from ..nn.winograd_util import winograd_transform_matrices from .conv2d_spatial_pack import conv2d_spatial_pack_nchw, \ - conv2d_spatial_pack_nhwc, \ - schedule_conv2d_spatial_pack_nchw, \ - schedule_conv2d_spatial_pack_nhwc + conv2d_spatial_pack_nhwc, \ + schedule_conv2d_spatial_pack_nchw, \ + schedule_conv2d_spatial_pack_nhwc @autotvm.register_topi_compute("conv2d_nchw_spatial_pack.arm_cpu") @@ -42,7 +43,7 @@ def conv2d_nchw_spatial_pack(cfg, data, kernel, strides, padding, dilation, out_ @autotvm.register_topi_schedule("conv2d_nchw_spatial_pack.arm_cpu") def schedule_conv2d_nchw_spatial_pack(cfg, outs): """Create schedule for conv2d_nchw""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): # schedule conv2d @@ -59,7 +60,7 @@ def schedule_conv2d_nchw_spatial_pack(cfg, outs): kernel = kernel_vec.op.input_tensors[0] else: kernel = kernel_vec - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() schedule_conv2d_spatial_pack_nchw(cfg, s, data_vec, kernel_vec, @@ -79,7 +80,7 @@ def conv2d_nhwc_spatial_pack(cfg, data, kernel, strides, padding, dilation, out_ @autotvm.register_topi_schedule("conv2d_nhwc_spatial_pack.arm_cpu") def schedule_conv2d_nhwc_spatial_pack(cfg, outs): """Create schedule for conv2d_nhwc""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): if 'spatial_conv_output_NHWC' in op.tag: @@ -100,7 +101,7 @@ def conv2d_nchw_winograd(cfg, data, kernel, strides, padding, dilation, out_dtyp @autotvm.register_topi_schedule("conv2d_nchw_winograd.arm_cpu") def schedule_conv2d_nchw_winograd(cfg, outs): """Create schedule for conv2d_nchw_winograd""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def 
_callback(op): if 'winograd_conv2d_output' in op.tag: @@ -136,8 +137,8 @@ def _decl_winograd(cfg, data, kernel, strides, padding, dilation, out_dtype, til assert KH == 3 and KW == 3 and HSTR == 1 and WSTR == 1 data_pad = nn.pad(data, (0, 0, pt, pl), (0, 0, pb, pr), name="data_pad") - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod r = KW m = tile_size @@ -158,48 +159,48 @@ def _decl_winograd(cfg, data, kernel, strides, padding, dilation, out_dtype, til VK = cfg['tile_k'].size[-1] # pack input tile - input_tile = tvm.compute((C, idxd(P, VP), alpha, alpha, VP), - lambda c, b, eps, nu, bb: - data_pad[idxd(b*VP + bb, nH*nW), c, - idxm(idxd(b*VP + bb, nW), nH) * m + eps, - idxm(b*VP + bb, nW) * m + nu], - name='d') + input_tile = te.compute((C, idxd(P, VP), alpha, alpha, VP), + lambda c, b, eps, nu, bb: + data_pad[idxd(b*VP + bb, nH*nW), c, + idxm(idxd(b*VP + bb, nW), nH) * m + eps, + idxm(b*VP + bb, nW) * m + nu], + name='d') # transform kernel if pre_computed: U = kernel else: - r_kh = tvm.reduce_axis((0, KH), 'r_kh') - r_kw = tvm.reduce_axis((0, KW), 'r_kw') - U = tvm.compute((alpha, alpha, idxd(K, VK), C, VK), lambda eps, nu, k, c, kk: - tvm.sum(kernel[k * VK + kk][c][r_kh][r_kw].astype(out_dtype) * - G[eps][r_kh] * G[nu][r_kw], axis=[r_kh, r_kw]), name='U') + r_kh = te.reduce_axis((0, KH), 'r_kh') + r_kw = te.reduce_axis((0, KW), 'r_kw') + U = te.compute((alpha, alpha, idxd(K, VK), C, VK), lambda eps, nu, k, c, kk: + te.sum(kernel[k * VK + kk][c][r_kh][r_kw].astype(out_dtype) * + G[eps][r_kh] * G[nu][r_kw], axis=[r_kh, r_kw]), name='U') # transform image - r_eps = tvm.reduce_axis((0, alpha), 'r_eps') - r_nu = tvm.reduce_axis((0, alpha), 'r_nu') - V = tvm.compute((alpha, alpha, idxd(P, VP), C, VP), lambda eps, nu, b, c, bb: - tvm.sum(input_tile[c][b][r_eps][r_nu][bb].astype(out_dtype) * - B[r_eps][eps] * B[r_nu][nu], axis=[r_eps, r_nu]), name='V') + r_eps = te.reduce_axis((0, alpha), 'r_eps') + r_nu = te.reduce_axis((0, 
alpha), 'r_nu') + V = te.compute((alpha, alpha, idxd(P, VP), C, VP), lambda eps, nu, b, c, bb: + te.sum(input_tile[c][b][r_eps][r_nu][bb].astype(out_dtype) * + B[r_eps][eps] * B[r_nu][nu], axis=[r_eps, r_nu]), name='V') # batch gemm - c = tvm.reduce_axis((0, C), name='c') - M = tvm.compute((alpha, alpha, K, P), lambda eps, nu, k, b: - tvm.sum(U[eps][nu][idxd(k, VK)][c][idxm(k, VK)] * - V[eps][nu][idxd(b, VP)][c][idxm(b, VP)], axis=c), name='M') + c = te.reduce_axis((0, C), name='c') + M = te.compute((alpha, alpha, K, P), lambda eps, nu, k, b: + te.sum(U[eps][nu][idxd(k, VK)][c][idxm(k, VK)] * + V[eps][nu][idxd(b, VP)][c][idxm(b, VP)], axis=c), name='M') # inverse transform - r_eps = tvm.reduce_axis((0, alpha), 'r_eps') - r_nu = tvm.reduce_axis((0, alpha), 'r_nu') - Y = tvm.compute((K, P, m, m), lambda k, b, vh, vw: - tvm.sum(M[r_eps][r_nu][k][b] * A[r_eps][vh] * A[r_nu][vw], - axis=[r_eps, r_nu]), name='Y') + r_eps = te.reduce_axis((0, alpha), 'r_eps') + r_nu = te.reduce_axis((0, alpha), 'r_nu') + Y = te.compute((K, P, m, m), lambda k, b, vh, vw: + te.sum(M[r_eps][r_nu][k][b] * A[r_eps][vh] * A[r_nu][vw], + axis=[r_eps, r_nu]), name='Y') # unpack output - output = tvm.compute((N, K, H, W), lambda n, k, h, w: - Y[k][n * nH * nW + idxd(h, m) * nW + idxd(w, m), - idxm(h, m), idxm(w, m)], - name='output', tag='winograd_conv2d_output') + output = te.compute((N, K, H, W), lambda n, k, h, w: + Y[k][n * nH * nW + idxd(h, m) * nW + idxd(w, m), + idxm(h, m), idxm(w, m)], + name='output', tag='winograd_conv2d_output') # we have to manually assign effective GFLOP for winograd cfg.add_flop(2 * N * K * H * W * KH * KW * C) @@ -220,7 +221,7 @@ def _schedule_winograd(cfg, s, output, last): s[d].compute_inline() # transform kernel - if isinstance(U.op, tvm.tensor.ComputeOp): + if isinstance(U.op, tvm.te.ComputeOp): kernel, G = U.op.input_tensors s[G].compute_inline() eps, nu, k, c, kk, = s[U].op.axis @@ -236,7 +237,7 @@ def _schedule_winograd(cfg, s, output, last): 
s[U].vectorize(kk) s[U].parallel(k) - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() # transform image @@ -310,7 +311,7 @@ def conv2d_nchw_winograd_nnpack(cfg, data, kernel, strides, padding, dilation, o @autotvm.register_topi_schedule("conv2d_nchw_winograd_nnpack.arm_cpu") def schedule_conv2d_nchw_winograd_nnpack(cfg, outs): """Create schedule for conv2d_nchw_winograd_nnpack""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): if 'winograd_nnpack_conv2d_output' in op.tag: @@ -344,13 +345,13 @@ def _conv2d_arm_cpu_winograd_nnpack( cfg.define_knob('winograd_nnpack_algorithm', [convolution_algorithm]) assert N == 1 - with tvm.tag_scope("winograd_nnpack_conv2d_weight_transform"): + with tvm.te.tag_scope("winograd_nnpack_conv2d_weight_transform"): transformed_kernel = tvm.contrib.nnpack.convolution_inference_weight_transform( kernel, algorithm=cfg['winograd_nnpack_algorithm'].val) if autotvm.GLOBAL_SCOPE.in_tuning: - transformed_kernel = tvm.compute(transformed_kernel.shape, lambda *args: 0.0) + transformed_kernel = te.compute(transformed_kernel.shape, lambda *args: 0.0) - with tvm.tag_scope("winograd_nnpack_conv2d_output"): + with tvm.te.tag_scope("winograd_nnpack_conv2d_output"): output = tvm.contrib.nnpack.convolution_inference_without_weight_transform( data, transformed_kernel, bias=None, @@ -369,8 +370,8 @@ def _schedule_winograd_nnpack(cfg, s, output, last): (X, TK) = output.op.input_tensors[:2] # transform kernel - assert isinstance(TK.op, (tvm.tensor.ComputeOp, tvm.tensor.ExternOp, tvm.tensor.PlaceholderOp)) - if autotvm.GLOBAL_SCOPE.in_tuning and isinstance(TK.op, tvm.tensor.ComputeOp): + assert isinstance(TK.op, (te.tensor.ComputeOp, te.tensor.ExternOp, te.tensor.PlaceholderOp)) + if autotvm.GLOBAL_SCOPE.in_tuning and isinstance(TK.op, te.tensor.ComputeOp): # 
kernel transformation will be pre-computed during compilation, so we skip # this part to make tuning records correct s[TK].pragma(s[TK].op.axis[0], 'debug_skip_region') @@ -398,7 +399,7 @@ def conv2d_nchw_winograd_nnpack_without_weight_transform( W = (IW + pl + pr - 3) // WSTR + 1 assert N == 1 - with tvm.tag_scope("winograd_nnpack_conv2d_output"): + with tvm.te.tag_scope("winograd_nnpack_conv2d_output"): output = tvm.contrib.nnpack.convolution_inference_without_weight_transform( data=data, transformed_kernel=transformed_kernel, @@ -415,7 +416,7 @@ def conv2d_nchw_winograd_nnpack_without_weight_transform( @autotvm.register_topi_schedule("conv2d_nchw_winograd_nnpack_without_weight_transform.arm_cpu") def schedule_conv2d_nchw_winograd_nnpack_without_weight_transform(cfg, outs): """TOPI schedule callback""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): if 'winograd_nnpack_conv2d_output' in op.tag: diff --git a/topi/python/topi/arm_cpu/conv2d_alter_op.py b/topi/python/topi/arm_cpu/conv2d_alter_op.py index bfbf5d6..3a22611 100644 --- a/topi/python/topi/arm_cpu/conv2d_alter_op.py +++ b/topi/python/topi/arm_cpu/conv2d_alter_op.py @@ -20,6 +20,7 @@ import logging import tvm +from tvm import te from tvm import relay from tvm import autotvm @@ -58,7 +59,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): data, kernel = tinfos out_dtype = out_type.dtype - idxd = tvm.indexdiv + idxd = tvm.tir.indexdiv if topi_tmpl == "conv2d_nchw_spatial_pack.arm_cpu": assert data_layout == "NCHW" and kernel_layout == "OIHW" @@ -69,7 +70,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): new_attrs['kernel_layout'] = 'OIHW%do' % VC new_data = data - new_kernel = tvm.placeholder((idxd(CO, VC), CI, KH, KW, VC), dtype=kernel.dtype) + new_kernel = te.placeholder((idxd(CO, VC), CI, KH, KW, VC), dtype=kernel.dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, 
dilation, out_dtype], "conv2d_nchw_spatial_pack.arm_cpu") @@ -86,7 +87,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): new_attrs['kernel_layout'] = 'OHWI%do' % VC new_data = data - new_kernel = tvm.placeholder((idxd(CO, VC), KH, KW, CI, VC), dtype=kernel.dtype) + new_kernel = te.placeholder((idxd(CO, VC), KH, KW, CI, VC), dtype=kernel.dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, out_dtype], "conv2d_nhwc_spatial_pack.arm_cpu") @@ -113,10 +114,10 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): new_attrs['tile_size'] = tile_size new_data = data - new_kernel = tvm.placeholder((KH + tile_size - 1, - KW + tile_size -1, - idxd(CO, VC), CI, VC), - kernel.dtype) + new_kernel = te.placeholder((KH + tile_size - 1, + KW + tile_size -1, + idxd(CO, VC), CI, VC), + kernel.dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, out_dtype], 'conv2d_nchw_winograd.arm_cpu') @@ -141,7 +142,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): out_dtype=weight_dtype) new_data = data - new_kernel = tvm.placeholder((CO, CI, 8, 8), "float32") + new_kernel = te.placeholder((CO, CI, 8, 8), "float32") new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, None, strides, padding, dilation, out_dtype], @@ -160,7 +161,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): # Store the same config for the altered operator (workload) new_data = data - new_kernel = tvm.placeholder((idxd(CO, VC), CI, KH, KW, VC), dtype=kernel.dtype) + new_kernel = te.placeholder((idxd(CO, VC), CI, KH, KW, VC), dtype=kernel.dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, out_dtype], "depthwise_conv2d_nchw_spatial_pack.arm_cpu") diff --git a/topi/python/topi/arm_cpu/conv2d_int8.py b/topi/python/topi/arm_cpu/conv2d_int8.py index 5d177fe..06412b6 100644 --- a/topi/python/topi/arm_cpu/conv2d_int8.py 
+++ b/topi/python/topi/arm_cpu/conv2d_int8.py @@ -16,8 +16,7 @@ # under the License. # pylint: disable=invalid-name,unused-variable,unused-argument,no-member """Conv2D int8 schedule on ARM""" - -import tvm +from tvm import te from tvm import autotvm from .. import tag from ..util import get_const_tuple @@ -55,8 +54,8 @@ def conv2d_NCHWc_int8(cfg, data, kernel, strides, # If no config was set, we can fallback to NCHW config. if cfg.is_fallback: - _get_default_config(cfg, tvm.placeholder((n, in_channel, ih, iw), dtype=data.dtype), - tvm.placeholder((num_filter, in_channel, kh, kw), dtype=kernel.dtype), + _get_default_config(cfg, te.placeholder((n, in_channel, ih, iw), dtype=data.dtype), + te.placeholder((num_filter, in_channel, kh, kw), dtype=kernel.dtype), strides, padding, out_dtype) return nn.conv2d_NCHWc_int8_compute(data, kernel, @@ -71,7 +70,7 @@ def conv2d_NCHWc_int8(cfg, data, kernel, strides, @autotvm.register_topi_schedule("conv2d_NCHWc_int8.arm_cpu") def schedule_conv2d_NCHWc_int8(cfg, outs): """Create schedule for tensors""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def traverse(op): @@ -81,7 +80,7 @@ def schedule_conv2d_NCHWc_int8(cfg, outs): if op not in s.outputs: s[op].compute_inline() for tensor in op.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) if 'conv2d_NCHWc_int8' in op.tag: @@ -89,9 +88,9 @@ def schedule_conv2d_NCHWc_int8(cfg, outs): kernel_vec = conv_out.op.input_tensors[1] data_vec = conv_out.op.input_tensors[0] data = data_vec.op.input_tensors[0] \ - if isinstance(data_vec.op, tvm.tensor.ComputeOp) and "pad" not in data_vec.op.tag \ + if isinstance(data_vec.op, te.tensor.ComputeOp) and "pad" not in data_vec.op.tag \ else data_vec - if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag: + if 
isinstance(data.op, te.tensor.ComputeOp) and "pad" in data.op.tag: data_pad = data data = data_pad.op.input_tensors[0] diff --git a/topi/python/topi/arm_cpu/conv2d_spatial_pack.py b/topi/python/topi/arm_cpu/conv2d_spatial_pack.py index 032ac76..3bb9dc7 100644 --- a/topi/python/topi/arm_cpu/conv2d_spatial_pack.py +++ b/topi/python/topi/arm_cpu/conv2d_spatial_pack.py @@ -18,6 +18,7 @@ """Conv2D spatial pack implementation for ARM CPU""" from __future__ import absolute_import as _abs import tvm +from tvm import te from tvm import autotvm from .. import nn from ..util import get_const_tuple @@ -98,46 +99,46 @@ def conv2d_spatial_pack_nchw(cfg, data, kernel, strides, padding, dilation, if dilation_h != 1 or dilation_w != 1: # undilate input data dvshape = (N, OH // VH, OW // VW, CI, KH, KW, VH, VW) - data_vec = tvm.compute(dvshape, lambda n, h, w, ci, kh, kw, vh, vw: - data_pad[n][ci][(h*VH+vh)*HSTR+kh*dilation_h] - [(w*VW+vw)*WSTR+kw*dilation_w], - name='data_vec_undilated') + data_vec = te.compute(dvshape, lambda n, h, w, ci, kh, kw, vh, vw: + data_pad[n][ci][(h*VH+vh)*HSTR+kh*dilation_h] + [(w*VW+vw)*WSTR+kw*dilation_w], + name='data_vec_undilated') else: dvshape = (N, OH // VH, OW // VW, CI, VH*HSTR + KH-1, VW*WSTR + KW-1) - data_vec = tvm.compute(dvshape, lambda n, h, w, ci, vh, vw: - data_pad[n][ci][h*VH*HSTR+vh][w*VW*WSTR+vw], - name='data_vec') + data_vec = te.compute(dvshape, lambda n, h, w, ci, vh, vw: + data_pad[n][ci][h*VH*HSTR+vh][w*VW*WSTR+vw], + name='data_vec') if pre_packed: kernel_vec = kernel else: - kernel_vec = tvm.compute(kvshape, lambda co, ci, kh, kw, vc: - kernel[co*VC+vc][ci][kh][kw], - name='kernel_vec') + kernel_vec = te.compute(kvshape, lambda co, ci, kh, kw, vc: + kernel[co*VC+vc][ci][kh][kw], + name='kernel_vec') - ci = tvm.reduce_axis((0, CI), name='ci') - kh = tvm.reduce_axis((0, KH), name='kh') - kw = tvm.reduce_axis((0, KW), name='kw') + ci = te.reduce_axis((0, CI), name='ci') + kh = te.reduce_axis((0, KH), name='kh') + kw = 
te.reduce_axis((0, KW), name='kw') if dilation_h != 1 or dilation_w != 1: - conv = tvm.compute(ovshape, lambda n, co, h, w, vh, vw, vc: \ - tvm.sum(data_vec[n, h, w, ci, kh, kw, vh, vw].astype(out_dtype) * - kernel_vec[co, ci, kh, kw, vc].astype(out_dtype), - axis=[ci, kh, kw]), name='conv') + conv = te.compute(ovshape, lambda n, co, h, w, vh, vw, vc: \ + te.sum(data_vec[n, h, w, ci, kh, kw, vh, vw].astype(out_dtype) * + kernel_vec[co, ci, kh, kw, vc].astype(out_dtype), + axis=[ci, kh, kw]), name='conv') else: - conv = tvm.compute(ovshape, lambda n, co, h, w, vh, vw, vc: \ - tvm.sum(data_vec[n, h, w, ci, vh*HSTR+kh, vw*WSTR+kw].astype(out_dtype) * - kernel_vec[co, ci, kh, kw, vc].astype(out_dtype), - axis=[ci, kh, kw]), name='conv') - - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod - - output = tvm.compute(oshape, lambda n, co, h, w: - conv[n, - idxdiv(co, VC), idxdiv(h, VH), idxdiv(w, VW), - idxmod(h, VH), idxmod(w, VW), idxmod(co, VC)], - name='output_unpack', tag='spatial_conv2d_output') + conv = te.compute(ovshape, lambda n, co, h, w, vh, vw, vc: \ + te.sum(data_vec[n, h, w, ci, vh*HSTR+kh, vw*WSTR+kw].astype(out_dtype) * + kernel_vec[co, ci, kh, kw, vc].astype(out_dtype), + axis=[ci, kh, kw]), name='conv') + + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod + + output = te.compute(oshape, lambda n, co, h, w: + conv[n, + idxdiv(co, VC), idxdiv(h, VH), idxdiv(w, VW), + idxmod(h, VH), idxmod(w, VW), idxmod(co, VC)], + name='output_unpack', tag='spatial_conv2d_output') return output def schedule_conv2d_spatial_pack_nchw(cfg, s, data_vec, kernel_vec, @@ -216,7 +217,7 @@ def conv2d_spatial_pack_nhwc(cfg, data, kernel, strides, padding, dilation, out_ dilated_kernel_w = (KW - 1) * dilation_w + 1 pad_top, pad_left, pad_down, pad_right = \ - get_pad_tuple(padding, (dilated_kernel_h, dilated_kernel_w)) + get_pad_tuple(padding, (dilated_kernel_h, dilated_kernel_w)) HSTR, WSTR = strides if isinstance(strides, (tuple, list)) else (strides, strides) OH = (IH + pad_top 
+ pad_down - dilated_kernel_h) // HSTR + 1 @@ -257,40 +258,41 @@ def conv2d_spatial_pack_nhwc(cfg, data, kernel, strides, padding, dilation, out_ if dilation_h != 1 or dilation_w != 1: # undilate input data dvshape = (N, OHO, OWO, KH, KW, IC, OHI, OWI) - data_vec = tvm.compute(dvshape, lambda n, oho, owo, kh, kw, ic, ohi, owi: - data_pad[n][(oho*OHI+ohi)*HSTR+kh*dilation_h] - [(owo*OWI+owi)*WSTR+kw*dilation_w][ic], - name='data_vec_undilated') + data_vec = te.compute(dvshape, lambda n, oho, owo, kh, kw, ic, ohi, owi: + data_pad[n][(oho*OHI+ohi)*HSTR+kh*dilation_h] + [(owo*OWI+owi)*WSTR+kw*dilation_w][ic], + name='data_vec_undilated') else: dvshape = (N, OHO, OWO, KH + (OHI-1)*HSTR, KW + (OWI-1)*WSTR, IC) - data_vec = tvm.compute(dvshape, lambda n, oho, owo, ohi, owi, ic: - data_pad[n][oho*OHI*HSTR+ohi][owo*OWI*WSTR+owi][ic], - name='data_vec') - kernel_vec = tvm.compute(kvshape, lambda oco, kh, kw, ic, oci: \ - kernel[kh][kw][ic][oco*OCI+oci], - name='kernel_vec') + data_vec = te.compute(dvshape, lambda n, oho, owo, ohi, owi, ic: + data_pad[n][oho*OHI*HSTR+ohi][owo*OWI*WSTR+owi][ic], + name='data_vec') + kernel_vec = te.compute(kvshape, lambda oco, kh, kw, ic, oci: \ + kernel[kh][kw][ic][oco*OCI+oci], + name='kernel_vec') - ic = tvm.reduce_axis((0, IC), name='ic') - kh = tvm.reduce_axis((0, KH), name='kh') - kw = tvm.reduce_axis((0, KW), name='kw') + ic = te.reduce_axis((0, IC), name='ic') + kh = te.reduce_axis((0, KH), name='kh') + kw = te.reduce_axis((0, KW), name='kw') if dilation_h != 1 or dilation_w != 1: - conv = tvm.compute(ovshape, lambda n, oho, owo, oco, ohi, owi, oci: \ - tvm.sum(data_vec[n, oho, owo, kh, kw, ohi, owi, ic].astype(out_dtype) * - kernel_vec[oco, kh, kw, ic, oci].astype(out_dtype), - axis=[ic, kh, kw]), name='conv') + conv = te.compute(ovshape, lambda n, oho, owo, oco, ohi, owi, oci: \ + te.sum(data_vec[n, oho, owo, kh, kw, ohi, owi, ic].astype(out_dtype) * + kernel_vec[oco, kh, kw, ic, oci].astype(out_dtype), + axis=[ic, kh, kw]), 
name='conv') else: - conv = tvm.compute(ovshape, lambda n, oho, owo, oco, ohi, owi, oci: \ - tvm.sum(data_vec[n, oho, owo, ohi*HSTR+kh, owi*WSTR+kw, ic].astype(out_dtype) * - kernel_vec[oco, kh, kw, ic, oci].astype(out_dtype), - axis=[ic, kh, kw]), name='conv') - - idiv = tvm.indexdiv - imod = tvm.indexmod - output = tvm.compute(oshape, lambda n, oho, owo, oc: - conv[n][idiv(oho, OHI)][idiv(owo, OWI)][idiv(oc, OCI)]\ - [imod(oho, OHI)][imod(owo, OWI)][imod(oc, OCI)], - name='output_unpack', tag='spatial_conv_output_NHWC') + conv = te.compute( + ovshape, lambda n, oho, owo, oco, ohi, owi, oci: \ + te.sum(data_vec[n, oho, owo, ohi*HSTR+kh, owi*WSTR+kw, ic].astype(out_dtype) * + kernel_vec[oco, kh, kw, ic, oci].astype(out_dtype), + axis=[ic, kh, kw]), name='conv') + + idiv = tvm.tir.indexdiv + imod = tvm.tir.indexmod + output = te.compute(oshape, lambda n, oho, owo, oc: + conv[n][idiv(oho, OHI)][idiv(owo, OWI)][idiv(oc, OCI)]\ + [imod(oho, OHI)][imod(owo, OWI)][imod(oc, OCI)], + name='output_unpack', tag='spatial_conv_output_NHWC') return output def schedule_conv2d_spatial_pack_nhwc(cfg, s, op, output): diff --git a/topi/python/topi/arm_cpu/conv2d_transpose.py b/topi/python/topi/arm_cpu/conv2d_transpose.py index 93ff029..7eaa5ee 100644 --- a/topi/python/topi/arm_cpu/conv2d_transpose.py +++ b/topi/python/topi/arm_cpu/conv2d_transpose.py @@ -19,6 +19,7 @@ from __future__ import absolute_import as _abs import tvm +from tvm import te from tvm import autotvm from ..nn import dilate, pad, get_pad_tuple @@ -31,10 +32,10 @@ def conv2d_transpose_nchw(cfg, Input, Filter, strides, padding, out_dtype): Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 4-D with shape [in_channel, num_filter, filter_height, filter_width] strides : tuple of two ints @@ -48,7 +49,7 @@ def conv2d_transpose_nchw(cfg, Input, Filter, strides, padding, out_dtype): Returns ------- - 
Output : tvm.Tensor + Output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ return _decl_spatial_pack(cfg, Input, Filter, strides, padding, "NCHW", out_dtype, 2) @@ -105,31 +106,31 @@ def _decl_spatial_pack(cfg, data, kernel, strides, padding, layout, out_dtype, n ovshape = (N, CO // VC, OH // VH, OW // VW, VH, VW, VC) oshape = (N, CO, OH, OW) - data_vec = tvm.compute(dvshape, lambda n, h, w, ci, vh, vw: - data_pad[n][ci][h*VH + vh][w*VW + vw], - name='data_vec') + data_vec = te.compute(dvshape, lambda n, h, w, ci, vh, vw: + data_pad[n][ci][h*VH + vh][w*VW + vw], + name='data_vec') - kernel_vec = tvm.compute(kvshape, lambda co, ci, kh, kw, vc: - kernel[ci][co*VC+vc][kh][kw], - name='kernel_vec_conv2d_transpose') + kernel_vec = te.compute(kvshape, lambda co, ci, kh, kw, vc: + kernel[ci][co*VC+vc][kh][kw], + name='kernel_vec_conv2d_transpose') - ci = tvm.reduce_axis((0, CI), name='ci') - kh = tvm.reduce_axis((0, KH), name='kh') - kw = tvm.reduce_axis((0, KW), name='kw') + ci = te.reduce_axis((0, CI), name='ci') + kh = te.reduce_axis((0, KH), name='kh') + kw = te.reduce_axis((0, KW), name='kw') - conv = tvm.compute(ovshape, lambda n, co, h, w, vh, vw, vc: \ - tvm.sum(data_vec[n, h, w, ci, vh + kh, vw + kw].astype(out_dtype) * - kernel_vec[co, ci, KH - 1 - kh, KW - 1 - kw, vc].astype(out_dtype), - axis=[ci, kh, kw]), name='conv') + conv = te.compute(ovshape, lambda n, co, h, w, vh, vw, vc: \ + te.sum(data_vec[n, h, w, ci, vh + kh, vw + kw].astype(out_dtype) * + kernel_vec[co, ci, KH - 1 - kh, KW - 1 - kw, vc].astype(out_dtype), + axis=[ci, kh, kw]), name='conv') - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod - output = tvm.compute(oshape, lambda n, co, h, w: - conv[n, - idxdiv(co, VC), idxdiv(h, VH), idxdiv(w, VW), - idxmod(h, VH), idxmod(w, VW), idxmod(co, VC)], - name='output_unpack', tag='spatial_conv2d_transpose_output') + output = te.compute(oshape, lambda n, co, h, w: + 
conv[n, + idxdiv(co, VC), idxdiv(h, VH), idxdiv(w, VW), + idxmod(h, VH), idxmod(w, VW), idxmod(co, VC)], + name='output_unpack', tag='spatial_conv2d_transpose_output') return output @@ -137,7 +138,7 @@ def _decl_spatial_pack(cfg, data, kernel, strides, padding, layout, out_dtype, n @autotvm.register_topi_schedule("conv2d_transpose_nchw.arm_cpu") def schedule_conv2d_transpose_nchw(cfg, outs): """Schedule conv2d transpose for arm cpu""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): if 'spatial_conv2d_transpose_output' in op.tag: @@ -155,7 +156,7 @@ def schedule_conv2d_transpose_nchw(cfg, outs): kernel = kernel_vec.op.input_tensors[0] else: kernel = kernel_vec - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() schedule_conv2d_spatial_pack_nchw(cfg, s, data_vec, kernel_vec, diff --git a/topi/python/topi/arm_cpu/depthwise_conv2d.py b/topi/python/topi/arm_cpu/depthwise_conv2d.py index 8d668f3..5214972 100644 --- a/topi/python/topi/arm_cpu/depthwise_conv2d.py +++ b/topi/python/topi/arm_cpu/depthwise_conv2d.py @@ -18,6 +18,7 @@ """Depthwise convolution schedule for ARM CPU""" import tvm +from tvm import te from tvm import autotvm from .. import nn @@ -48,8 +49,8 @@ def schedule_depthwise_conv2d_nchw(cfg, outs): s: Schedule The computation schedule for depthwise_conv2d nchw. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(cfg, s, data, data_pad, kernel, output): A, B, C = data, kernel, output @@ -129,7 +130,7 @@ def schedule_depthwise_conv2d_nchw(cfg, outs): kernel = op.input_tensors[1] data = op.input_tensors[0] data_pad = None - if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag: + if isinstance(data.op, tvm.te.ComputeOp) and "pad" in data.op.tag: data_pad = data data = data_pad.op.input_tensors[0] _schedule(cfg, s, data, data_pad, kernel, output) @@ -147,10 +148,10 @@ def depthwise_conv2d_nchw_spatial_pack(cfg, data, kernel, strides, padding, dila cfg: ConfigEntity The config for this template - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 4-D with shape [num_filter, multiplier, filter_height, filter_width] or pre-packed 5-D with shape [num_filter_chunk, multiplier, filter_height, filter_width, num_filter_block] @@ -169,7 +170,7 @@ def depthwise_conv2d_nchw_spatial_pack(cfg, data, kernel, strides, padding, dila Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ @@ -179,8 +180,8 @@ def depthwise_conv2d_nchw_spatial_pack(cfg, data, kernel, strides, padding, dila @autotvm.register_topi_schedule("depthwise_conv2d_nchw_spatial_pack.arm_cpu") def schedule_depthwise_conv2d_nchw_spatial_pack(cfg, outs): """Create the schedule for depthwise_conv2d_nchw_spatial_pack""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'spatial_depthwise_conv2d_nchw_output': @@ 
-192,7 +193,7 @@ def schedule_depthwise_conv2d_nchw_spatial_pack(cfg, outs): kernel = kernel_vec.op.input_tensors[0] else: kernel = kernel_vec - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() _schedule_spatial_pack(cfg, s, data_vec, kernel_vec, conv, output, outs[0]) @@ -284,50 +285,50 @@ def _decl_spatial_pack(cfg, data, kernel, strides, padding, dilation, out_dtype, if dilation_h != 1 or dilation_w != 1: # undilate input data dvshape = (N, OH // VH, OW // VW, C, KH, KW, VH, VW) - data_vec = tvm.compute(dvshape, lambda n, h, w, c, kh, kw, vh, vw: - data_pad[n][c][(h * VH + vh) * HSTR + kh * dilation_h] - [(w*VW+vw)*WSTR+kw*dilation_w], - name='data_vec_undilated') + data_vec = te.compute(dvshape, lambda n, h, w, c, kh, kw, vh, vw: + data_pad[n][c][(h * VH + vh) * HSTR + kh * dilation_h] + [(w*VW+vw)*WSTR+kw*dilation_w], + name='data_vec_undilated') else: dvshape = (N, OH // VH, OW // VW, C, VH*HSTR + KH-1, VW*WSTR + KW-1) - data_vec = tvm.compute(dvshape, lambda n, h, w, c, vh, vw: - data_pad[n][c][h * VH * HSTR + vh][w * VW * WSTR + vw], - name='data_vec') + data_vec = te.compute(dvshape, lambda n, h, w, c, vh, vw: + data_pad[n][c][h * VH * HSTR + vh][w * VW * WSTR + vw], + name='data_vec') if pre_packed: kernel_vec = kernel else: - kernel_vec = tvm.compute(kvshape, lambda co, m, kh, kw, vc: - kernel[co*VC+vc][m][kh][kw], - name='kernel_vec') + kernel_vec = te.compute(kvshape, lambda co, m, kh, kw, vc: + kernel[co*VC+vc][m][kh][kw], + name='kernel_vec') - kh = tvm.reduce_axis((0, KH), name='kh') - kw = tvm.reduce_axis((0, KW), name='kw') + kh = te.reduce_axis((0, KH), name='kh') + kw = te.reduce_axis((0, KW), name='kw') - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod if dilation_h != 1 or dilation_w != 1: - conv = tvm.compute( + conv = te.compute( ovshape, lambda n, co, h, w, 
vh, vw, vc: \ - tvm.sum(data_vec[n, h, w, idxdiv(co * VC + vc, M), kh, kw, vh, vw] - .astype(out_dtype) * - kernel_vec[idxdiv(co, M), idxmod(co, M), kh, kw, vc].astype(out_dtype), - axis=[kh, kw]), name='depthwise_conv') + te.sum(data_vec[n, h, w, idxdiv(co * VC + vc, M), kh, kw, vh, vw] + .astype(out_dtype) * + kernel_vec[idxdiv(co, M), idxmod(co, M), kh, kw, vc].astype(out_dtype), + axis=[kh, kw]), name='depthwise_conv') else: - conv = tvm.compute(ovshape, lambda n, co, h, w, vh, vw, vc: \ - tvm.sum(data_vec[n, h, w, idxdiv((co * VC + vc), M), vh * HSTR + kh, - vw * WSTR + kw].astype(out_dtype) * - kernel_vec[idxdiv(co, M), - idxmod(co, M), - kh, kw, vc].astype(out_dtype), - axis=[kh, kw]), name='depthwise_conv') - - output = tvm.compute(oshape, lambda n, co, h, w: - conv[n, - idxdiv(co, VC), idxdiv(h, VH), idxdiv(w, VW), - idxmod(h, VH), idxmod(w, VW), idxmod(co, VC)], - name='output_unpack', tag='spatial_depthwise_conv2d_nchw_output') + conv = te.compute(ovshape, lambda n, co, h, w, vh, vw, vc: \ + te.sum(data_vec[n, h, w, idxdiv((co * VC + vc), M), vh * HSTR + kh, + vw * WSTR + kw].astype(out_dtype) * + kernel_vec[idxdiv(co, M), + idxmod(co, M), + kh, kw, vc].astype(out_dtype), + axis=[kh, kw]), name='depthwise_conv') + + output = te.compute(oshape, lambda n, co, h, w: + conv[n, + idxdiv(co, VC), idxdiv(h, VH), idxdiv(w, VW), + idxmod(h, VH), idxmod(w, VW), idxmod(co, VC)], + name='output_unpack', tag='spatial_depthwise_conv2d_nchw_output') return output def _schedule_spatial_pack(cfg, s, data_vec, kernel_vec, @@ -343,10 +344,10 @@ def _schedule_spatial_pack(cfg, s, data_vec, kernel_vec, data_pad = data_vec.op.input_tensors[0] if data_pad.op.name == "data_pad": - assert isinstance(data_pad.op, tvm.tensor.ComputeOp) + assert isinstance(data_pad.op, tvm.te.ComputeOp) has_padding = True else: - assert isinstance(data_pad.op, tvm.tensor.PlaceholderOp) + assert isinstance(data_pad.op, tvm.te.PlaceholderOp) has_padding = False cfg.define_knob('data_pad_inline', [0, 
1, 2, 3, 4]) diff --git a/topi/python/topi/arm_cpu/injective.py b/topi/python/topi/arm_cpu/injective.py index 644a7e3..696b708 100644 --- a/topi/python/topi/arm_cpu/injective.py +++ b/topi/python/topi/arm_cpu/injective.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name, unused-variable """Schedule for pooling operators""" import tvm +from tvm import te from ..util import is_empty_shape def schedule_injective_from_existing(sch, out): @@ -58,14 +59,14 @@ def schedule_injective(outs): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) x = outs[0] if list(s[x].op.axis): # do not vectorize for broadcast (io, ii) = s[x].split(list(s[x].op.axis)[-1], 8) s[x].vectorize(ii) - tvm.schedule.AutoInlineInjective(s) + tvm.te.schedule.AutoInlineInjective(s) if not is_empty_shape(x.shape): schedule_injective_from_existing(s, x) @@ -85,10 +86,10 @@ def schedule_concatenate(outs): sch: Schedule The computation schedule for the op. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) x = outs[0] - tvm.schedule.AutoInlineInjective(s) + tvm.te.schedule.AutoInlineInjective(s) if len(s[x].op.axis) >= 4: fused = s[x].fuse(s[x].op.axis[0], s[x].op.axis[1], s[x].op.axis[2]) s[x].parallel(fused) diff --git a/topi/python/topi/arm_cpu/tensor_intrin.py b/topi/python/topi/arm_cpu/tensor_intrin.py index 2f300a1..135c87d 100644 --- a/topi/python/topi/arm_cpu/tensor_intrin.py +++ b/topi/python/topi/arm_cpu/tensor_intrin.py @@ -18,6 +18,7 @@ """Conv2D int8 schedule on ARM""" import tvm +from tvm import te def dot_int8_int8_int32(int32_lanes, dtype='uint'): """ @@ -57,27 +58,27 @@ def dot_int8_int8_int32(int32_lanes, dtype='uint'): """ num_int8_elements = 4 # 4 int8 elements in int32 - data = tvm.placeholder((num_int8_elements,), dtype='%s8' % dtype, name='data') - kernel = tvm.placeholder((int32_lanes, num_int8_elements), dtype='%s8' % dtype, name='kernel') + data = te.placeholder((num_int8_elements,), dtype='%s8' % dtype, name='data') + kernel = te.placeholder((int32_lanes, num_int8_elements), dtype='%s8' % dtype, name='kernel') - k = tvm.reduce_axis((0, num_int8_elements), name='k') - C = tvm.compute((int32_lanes,), - lambda i: tvm.sum(data[k].astype('%s32' % dtype) * - kernel[i, k].astype('%s32' % dtype), - axis=k), name="C") + k = te.reduce_axis((0, num_int8_elements), name='k') + C = te.compute((int32_lanes,), + lambda i: te.sum(data[k].astype('%s32' % dtype) * + kernel[i, k].astype('%s32' % dtype), + axis=k), name="C") - a_buffer = tvm.decl_buffer(data.shape, dtype='%s8' % dtype, name="a_buffer", - offset_factor=1, - strides=[1]) - b_buffer = tvm.decl_buffer(kernel.shape, dtype='%s8' % dtype, name="b_buffer", - offset_factor=1, - strides=[tvm.var('s'), 1]) + a_buffer = tvm.tir.decl_buffer(data.shape, dtype='%s8' % dtype, 
name="a_buffer", + offset_factor=1, + strides=[1]) + b_buffer = tvm.tir.decl_buffer(kernel.shape, dtype='%s8' % dtype, name="b_buffer", + offset_factor=1, + strides=[te.var('s'), 1]) def _intrin_func(ins, outs): def _instr(index): - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() if index == 1: - ib.emit(outs[0].vstore(0, tvm.const(0, '%s32x%d' % (dtype, int32_lanes)))) + ib.emit(outs[0].vstore(0, tvm.tir.const(0, '%s32x%d' % (dtype, int32_lanes)))) return ib.get() dtype_a = '%s8x%d' % (dtype, num_int8_elements) @@ -85,26 +86,26 @@ def dot_int8_int8_int32(int32_lanes, dtype='uint'): dtype_c = '%s32x%d' % (dtype, int32_lanes) a_int8 = ins[0].vload([0], dtype_a) - re_int32 = tvm.call_pure_intrin('%s32' % dtype, 'reinterpret', a_int8) + re_int32 = tvm.tir.call_pure_intrin('%s32' % dtype, 'reinterpret', a_int8) # broadcast a vec_ai32 = re_int32.astype(dtype_c) - vec_a = tvm.call_pure_intrin(dtype_b, 'reinterpret', vec_ai32) + vec_a = tvm.tir.call_pure_intrin(dtype_b, 'reinterpret', vec_ai32) vec_b = ins[1].vload([0, 0], dtype_b) vec_c = outs[0].vload([0], dtype_c) inst = 'udot' if dtype == 'uint' else 'sdot' inst = 'llvm.aarch64.neon.%s.v%di32.v%di8' % ( inst, int32_lanes, int32_lanes * num_int8_elements) - vdot = tvm.call_llvm_intrin(dtype_c, - inst, - tvm.const(2, 'uint32'), - vec_c, vec_a, vec_b) + vdot = tvm.tir.call_llvm_intrin(dtype_c, + inst, + tvm.tir.const(2, 'uint32'), + vec_c, vec_a, vec_b) ib.emit(outs[0].vstore(0, vdot)) return ib.get() # body, reset, update return _instr(0), _instr(1), _instr(2) - with tvm.build_config(offset_factor=1, partition_const_loop=True): - return tvm.decl_tensor_intrin(C.op, _intrin_func, binds={data:a_buffer, kernel:b_buffer}) + with tvm.target.build_config(offset_factor=1, partition_const_loop=True): + return te.decl_tensor_intrin(C.op, _intrin_func, binds={data:a_buffer, kernel:b_buffer}) diff --git a/topi/python/topi/bifrost/conv2d.py b/topi/python/topi/bifrost/conv2d.py index 816024e..92e874a 100644 --- 
a/topi/python/topi/bifrost/conv2d.py +++ b/topi/python/topi/bifrost/conv2d.py @@ -19,6 +19,7 @@ """conv2d schedule on ARM Mali (Bifrost) GPU""" import tvm +from tvm import te from tvm import relay from tvm import autotvm @@ -41,10 +42,10 @@ def conv2d_nchw_spatial_pack(cfg, data, kernel, strides, padding, dilation, out_ cfg: ConfigEntity The config for this template - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 4-D with shape [num_filter, in_channel, filter_height, filter_width] or pre-packed 5-D with shape [num_filter_chunk, in_channel, filter_height, filter_width, num_filter_block] @@ -63,7 +64,7 @@ def conv2d_nchw_spatial_pack(cfg, data, kernel, strides, padding, dilation, out_ Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ return conv2d_spatial_pack_nchw(cfg, data, kernel, strides, padding, @@ -87,7 +88,7 @@ def schedule_conv2d_nchw_spatial_pack(cfg, outs): s: Schedule The computation schedule for conv2d """ - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): # schedule conv2d @@ -104,7 +105,7 @@ def schedule_conv2d_nchw_spatial_pack(cfg, outs): kernel = kernel_vec.op.input_tensors[0] else: kernel = kernel_vec - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() _schedule_spatial_pack(cfg, s, output, conv, data_vec, kernel_vec) @@ -125,12 +126,12 @@ def _schedule_spatial_pack(cfg, s, output, conv, data_vec, kernel_vec): BW, TW, VW = cfg["tile_ow"].size # schedule padding - if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag: + if isinstance(data.op, tvm.te.ComputeOp) and "pad" in data.op.tag: data_pad = data s[data_pad].compute_inline() # schedule data packing - if 
isinstance(data_vec.op, tvm.tensor.ComputeOp) and data_vec.op.name == 'data_vec_undilated': + if isinstance(data_vec.op, te.tensor.ComputeOp) and data_vec.op.name == 'data_vec_undilated': _, h, w, ci, _, _, vh, vw = s[data_vec].op.axis else: _, h, w, ci, vh, vw = s[data_vec].op.axis @@ -140,7 +141,7 @@ def _schedule_spatial_pack(cfg, s, output, conv, data_vec, kernel_vec): if vw.dom.extent.value < max_unroll: s[data_vec].unroll(vw) - if isinstance(kernel_vec.op, tvm.tensor.ComputeOp) and kernel_vec.name == 'kernel_vec': + if isinstance(kernel_vec.op, tvm.te.ComputeOp) and kernel_vec.name == 'kernel_vec': if autotvm.GLOBAL_SCOPE.in_tuning: # kernel packing will be pre-computed during compilation, so we skip # this part to make tuning records correct @@ -151,8 +152,8 @@ def _schedule_spatial_pack(cfg, s, output, conv, data_vec, kernel_vec): fused = s[kernel_vec].fuse(co, ci, kh, kw, vc) fused, vec = s[kernel_vec].split(fused, VC) bb, tt = s[kernel_vec].split(fused, max_threads) - s[kernel_vec].bind(bb, tvm.thread_axis("blockIdx.x")) - s[kernel_vec].bind(tt, tvm.thread_axis("threadIdx.x")) + s[kernel_vec].bind(bb, te.thread_axis("blockIdx.x")) + s[kernel_vec].bind(tt, te.thread_axis("threadIdx.x")) if VC in vec_size: s[kernel_vec].vectorize(vec) @@ -193,7 +194,7 @@ def conv2d_nchw_winograd(cfg, data, kernel, strides, padding, dilation, out_dtyp @autotvm.register_topi_schedule("conv2d_nchw_winograd.bifrost") def schedule_conv2d_nchw_winograd(cfg, outs): - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): if 'winograd_conv2d_output' in op.tag: @@ -210,7 +211,7 @@ def _decl_winograd_kernel_transform(kernel, tile_size, G): Parameters ---------- - kernel : tvm.Tensor + kernel : tvm.te.Tensor The kernel to transform tile_size : int @@ -218,7 +219,7 @@ def _decl_winograd_kernel_transform(kernel, tile_size, G): Returns ------- - U : tvm.Tensor + U : tvm.te.Tensor Transformed kernel """ @@ -238,22 +239,22 @@ def 
_decl_winograd_kernel_transform(kernel, tile_size, G): # Padded Kernel [K_round, C, KH, KW] # Pad the number of kernels to multiple of ALIGN - padded_kernel = tvm.compute((K_round, C, KH, KW), - lambda k, c, h, w: - tvm.if_then_else(k < K, - kernel[k][c][h][w], - tvm.const(0, out_dtype)), - name='padded_kernel') + padded_kernel = te.compute((K_round, C, KH, KW), + lambda k, c, h, w: + tvm.tir.if_then_else(k < K, + kernel[k][c][h][w], + tvm.tir.const(0, out_dtype)), + name='padded_kernel') # U [alpha, alpha, K_round, C] # Perform the kernel transform - r_kh = tvm.reduce_axis((0, KH), 'r_kh') - r_kw = tvm.reduce_axis((0, KW), 'r_kw') - U = tvm.compute((alpha, alpha, K_round, C), - lambda eps, nu, k, c: - tvm.sum(padded_kernel[k][c][r_kh][r_kw] * G[eps][r_kh] * G[nu][r_kw], - axis=[r_kh, r_kw]), - name='U') + r_kh = te.reduce_axis((0, KH), 'r_kh') + r_kw = te.reduce_axis((0, KW), 'r_kw') + U = te.compute((alpha, alpha, K_round, C), + lambda eps, nu, k, c: + te.sum(padded_kernel[k][c][r_kh][r_kw] * G[eps][r_kh] * G[nu][r_kw], + axis=[r_kh, r_kw]), + name='U') return U @@ -307,10 +308,10 @@ def _decl_winograd(cfg, data, kernel, strides, padding, dilation, out_dtype, til cfg.define_knob("data_transform_wgy", [1, 2, 4, 8, 16, 32, 64]) # Pack input tile - input_tile = tvm.compute((N, C, H + 2, W + 2), - lambda n, c, h, w: - data_pad[n][c][h][w], - name='d') + input_tile = te.compute((N, C, H + 2, W + 2), + lambda n, c, h, w: + data_pad[n][c][h][w], + name='d') if pre_computed: U = kernel @@ -319,33 +320,33 @@ def _decl_winograd(cfg, data, kernel, strides, padding, dilation, out_dtype, til # V [alpha * alpha, C, P_round) # Perform the image transform - r_eps = tvm.reduce_axis((0, alpha), 'r_eps') - r_nu = tvm.reduce_axis((0, alpha), 'r_nu') - V = tvm.compute((alpha * alpha, C, P_round), - lambda epsnu, c, b: - tvm.sum(input_tile[b // (nH*nW)][c][b // nW % nH * m + r_eps][b % nW * m +r_nu]\ - * B[r_eps][epsnu // alpha] * B[r_nu][epsnu % alpha], - axis=[r_eps, r_nu]), - 
name='V') + r_eps = te.reduce_axis((0, alpha), 'r_eps') + r_nu = te.reduce_axis((0, alpha), 'r_nu') + V = te.compute((alpha * alpha, C, P_round), + lambda epsnu, c, b: + te.sum(input_tile[b // (nH*nW)][c][b // nW % nH * m + r_eps][b % nW * m +r_nu]\ + * B[r_eps][epsnu // alpha] * B[r_nu][epsnu % alpha], + axis=[r_eps, r_nu]), + name='V') # Winograd GEMM is a wrapper around batched GEMM to convert U to a 3D Tensor _, M = decl_winograd_gemm(cfg, U, V) # Y [K, P, m, m] # Winograd output transform - r_eps = tvm.reduce_axis((0, alpha), 'r_eps') - r_nu = tvm.reduce_axis((0, alpha), 'r_nu') - Y = tvm.compute((K, P, m, m), lambda k, b, vh, vw: - tvm.sum(M[r_eps * alpha + r_nu][k][b] * A[r_eps][vh] * A[r_nu][vw], - axis=[r_eps, r_nu]), name='Y') + r_eps = te.reduce_axis((0, alpha), 'r_eps') + r_nu = te.reduce_axis((0, alpha), 'r_nu') + Y = te.compute((K, P, m, m), lambda k, b, vh, vw: + te.sum(M[r_eps * alpha + r_nu][k][b] * A[r_eps][vh] * A[r_nu][vw], + axis=[r_eps, r_nu]), name='Y') # Output [N, K, H, W] # Unpack back to NCHW format # The last term ensures alignment is not lost to bound inference - output = tvm.compute((N, K, H, W), lambda n, k, h, w: - Y[k][n * nH * nW + (h//m) * nW + w//m][h % m][w % m] - + tvm.const(0, out_dtype) * M[(alpha*alpha)-1][K_round-1][P_round-1], - name='output', tag='winograd_conv2d_output') + output = te.compute((N, K, H, W), lambda n, k, h, w: + Y[k][n * nH * nW + (h//m) * nW + w//m][h % m][w % m] + + tvm.tir.const(0, out_dtype) * M[(alpha*alpha)-1][K_round-1][P_round-1], + name='output', tag='winograd_conv2d_output') return output @@ -363,7 +364,7 @@ def _schedule_winograd(cfg, s, op): d, B = s[V].op.input_tensors data_pad = s[d].op.input_tensors[0] - if isinstance(U.op, tvm.tensor.ComputeOp): + if isinstance(U.op, tvm.te.ComputeOp): padded_kernel, G = s[U].op.input_tensors kernel = s[padded_kernel].op.input_tensors[0] s[G].compute_inline() @@ -390,7 +391,7 @@ def _schedule_winograd(cfg, s, op): yo, xo, yi, xi = tile_and_bind(s, U, k, c, 
1, 4) # Dilation - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() # Pad data @@ -485,7 +486,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): data, kernel = tinfos out_dtype = out_type.dtype - idxd = tvm.indexdiv + idxd = tvm.tir.indexdiv if topi_tmpl == "conv2d_nchw_spatial_pack.bifrost": assert data_layout == "NCHW" and kernel_layout == "OIHW" @@ -496,7 +497,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): new_attrs['kernel_layout'] = 'OIHW%do' % VC new_data = data - new_kernel = tvm.placeholder((idxd(CO, VC), CI, KH, KW, VC), dtype=kernel.dtype) + new_kernel = te.placeholder((idxd(CO, VC), CI, KH, KW, VC), dtype=kernel.dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, out_dtype], "conv2d_nchw_spatial_pack.bifrost") @@ -519,7 +520,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): new_attrs['tile_size'] = tile_size new_data = data - new_kernel = tvm.placeholder( + new_kernel = te.placeholder( (KH + tile_size - 1, KW + tile_size -1, CO, CI), kernel.dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, out_dtype], diff --git a/topi/python/topi/bifrost/dense.py b/topi/python/topi/bifrost/dense.py index 2a85db7..7104842 100644 --- a/topi/python/topi/bifrost/dense.py +++ b/topi/python/topi/bifrost/dense.py @@ -16,10 +16,7 @@ # under the License. # pylint: disable=invalid-name,unused-variable """dense schedule on ARM Mali Biforst GPU""" - -from __future__ import absolute_import as _abs - -import tvm +from tvm import te from tvm import autotvm from .. import nn @@ -47,8 +44,8 @@ def schedule_dense(cfg, outs): s: Schedule The computation schedule for dense. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'dense': @@ -79,10 +76,10 @@ def schedule_dense(cfg, outs): by, ty, yi = cfg['tile_y'].apply(s, output, y) bx, tx, xi = cfg['tile_x'].apply(s, output, x) - s[output].bind(by, tvm.thread_axis('blockIdx.y')) - s[output].bind(bx, tvm.thread_axis('blockIdx.x')) - s[output].bind(ty, tvm.thread_axis('threadIdx.y')) - s[output].bind(tx, tvm.thread_axis('threadIdx.x')) + s[output].bind(by, te.thread_axis('blockIdx.y')) + s[output].bind(bx, te.thread_axis('blockIdx.x')) + s[output].bind(ty, te.thread_axis('threadIdx.y')) + s[output].bind(tx, te.thread_axis('threadIdx.x')) if cfg['tile_y'].size[-1] < max_unroll: s[output].unroll(yi) @@ -108,6 +105,6 @@ def fuse_and_bind(s, tensor, axis=None, num_thread=None): axis = axis or s[tensor].op.axis fused = s[tensor].fuse(*axis) bx, tx = s[tensor].split(fused, num_thread) - s[tensor].bind(bx, tvm.thread_axis("blockIdx.x")) - s[tensor].bind(tx, tvm.thread_axis("threadIdx.x")) + s[tensor].bind(bx, te.thread_axis("blockIdx.x")) + s[tensor].bind(tx, te.thread_axis("threadIdx.x")) return bx, tx diff --git a/topi/python/topi/bifrost/depthwise_conv2d.py b/topi/python/topi/bifrost/depthwise_conv2d.py index 4f7b0db..7a96705 100644 --- a/topi/python/topi/bifrost/depthwise_conv2d.py +++ b/topi/python/topi/bifrost/depthwise_conv2d.py @@ -20,6 +20,7 @@ from __future__ import absolute_import as _abs import tvm +from tvm import te from .. import util from .. import tag @@ -38,8 +39,8 @@ def schedule_depthwise_conv2d_nchw(outs): s: Schedule The computation schedule for depthwise_conv2d nchw. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(pad_data, kernel, conv): raw_data = s[pad_data].op.input_tensors[0] @@ -55,12 +56,12 @@ def schedule_depthwise_conv2d_nchw(outs): zo, zi = s[tensor].split(z, z_factor) yo, yi = s[tensor].split(y, y_factor) xo, xi = s[tensor].split(x, x_factor) - s[tensor].bind(zo, tvm.thread_axis("blockIdx.z")) - s[tensor].bind(zi, tvm.thread_axis("threadIdx.z")) - s[tensor].bind(yo, tvm.thread_axis("blockIdx.y")) - s[tensor].bind(yi, tvm.thread_axis("threadIdx.y")) - s[tensor].bind(xo, tvm.thread_axis("blockIdx.x")) - s[tensor].bind(xi, tvm.thread_axis("threadIdx.x")) + s[tensor].bind(zo, te.thread_axis("blockIdx.z")) + s[tensor].bind(zi, te.thread_axis("threadIdx.z")) + s[tensor].bind(yo, te.thread_axis("blockIdx.y")) + s[tensor].bind(yi, te.thread_axis("threadIdx.y")) + s[tensor].bind(xo, te.thread_axis("blockIdx.x")) + s[tensor].bind(xi, te.thread_axis("threadIdx.x")) return zo, zi, yo, yi, xo, xi # set tunable parameters @@ -115,7 +116,7 @@ def schedule_depthwise_conv2d_nchw(outs): if op.tag == 'depthwise_conv2d_nchw': pad_data = op.input_tensors[0] kernel = op.input_tensors[1] - if isinstance(kernel.op, tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and 'dilate' in kernel.op.tag: s[kernel].compute_inline() conv = op.output(0) _schedule(pad_data, kernel, conv) diff --git a/topi/python/topi/bifrost/gemm.py b/topi/python/topi/bifrost/gemm.py index cc6cf09..3dc0108 100644 --- a/topi/python/topi/bifrost/gemm.py +++ b/topi/python/topi/bifrost/gemm.py @@ -16,9 +16,6 @@ # under the License. 
# pylint: disable=invalid-name,unused-variable,unused-argument """GEMM schedules for Mali Bifrost""" - -import tvm - from .transforms import tile_and_bind, tile_and_bind3d, interleave_transpose, \ transpose_interleave from .. import util @@ -31,15 +28,15 @@ def decl_gemm(cfg, A, B): cfg : Config Schedule configuration - A : tvm.Tensor + A : tvm.te.Tensor 2D Tensor, shape [n, k] - B : tvm.Tensor + B : tvm.te.Tensor 2D Tensor, shape [k, m] Returns ------- - C : tvm.Tensor + C : tvm.te.Tensor 2D Tensor, shape [n, m] """ @@ -60,35 +57,35 @@ def decl_gemm(cfg, A, B): if unroll_gemm == 1: # No unrolling case must have the same set of tensors to keep scheduling consistent # Create identity tensors to take the place of A_unrolled, B_unrolled and R - A_unrolled = tvm.compute((n, k_size), lambda i, j: A[i, j], name="A_unrolled") - B_unrolled = tvm.compute((k_size, m), lambda i, j: B[i, j], name="B_unrolled") + A_unrolled = te.compute((n, k_size), lambda i, j: A[i, j], name="A_unrolled") + B_unrolled = te.compute((k_size, m), lambda i, j: B[i, j], name="B_unrolled") # Declare standard GEMM - k = tvm.reduce_axis((0, A.shape[1]), name='k') - C = tvm.compute((n, m), lambda i, j: - tvm.sum(A_unrolled[i, k] * B_unrolled[k, j], axis=k), name='C') + k = te.reduce_axis((0, A.shape[1]), name='k') + C = te.compute((n, m), lambda i, j: + te.sum(A_unrolled[i, k] * B_unrolled[k, j], axis=k), name='C') - R = tvm.compute((n, m), lambda i, j: C[i, j], name="R") + R = te.compute((n, m), lambda i, j: C[i, j], name="R") else: unrolled_k_size = k_size // unroll_gemm # Unroll the two input matrices along the shared k axis - A_unrolled = tvm.compute((unroll_gemm, n, unrolled_k_size), lambda b, i, j: - A[i][unrolled_k_size * b + j], name='A_unrolled') + A_unrolled = te.compute((unroll_gemm, n, unrolled_k_size), lambda b, i, j: + A[i][unrolled_k_size * b + j], name='A_unrolled') - B_unrolled = tvm.compute((unroll_gemm, unrolled_k_size, m), lambda b, i, j: - B[unrolled_k_size * b + i][j], 
name='B_unrolled') + B_unrolled = te.compute((unroll_gemm, unrolled_k_size, m), lambda b, i, j: + B[unrolled_k_size * b + i][j], name='B_unrolled') # Declare a batched GEMM - k = tvm.reduce_axis((0, unrolled_k_size), name='k') - C = tvm.compute((unroll_gemm, n, m), lambda b, i, j: - tvm.sum(A_unrolled[b][i][k] * B_unrolled[b][k][j], axis=k), name='C') + k = te.reduce_axis((0, unrolled_k_size), name='k') + C = te.compute((unroll_gemm, n, m), lambda b, i, j: + te.sum(A_unrolled[b][i][k] * B_unrolled[b][k][j], axis=k), name='C') # Then declare a reduction to reduce the sub matrices - k = tvm.reduce_axis((0, unroll_gemm), name='k') - R = tvm.compute((n, m), lambda i, j: - tvm.sum(C[k][i][j], axis=k), name='R') + k = te.reduce_axis((0, unroll_gemm), name='k') + R = te.compute((n, m), lambda i, j: + te.sum(C[k][i][j], axis=k), name='R') return R @@ -99,15 +96,15 @@ def decl_batched_gemm(cfg, A, B): cfg : Config Schedule configuration - A : tvm.Tensor + A : tvm.te.Tensor 3D Tensor, shape [b, n, k] - B : tvm.Tensor + B : tvm.te.Tensor 3D Tensor, shape [b, k, m] Returns ------- - C : tvm.Tensor + C : tvm.te.Tensor 3D Tensor, shape [b, n, m] """ @@ -127,9 +124,9 @@ def decl_batched_gemm(cfg, A, B): b_size = util.get_const_int(A.shape[0]) # Declare a batched GEMM - k = tvm.reduce_axis((0, k_size), name='k') - C = tvm.compute((b_size, n, m), lambda b, i, j: - tvm.sum(A[b][i][k] * B[b][k][j], axis=k), name='C') + k = te.reduce_axis((0, k_size), name='k') + C = te.compute((b_size, n, m), lambda b, i, j: + te.sum(A[b][i][k] * B[b][k][j], axis=k), name='C') return C @@ -143,10 +140,10 @@ def decl_winograd_gemm(cfg, A, B): cfg : Config Schedule configuration - A : tvm.Tensor + A : tvm.te.Tensor 4D Tensor, shape [a, a, n, k] - B : tvm.Tensor + B : tvm.te.Tensor 4D Tensor, shape [a * a, k, m] Returns @@ -157,8 +154,8 @@ def decl_winograd_gemm(cfg, A, B): n = util.get_const_int(A.shape[2]) k = util.get_const_int(A.shape[3]) - A_3D = tvm.compute((alpha * alpha, n, k), lambda b, i, j: - 
A[b // alpha][b % alpha][i][j], name='A_3D') + A_3D = te.compute((alpha * alpha, n, k), lambda b, i, j: + A[b // alpha][b % alpha][i][j], name='A_3D') C = decl_batched_gemm(cfg, A_3D, B) return A_3D, C @@ -171,16 +168,16 @@ def schedule_gemm(cfg, s, A, B, C, batched=False, schedule_transforms=True): cfg : Config Schedule configuration - s : tvm.schedule.Schedule + s : tvm.te.schedule.Schedule Operator schedule - A : tvm.Tensor + A : tvm.te.Tensor 2D/3D Tensor, shape [n, k]/[b, n, k] - B : tvm.Tensor + B : tvm.te.Tensor 2D/3D Tensor, shape [k, m]/[b, k, m] - C : tvm.Tensor + C : tvm.te.Tensor 2D/3D Tensor, shape [n, m]/[b, n, m] batched : bool @@ -287,19 +284,19 @@ def schedule_unrollable_gemm(cfg, s, A, B, C, R): cfg : Config Schedule configuration - s : tvm.schedule.Schedule + s : tvm.te.schedule.Schedule Operator schedule - A : tvm.Tensor + A : tvm.te.Tensor 2D/3D Tensor, shape [n, k]/[b, n, k] - B : tvm.Tensor + B : tvm.te.Tensor 2D/3D Tensor, shape [k, m]/[b, k, m] - C : tvm.Tensor + C : tvm.te.Tensor 2D/3D Tensor, shape [n, m]/[b, n, m] - R : tvm.Tensor + R : tvm.te.Tensor 2D Tensor, shape [n, m] """ @@ -340,21 +337,21 @@ def get_unrollable_gemm_ops(R): Parameters ---------- - R : tvm.Tensor + R : tvm.te.Tensor Reduced tensor, final stage of GEMM Returns ------- - A_unrolled : tvm.Tensor + A_unrolled : tvm.te.Tensor Matrix A unrolled along k - B_unrolled: tvm.Tensor + B_unrolled: tvm.te.Tensor Matrix B unrolled along k - C : tvm.Tensor + C : tvm.te.Tensor Result of batched GEMM - R : tvm.Tensor + R : tvm.te.Tensor Reduction of C, result of unrollable GEMM """ diff --git a/topi/python/topi/bifrost/transforms.py b/topi/python/topi/bifrost/transforms.py index d7fc292..3feb4e6 100644 --- a/topi/python/topi/bifrost/transforms.py +++ b/topi/python/topi/bifrost/transforms.py @@ -19,6 +19,7 @@ from __future__ import absolute_import as _abs import tvm +from tvm import te def fuse_and_bind(s, tensor, axis=None, num_thread=None): """Fuse all the axis and bind to GPU 
threads""" @@ -26,18 +27,18 @@ def fuse_and_bind(s, tensor, axis=None, num_thread=None): fused = s[tensor].fuse(*axis) max_threads = tvm.target.Target.current(allow_none=False).max_num_threads bx, tx = s[tensor].split(fused, num_thread or max_threads) - s[tensor].bind(bx, tvm.thread_axis("blockIdx.x")) - s[tensor].bind(tx, tvm.thread_axis("threadIdx.x")) + s[tensor].bind(bx, te.thread_axis("blockIdx.x")) + s[tensor].bind(tx, te.thread_axis("threadIdx.x")) return bx, tx def tile_and_bind(s, tensor, y, x, y_factor, x_factor=None): """Tile and bind to GPU threads""" x_factor = x_factor or y_factor yo, xo, yi, xi = s[tensor].tile(y, x, y_factor, x_factor) - s[tensor].bind(xo, tvm.thread_axis("blockIdx.x")) - s[tensor].bind(xi, tvm.thread_axis("threadIdx.x")) - s[tensor].bind(yo, tvm.thread_axis("blockIdx.y")) - s[tensor].bind(yi, tvm.thread_axis("threadIdx.y")) + s[tensor].bind(xo, te.thread_axis("blockIdx.x")) + s[tensor].bind(xi, te.thread_axis("threadIdx.x")) + s[tensor].bind(yo, te.thread_axis("blockIdx.y")) + s[tensor].bind(yi, te.thread_axis("threadIdx.y")) return yo, xo, yi, xi def tile_and_bind3d(s, tensor, z, y, x, z_factor=2, y_factor=None, x_factor=None): @@ -47,12 +48,12 @@ def tile_and_bind3d(s, tensor, z, y, x, z_factor=2, y_factor=None, x_factor=None zo, zi = s[tensor].split(z, z_factor) yo, yi = s[tensor].split(y, y_factor) xo, xi = s[tensor].split(x, x_factor) - s[tensor].bind(zo, tvm.thread_axis("blockIdx.z")) - s[tensor].bind(zi, tvm.thread_axis("threadIdx.z")) - s[tensor].bind(yo, tvm.thread_axis("blockIdx.y")) - s[tensor].bind(yi, tvm.thread_axis("threadIdx.y")) - s[tensor].bind(xo, tvm.thread_axis("blockIdx.x")) - s[tensor].bind(xi, tvm.thread_axis("threadIdx.x")) + s[tensor].bind(zo, te.thread_axis("blockIdx.z")) + s[tensor].bind(zi, te.thread_axis("threadIdx.z")) + s[tensor].bind(yo, te.thread_axis("blockIdx.y")) + s[tensor].bind(yi, te.thread_axis("threadIdx.y")) + s[tensor].bind(xo, te.thread_axis("blockIdx.x")) + s[tensor].bind(xi, 
te.thread_axis("threadIdx.x")) return zo, yo, xo, zi, yi, xi def pack_tensor(s, tensor, factor, readers): diff --git a/topi/python/topi/broadcast.py b/topi/python/topi/broadcast.py index ba39c9a..39b2841 100644 --- a/topi/python/topi/broadcast.py +++ b/topi/python/topi/broadcast.py @@ -27,7 +27,7 @@ def broadcast_to(data, shape): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor The input data shape : list or tuple @@ -35,7 +35,7 @@ def broadcast_to(data, shape): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return _cpp.broadcast_to(data, shape) @@ -45,14 +45,14 @@ def add(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -64,14 +64,14 @@ def subtract(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -83,14 +83,14 @@ def multiply(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -102,14 +102,14 @@ def divide(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. 
Otherwise returns Tensor. """ @@ -121,14 +121,14 @@ def floor_divide(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -140,14 +140,14 @@ def mod(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -159,14 +159,14 @@ def floor_mod(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -178,14 +178,14 @@ def maximum(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -197,14 +197,14 @@ def minimum(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. 
""" @@ -216,14 +216,14 @@ def power(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -235,14 +235,14 @@ def left_shift(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -254,14 +254,14 @@ def right_shift(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -273,14 +273,14 @@ def greater(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -292,14 +292,14 @@ def less(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. 
""" @@ -311,14 +311,14 @@ def equal(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -330,14 +330,14 @@ def not_equal(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -349,14 +349,14 @@ def greater_equal(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -368,14 +368,14 @@ def less_equal(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -387,14 +387,14 @@ def logical_and(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. 
""" @@ -406,14 +406,14 @@ def logical_or(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -425,14 +425,14 @@ def bitwise_and(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -444,14 +444,14 @@ def bitwise_or(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -463,14 +463,14 @@ def bitwise_xor(lhs, rhs): Parameters ---------- - lhs : tvm.Tensor or Expr + lhs : tvm.te.Tensor or Expr The left operand - rhs : tvm.Tensor or Expr + rhs : tvm.te.Tensor or Expr The right operand Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if both operands are Expr. Otherwise returns Tensor. """ @@ -482,11 +482,11 @@ def logical_not(data): Parameters ---------- - data : tvm.Tensor or Expr + data : tvm.te.Tensor or Expr Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if the operand are Expr. Otherwise returns Tensor. """ @@ -498,11 +498,11 @@ def bitwise_not(data): Parameters ---------- - data : tvm.Tensor or Expr + data : tvm.te.Tensor or Expr Returns ------- - ret : tvm.Tensor or Expr + ret : tvm.te.Tensor or Expr Returns Expr if the operand are Expr. Otherwise returns Tensor. 
""" diff --git a/topi/python/topi/cuda/batch_matmul.py b/topi/python/topi/cuda/batch_matmul.py index e293c7a..bf80182 100644 --- a/topi/python/topi/cuda/batch_matmul.py +++ b/topi/python/topi/cuda/batch_matmul.py @@ -16,8 +16,7 @@ # under the License. # pylint: disable=invalid-name,too-many-locals,unused-variable """cuda batch_matmul operators""" -from __future__ import absolute_import as _abs -import tvm +from tvm import te from tvm.contrib import cublas from ..util import traverse_inline, get_const_tuple, get_max_power2_factor @@ -35,8 +34,8 @@ def schedule_batch_matmul(outs): s: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(op): C = op.output(0) @@ -60,13 +59,13 @@ def schedule_batch_matmul(outs): x_nthreads = min(x_bn, 8) ty, yi = s[C].split(y, nparts=y_nthreads) tx, xi = s[C].split(x, nparts=x_nthreads) - thread_x = tvm.thread_axis((0, x_nthreads), "threadIdx.x") - thread_y = tvm.thread_axis((0, y_nthreads), "threadIdx.y") + thread_x = te.thread_axis((0, x_nthreads), "threadIdx.x") + thread_y = te.thread_axis((0, y_nthreads), "threadIdx.y") s[C].reorder(b, by, bx, ty, tx, yi, xi) - s[C].bind(b, tvm.thread_axis("blockIdx.z")) - s[C].bind(by, tvm.thread_axis("blockIdx.y")) - s[C].bind(bx, tvm.thread_axis("blockIdx.x")) + s[C].bind(b, te.thread_axis("blockIdx.z")) + s[C].bind(by, te.thread_axis("blockIdx.y")) + s[C].bind(bx, te.thread_axis("blockIdx.x")) s[C].bind(ty, thread_y) s[C].bind(tx, thread_x) s[C].pragma(yi, "auto_unroll_max_step", 16) @@ -111,15 +110,15 @@ def batch_matmul_cublas(x, y): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor 3-D with shape [batch, M, K] - y : tvm.Tensor + y : tvm.te.Tensor 3-D with shape [batch, N, K] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 3-D with shape 
[batch, M, N] """ return cublas.batch_matmul(x, y, False, True) diff --git a/topi/python/topi/cuda/conv1d.py b/topi/python/topi/cuda/conv1d.py index 56918e2..3ddecbe 100644 --- a/topi/python/topi/cuda/conv1d.py +++ b/topi/python/topi/cuda/conv1d.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name, unused-argument """Compute definition for conv1d with cuda backend""" import tvm +from tvm import te from tvm import autotvm from .. import nn @@ -52,8 +53,8 @@ def schedule_conv1d_ncw(cfg, outs): s : Schedule The computation schedule for conv1d. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'conv1d_ncw': @@ -79,7 +80,7 @@ def schedule_conv1d_ncw(cfg, outs): ##### space definition end ##### if isinstance(kernel.op, - tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag: + tvm.te.ComputeOp) and 'dilate' in kernel.op.tag: s[kernel].compute_inline() if conv.op in s.outputs: @@ -103,14 +104,14 @@ def schedule_conv1d_ncw(cfg, outs): bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x) s[output].reorder(bn, bf, bx, vn, vf, vx, tn, tf, tx, ni, fi, xi) - s[output].bind(bn, tvm.thread_axis("blockIdx.z")) - s[output].bind(bf, tvm.thread_axis("blockIdx.y")) - s[output].bind(bx, tvm.thread_axis("blockIdx.x")) - s[output].bind(vn, tvm.thread_axis("vthread")) - s[output].bind(vf, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) - - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(bn, te.thread_axis("blockIdx.z")) + s[output].bind(bf, te.thread_axis("blockIdx.y")) + s[output].bind(bx, te.thread_axis("blockIdx.x")) + s[output].bind(vn, te.thread_axis("vthread")) + s[output].bind(vf, te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) + + s[output].bind(tx, te.thread_axis("threadIdx.x")) 
s[OL].compute_at(s[output], tx) # number of threads n_tz = cfg["tile_n"].size[2] * cfg["tile_f"].size[2] @@ -131,8 +132,8 @@ def schedule_conv1d_ncw(cfg, outs): fused = s[load].fuse(f, x) tz, fused = s[load].split(fused, nparts=n_tz) tx, fused = s[load].split(fused, nparts=n_tx) - s[load].bind(tz, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val) @@ -177,8 +178,8 @@ def schedule_conv1d_nwc(cfg, outs): s : Schedule The computation schedule for conv1d. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'conv1d_nwc': @@ -204,7 +205,7 @@ def schedule_conv1d_nwc(cfg, outs): ##### space definition end ##### if isinstance(kernel.op, - tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag: + tvm.te.ComputeOp) and 'dilate' in kernel.op.tag: s[kernel].compute_inline() if conv.op in s.outputs: @@ -228,14 +229,14 @@ def schedule_conv1d_nwc(cfg, outs): bf, vf, tf, fi = cfg["tile_f"].apply(s, output, f) s[output].reorder(bn, bx, bf, vn, vx, vf, tn, tx, tf, ni, xi, fi) - s[output].bind(bn, tvm.thread_axis("blockIdx.z")) - s[output].bind(bx, tvm.thread_axis("blockIdx.y")) - s[output].bind(bf, tvm.thread_axis("blockIdx.x")) - s[output].bind(vn, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) - s[output].bind(vf, tvm.thread_axis("vthread")) - - s[output].bind(tf, tvm.thread_axis("threadIdx.x")) + s[output].bind(bn, te.thread_axis("blockIdx.z")) + s[output].bind(bx, te.thread_axis("blockIdx.y")) + s[output].bind(bf, te.thread_axis("blockIdx.x")) + s[output].bind(vn, te.thread_axis("vthread")) + s[output].bind(vx, 
te.thread_axis("vthread")) + s[output].bind(vf, te.thread_axis("vthread")) + + s[output].bind(tf, te.thread_axis("threadIdx.x")) s[OL].compute_at(s[output], tf) # number of threads n_tz = cfg["tile_n"].size[2] * cfg["tile_x"].size[2] @@ -256,8 +257,8 @@ def schedule_conv1d_nwc(cfg, outs): fused = s[load].fuse(x, f) tz, fused = s[load].split(fused, nparts=n_tz) tx, fused = s[load].split(fused, nparts=n_tx) - s[load].bind(tz, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val) diff --git a/topi/python/topi/cuda/conv1d_transpose_ncw.py b/topi/python/topi/cuda/conv1d_transpose_ncw.py index 4802a0d..cf1b66c 100644 --- a/topi/python/topi/cuda/conv1d_transpose_ncw.py +++ b/topi/python/topi/cuda/conv1d_transpose_ncw.py @@ -18,6 +18,7 @@ """Conv1d transpose template for cuda backend""" import tvm +from tvm import te from tvm import autotvm from .. 
import nn from ..util import get_const_tuple, traverse_inline @@ -30,9 +31,9 @@ def conv1d_transpose_ncw(cfg, data, kernel, stride, padding, out_dtype): ---------- cfg: ConfigEntity The config for this template - Input : tvm.Tensor + Input : tvm.te.Tensor 3-D with shape [batch, in_channel, inp_width] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 3-D with shape [in_channel, num_filter, kernel_size] stride : tuple of one int The spatial stride along width @@ -45,7 +46,7 @@ def conv1d_transpose_ncw(cfg, data, kernel, stride, padding, out_dtype): Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor u 3-D with shape [batch, out_channel, out_width] """ if isinstance(stride, (tuple, list)): @@ -58,21 +59,21 @@ def conv1d_transpose_ncw(cfg, data, kernel, stride, padding, out_dtype): pad_left = kernel_size - 1 - pad_left pad_right = kernel_size - 1 - pad_right dilated_width = stride * (inp_width - 1) + 1 - data = tvm.compute( + data = te.compute( (batch, inp_channels, pad_left + dilated_width + pad_right), - lambda n, c, x: tvm.if_then_else( - tvm.all(x >= pad_left, - x < pad_left + dilated_width, - tvm.indexmod(x - pad_left, stride).equal(0)), - data[n, c, tvm.indexdiv(x - pad_left, stride)], - tvm.const(0., "float32")), + lambda n, c, x: tvm.tir.if_then_else( + tvm.tir.all(x >= pad_left, + x < pad_left + dilated_width, + tvm.tir.indexmod(x - pad_left, stride).equal(0)), + data[n, c, tvm.tir.indexdiv(x - pad_left, stride)], + tvm.tir.const(0., "float32")), name='data_pad') - dc = tvm.reduce_axis((0, inp_channels), name='dc') - dw = tvm.reduce_axis((0, kernel_size), name='dw') - data_out = tvm.compute( + dc = te.reduce_axis((0, inp_channels), name='dc') + dw = te.reduce_axis((0, kernel_size), name='dw') + data_out = te.compute( (batch, out_channels, out_width), - lambda b, c, w: tvm.sum( + lambda b, c, w: te.sum( data[b, dc, w + dw].astype(out_dtype) * kernel[dc, c, kernel_size - 1 - dw].astype(out_dtype), axis=[dc, dw]), tag="conv1d_transpose_ncw") @@ -97,8 
+98,8 @@ def schedule_conv1d_transpose_ncw(cfg, outs): s: Schedule The computation schedule for conv1d transpose. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'conv1d_transpose_ncw': @@ -123,7 +124,7 @@ def schedule_conv1d_transpose_ncw(cfg, outs): ##### space definition end ##### - if isinstance(kernel.op, tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and 'dilate' in kernel.op.tag: s[kernel].compute_inline() if conv.op in s.outputs: @@ -147,14 +148,14 @@ def schedule_conv1d_transpose_ncw(cfg, outs): bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x) s[output].reorder(bn, bf, bx, vn, vf, vx, tn, tf, tx, ni, fi, xi) - s[output].bind(bn, tvm.thread_axis("blockIdx.z")) - s[output].bind(bf, tvm.thread_axis("blockIdx.y")) - s[output].bind(bx, tvm.thread_axis("blockIdx.x")) - s[output].bind(vn, tvm.thread_axis("vthread")) - s[output].bind(vf, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) - - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(bn, te.thread_axis("blockIdx.z")) + s[output].bind(bf, te.thread_axis("blockIdx.y")) + s[output].bind(bx, te.thread_axis("blockIdx.x")) + s[output].bind(vn, te.thread_axis("vthread")) + s[output].bind(vf, te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) + + s[output].bind(tx, te.thread_axis("threadIdx.x")) s[OL].compute_at(s[output], tx) # number of threads n_tz = cfg["tile_n"].size[2] * cfg["tile_f"].size[2] @@ -175,8 +176,8 @@ def schedule_conv1d_transpose_ncw(cfg, outs): fused = s[load].fuse(f, x) tz, fused = s[load].split(fused, nparts=n_tz) tx, fused = s[load].split(fused, nparts=n_tx) - s[load].bind(tz, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + 
s[load].bind(tz, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val) s[output].pragma(kernel_scope, 'unroll_explicit', cfg['unroll_explicit'].val) diff --git a/topi/python/topi/cuda/conv2d.py b/topi/python/topi/cuda/conv2d.py index e1ada32..c247893 100644 --- a/topi/python/topi/cuda/conv2d.py +++ b/topi/python/topi/cuda/conv2d.py @@ -16,7 +16,7 @@ # under the License. # pylint: disable=invalid-name, unused-argument """Compute definition for conv2d with cuda backend""" -import tvm +from tvm import te from tvm import autotvm from tvm.contrib import cudnn @@ -35,8 +35,8 @@ def conv2d_nchw(cfg, data, kernel, strides, padding, dilation, out_dtype='float3 @autotvm.register_topi_schedule("conv2d_nchw.cuda") def schedule_conv2d_nchw(cfg, outs): """Create the schedule for conv2d_nchw""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'conv2d_nchw': @@ -55,8 +55,8 @@ def schedule_conv2d_nchw(cfg, outs): # # @autotvm.register_topi_schedule("conv2d_nhwc.cuda") # def schedule_conv2d_nhwc(cfg, outs): -# outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs -# s = tvm.create_schedule([x.op for x in outs]) +# outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs +# s = te.create_schedule([x.op for x in outs]) # # def _callback(op): # if op.tag == 'conv2d_nhwc': diff --git a/topi/python/topi/cuda/conv2d_alter_op.py b/topi/python/topi/cuda/conv2d_alter_op.py index f3e4f4c..b598271 100644 --- a/topi/python/topi/cuda/conv2d_alter_op.py +++ b/topi/python/topi/cuda/conv2d_alter_op.py @@ -19,6 +19,7 @@ import logging import tvm +from tvm import te from tvm import relay from tvm import autotvm @@ -70,10 +71,10 @@ def _alter_conv2d_layout(attrs, inputs, 
tinfos, out_type): ic_block_factor = oc_block_factor = 4 # Store the same config for the altered operator (workload) - new_data = tvm.placeholder((N, CI // ic_block_factor, H, W, ic_block_factor), - dtype=data.dtype) - new_kernel = tvm.placeholder((CO // oc_block_factor, CI // ic_block_factor, KH, KW, \ - oc_block_factor, ic_block_factor), dtype=kernel.dtype) + new_data = te.placeholder((N, CI // ic_block_factor, H, W, ic_block_factor), + dtype=data.dtype) + new_kernel = te.placeholder((CO // oc_block_factor, CI // ic_block_factor, KH, KW, \ + oc_block_factor, ic_block_factor), dtype=kernel.dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, new_layout, out_dtype], "conv2d_NCHWc_int8.cuda") @@ -100,8 +101,8 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): # Store the same config for the altered operator (workload) new_data = data - new_weight = tvm.placeholder((KH + tile_size - 1, KW + tile_size - 1, CI, CO), - dtype=kernel.dtype) + new_weight = te.placeholder((KH + tile_size - 1, KW + tile_size - 1, CI, CO), + dtype=kernel.dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_weight, strides, padding, dilation, out_dtype], "conv2d_nchw_winograd_without_weight_transform.cuda") @@ -122,11 +123,11 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): ic_block_factor = oc_block_factor = 4 # Store the same config for the altered operator (workload) - new_data = tvm.placeholder((N, CI // ic_block_factor, H, W, ic_block_factor), - dtype=data.dtype) - new_kernel = tvm.placeholder((CO // oc_block_factor, CI // ic_block_factor // groups, - KH, KW, oc_block_factor, ic_block_factor), - dtype=kernel.dtype) + new_data = te.placeholder((N, CI // ic_block_factor, H, W, ic_block_factor), + dtype=data.dtype) + new_kernel = te.placeholder((CO // oc_block_factor, CI // ic_block_factor // groups, + KH, KW, oc_block_factor, ic_block_factor), + dtype=kernel.dtype) new_workload = 
autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, groups, out_dtype], "group_conv2d_NCHWc_int8.cuda") diff --git a/topi/python/topi/cuda/conv2d_direct.py b/topi/python/topi/cuda/conv2d_direct.py index 2fab8cf..db6bff2 100644 --- a/topi/python/topi/cuda/conv2d_direct.py +++ b/topi/python/topi/cuda/conv2d_direct.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name """The templates for cuda conv2d operators""" import tvm +from tvm import te from tvm import autotvm from ..util import get_const_tuple @@ -50,7 +51,7 @@ def schedule_direct_cuda(cfg, s, conv): pad_data, kernel = s[conv].op.input_tensors s[pad_data].compute_inline() - if isinstance(kernel.op, tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and 'dilate' in kernel.op.tag: s[kernel].compute_inline() if conv.op in s.outputs: @@ -74,15 +75,15 @@ def schedule_direct_cuda(cfg, s, conv): bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x) bf = s[output].fuse(n, bf) - s[output].bind(bf, tvm.thread_axis("blockIdx.z")) - s[output].bind(by, tvm.thread_axis("blockIdx.y")) - s[output].bind(bx, tvm.thread_axis("blockIdx.x")) - s[output].bind(vf, tvm.thread_axis("vthread")) - s[output].bind(vy, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) - s[output].bind(tf, tvm.thread_axis("threadIdx.z")) - s[output].bind(ty, tvm.thread_axis("threadIdx.y")) - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(bf, te.thread_axis("blockIdx.z")) + s[output].bind(by, te.thread_axis("blockIdx.y")) + s[output].bind(bx, te.thread_axis("blockIdx.x")) + s[output].bind(vf, te.thread_axis("vthread")) + s[output].bind(vy, te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) + s[output].bind(tf, te.thread_axis("threadIdx.z")) + s[output].bind(ty, te.thread_axis("threadIdx.y")) + s[output].bind(tx, te.thread_axis("threadIdx.x")) s[output].reorder(bf, by, bx, vf, vy, vx, tf, ty, tx, fi, yi, xi) 
s[OL].compute_at(s[output], tx) @@ -104,9 +105,9 @@ def schedule_direct_cuda(cfg, s, conv): tz, fused = s[load].split(fused, nparts=cfg["tile_f"].size[2]) ty, fused = s[load].split(fused, nparts=cfg["tile_y"].size[2]) tx, fused = s[load].split(fused, nparts=cfg["tile_x"].size[2]) - s[load].bind(tz, tvm.thread_axis("threadIdx.z")) - s[load].bind(ty, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.z")) + s[load].bind(ty, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) # unroll s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val) diff --git a/topi/python/topi/cuda/conv2d_hwcn.py b/topi/python/topi/cuda/conv2d_hwcn.py index b0925ae..e45083f 100644 --- a/topi/python/topi/cuda/conv2d_hwcn.py +++ b/topi/python/topi/cuda/conv2d_hwcn.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name, too-many-locals, too-many-statements, unused-argument """Schedule for conv2d_hwcn with auto fusion""" import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import SplitEntity @@ -44,8 +45,8 @@ def schedule_conv2d_hwcn(cfg, outs): s: Schedule The computation schedule for conv2d_hwcn. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - sch = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + sch = te.create_schedule([x.op for x in outs]) def schedule(Apad, W, B): """Schedule conv2d_hwcn""" sch[Apad].compute_inline() @@ -93,13 +94,13 @@ def schedule_conv2d_hwcn(cfg, outs): bx, txz, tx, ni = cfg['tile_ni'].apply(sch, Out, ni) sch[Out].reorder(bz, by, bx, tyz, txz, ty, tx, fi, ni) - sch[Out].bind(bz, tvm.thread_axis('blockIdx.z')) - sch[Out].bind(by, tvm.thread_axis('blockIdx.y')) - sch[Out].bind(bx, tvm.thread_axis('blockIdx.x')) - sch[Out].bind(tyz, tvm.thread_axis('vthread')) - sch[Out].bind(txz, tvm.thread_axis('vthread')) - sch[Out].bind(ty, tvm.thread_axis('threadIdx.y')) - sch[Out].bind(tx, tvm.thread_axis('threadIdx.x')) + sch[Out].bind(bz, te.thread_axis('blockIdx.z')) + sch[Out].bind(by, te.thread_axis('blockIdx.y')) + sch[Out].bind(bx, te.thread_axis('blockIdx.x')) + sch[Out].bind(tyz, te.thread_axis('vthread')) + sch[Out].bind(txz, te.thread_axis('vthread')) + sch[Out].bind(ty, te.thread_axis('threadIdx.y')) + sch[Out].bind(tx, te.thread_axis('threadIdx.x')) # Schedule BL local write sch[BL].compute_at(sch[Out], tx) @@ -121,8 +122,8 @@ def schedule_conv2d_hwcn(cfg, outs): tx, ni = sch[AA].split(ni, nparts=cfg['tile_ni'].size[2]) _, ni = sch[AA].split(ni, factor=4) sch[AA].reorder(ty, tx, yi, xi, ci, ni) - sch[AA].bind(ty, tvm.thread_axis('threadIdx.y')) - sch[AA].bind(tx, tvm.thread_axis('threadIdx.x')) + sch[AA].bind(ty, te.thread_axis('threadIdx.y')) + sch[AA].bind(tx, te.thread_axis('threadIdx.x')) sch[AA].vectorize(ni) # Schedule for W's shared memory load yi, xi, ci, fi = sch[WW].op.axis @@ -130,8 +131,8 @@ def schedule_conv2d_hwcn(cfg, outs): tx, fi = sch[WW].split(fi, nparts=cfg['tile_ni'].size[2]) _, fi = sch[WW].split(fi, factor=4) sch[WW].reorder(ty, tx, yi, xi, ci, fi) - sch[WW].bind(ty, tvm.thread_axis('threadIdx.y')) - sch[WW].bind(tx, 
tvm.thread_axis('threadIdx.x')) + sch[WW].bind(ty, te.thread_axis('threadIdx.y')) + sch[WW].bind(tx, te.thread_axis('threadIdx.x')) sch[WW].vectorize(fi) scheduled_ops = [] @@ -142,12 +143,12 @@ def schedule_conv2d_hwcn(cfg, outs): if operator not in sch.outputs: sch[operator].compute_inline() for tensor in operator.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) elif operator.tag == 'conv2d_hwcn': Apad = operator.input_tensors[0] W = operator.input_tensors[1] - if isinstance(W.op, tvm.tensor.ComputeOp) and 'dilate' in W.op.tag: + if isinstance(W.op, tvm.te.ComputeOp) and 'dilate' in W.op.tag: sch[W].compute_inline() B = operator.output(0) schedule(Apad, W, B) diff --git a/topi/python/topi/cuda/conv2d_int8.py b/topi/python/topi/cuda/conv2d_int8.py index 53a7bd9..ad97fa6 100644 --- a/topi/python/topi/cuda/conv2d_int8.py +++ b/topi/python/topi/cuda/conv2d_int8.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name """Int8 conv2d in NCHWc layout""" import tvm +from tvm import te from tvm import autotvm from .injective import schedule_injective_from_existing @@ -35,11 +36,11 @@ def conv2d_NCHWc_int8(cfg, data, kernel, stride, padding, dilation, layout, out_ cfg: ConfigEntity The config for this template - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] or 5-D with shape [batch, in_channel_chunk, in_height, in_width, in_channel_block] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 4-D with shape [num_filter, in_channel, filter_height, filter_width] or 6-D with shape [num_filter_chunk, in_channel_chunk, filter_height, filter_width, num_filter_block, in_channel_block] @@ -61,7 +62,7 @@ def conv2d_NCHWc_int8(cfg, data, kernel, stride, padding, dilation, layout, out_ Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 5-D with shape [batch, out_channel_chunk, 
out_height, out_width, out_channel_block] """ assert layout in ["NCHW", "NCHW4c"] @@ -74,17 +75,17 @@ def conv2d_NCHWc_int8(cfg, data, kernel, stride, padding, dilation, layout, out_ assert channels % ic_block_factor == 0, \ "Number of input channels should be multiple of {}".format( ic_block_factor) - packed_data = tvm.compute((batch, channels // ic_block_factor, height, width, - ic_block_factor), - lambda n, c, h, w, vc: data[n, c*ic_block_factor + vc, h, w], - name="packed_data") + packed_data = te.compute((batch, channels // ic_block_factor, height, width, + ic_block_factor), + lambda n, c, h, w, vc: data[n, c*ic_block_factor + vc, h, w], + name="packed_data") out_channels, in_channels, kernel_h, kernel_w = get_const_tuple( kernel.shape) assert out_channels % 4 == 0, \ "Number of output channels should be multiple of {}".format( oc_block_factor) - packed_kernel = tvm.compute( + packed_kernel = te.compute( (out_channels // oc_block_factor, in_channels // ic_block_factor, kernel_h, kernel_w, oc_block_factor, ic_block_factor), lambda oc_chunk, ic_chunk, kh, kw, oc_block, ic_block: @@ -124,23 +125,23 @@ def conv2d_NCHWc_int8(cfg, data, kernel, stride, padding, dilation, layout, out_ oshape = (batch, oc_chunk, out_height, out_width, oc_block) - icc = tvm.reduce_axis((0, ic_chunk), name='ic_chunk') - icb = tvm.reduce_axis((0, ic_block), name='ic_block') - kh = tvm.reduce_axis((0, kernel_h), name='kh') - kw = tvm.reduce_axis((0, kernel_w), name='kw') + icc = te.reduce_axis((0, ic_chunk), name='ic_chunk') + icb = te.reduce_axis((0, ic_block), name='ic_block') + kh = te.reduce_axis((0, kernel_h), name='kh') + kw = te.reduce_axis((0, kernel_w), name='kw') - conv = tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block: - tvm.sum(pad_data[n, icc, oh*stride_h+kh*dilation_h, \ - ow*stride_w+kw*dilation_w, icb] - .astype('int32') * - packed_kernel[oc_chunk, icc, - kh, kw, oc_block, icb] - .astype('int32'), - axis=[icc, kh, kw, icb])) + conv = te.compute(oshape, lambda n, 
oc_chunk, oh, ow, oc_block: + te.sum(pad_data[n, icc, oh*stride_h+kh*dilation_h, \ + ow*stride_w+kw*dilation_w, icb] + .astype('int32') * + packed_kernel[oc_chunk, icc, + kh, kw, oc_block, icb] + .astype('int32'), + axis=[icc, kh, kw, icb])) - output = tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block: - conv[n, oc_chunk, oh, ow, oc_block].astype(out_dtype), - tag="conv2d_NCHWc_int8") + output = te.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block: + conv[n, oc_chunk, oh, ow, oc_block].astype(out_dtype), + tag="conv2d_NCHWc_int8") # num flop num_flop = batch * oc_chunk * oc_block * out_height * out_width * \ @@ -156,8 +157,8 @@ _dp4a = dp4a('shared', 'shared', 'local') @autotvm.register_topi_schedule("conv2d_NCHWc_int8.cuda") def schedule_conv2d_NCHWc_int8(cfg, outs): """Schedule conv2d int8 NCHWc template""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'conv2d_NCHWc_int8': @@ -171,7 +172,7 @@ def _schedule_conv2d_NCHWc_int8(cfg, s, output): conv = output.op.input_tensors[0] packed_data, packed_kernel = conv.op.input_tensors - if isinstance(packed_data.op, tvm.tensor.ComputeOp) and "pad" in packed_data.op.tag: + if isinstance(packed_data.op, tvm.te.ComputeOp) and "pad" in packed_data.op.tag: pad_data = packed_data packed_data = pad_data.op.input_tensors[0] else: @@ -183,8 +184,8 @@ def _schedule_conv2d_NCHWc_int8(cfg, s, output): s[packed_data].pragma(s[packed_data].op.axis[0], "debug_skip_region") s[packed_kernel].pragma(s[packed_kernel].op.axis[0], "debug_skip_region") else: - if isinstance(packed_kernel.op, tvm.tensor.ComputeOp) and\ - packed_kernel.name == 'packed_kernel': + if isinstance(packed_kernel.op, tvm.te.ComputeOp) and\ + packed_kernel.name == 'packed_kernel': # data and kernel are not pre-computed, schedule layout transform here 
schedule_injective_from_existing(s, packed_data) schedule_injective_from_existing(s, packed_kernel) @@ -219,20 +220,20 @@ def _schedule_conv2d_NCHWc_int8(cfg, s, output): bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x) s[output].reorder(bn, bf, by, bx, vn, vf, vy, vx, tn, tf, ty, tx, ni, fi, yi, xi) - s[output].bind(bn, tvm.thread_axis("blockIdx.z")) - s[output].bind(bf, tvm.thread_axis("blockIdx.y")) - s[output].bind(s[output].fuse(by, bx), tvm.thread_axis("blockIdx.x")) - s[output].bind(vn, tvm.thread_axis("vthread")) - s[output].bind(vf, tvm.thread_axis("vthread")) - s[output].bind(vy, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) + s[output].bind(bn, te.thread_axis("blockIdx.z")) + s[output].bind(bf, te.thread_axis("blockIdx.y")) + s[output].bind(s[output].fuse(by, bx), te.thread_axis("blockIdx.x")) + s[output].bind(vn, te.thread_axis("vthread")) + s[output].bind(vf, te.thread_axis("vthread")) + s[output].bind(vy, te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) cfg.define_knob("fuse_yx", [0, 1]) # fuse ty,tx or tn,tf if cfg["fuse_yx"].val: - s[output].bind(tn, tvm.thread_axis("threadIdx.z")) - s[output].bind(tf, tvm.thread_axis("threadIdx.y")) + s[output].bind(tn, te.thread_axis("threadIdx.z")) + s[output].bind(tf, te.thread_axis("threadIdx.y")) tyx = s[output].fuse(ty, tx) - s[output].bind(tyx, tvm.thread_axis("threadIdx.x")) + s[output].bind(tyx, te.thread_axis("threadIdx.x")) s[conv].compute_at(s[output], tyx) # number of threads @@ -240,9 +241,9 @@ def _schedule_conv2d_NCHWc_int8(cfg, s, output): n_ty = cfg["tile_f"].size[2] n_tx = cfg["tile_y"].size[2] * cfg["tile_x"].size[2] else: - s[output].bind(s[output].fuse(tn, tf), tvm.thread_axis("threadIdx.z")) - s[output].bind(ty, tvm.thread_axis("threadIdx.y")) - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(s[output].fuse(tn, tf), te.thread_axis("threadIdx.z")) + s[output].bind(ty, te.thread_axis("threadIdx.y")) + 
s[output].bind(tx, te.thread_axis("threadIdx.x")) s[conv].compute_at(s[output], tx) # number of threads @@ -285,9 +286,9 @@ def _schedule_conv2d_NCHWc_int8(cfg, s, output): fused, tx = s[load].split(fused, factor=n_tx) fused, ty = s[load].split(fused, factor=n_ty) fused, tz = s[load].split(fused, factor=n_tz) - s[load].bind(tz, tvm.thread_axis("threadIdx.z")) - s[load].bind(ty, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.z")) + s[load].bind(ty, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) # double buffer cfg.define_knob('AA_double_buffer', [0, 1]) diff --git a/topi/python/topi/cuda/conv2d_transpose_nchw.py b/topi/python/topi/cuda/conv2d_transpose_nchw.py index 8751800..17bd37d 100644 --- a/topi/python/topi/cuda/conv2d_transpose_nchw.py +++ b/topi/python/topi/cuda/conv2d_transpose_nchw.py @@ -18,6 +18,7 @@ """Conv2d transpose template for cuda backend""" import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import SplitEntity, OtherOptionEntity from .. 
import nn @@ -32,9 +33,9 @@ def conv2d_transpose_nchw(cfg, data, kernel, stride, padding, out_dtype): ---------- cfg: ConfigEntity The config for this template - Input : tvm.Tensor + Input : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 4-D with shape [in_channel, num_filter, filter_height, filter_width] strides : tuple of two ints The spatial stride along height and width @@ -45,7 +46,7 @@ def conv2d_transpose_nchw(cfg, data, kernel, stride, padding, out_dtype): Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ batch, inp_channels, inp_height, inp_width = get_const_tuple(data.shape) @@ -56,42 +57,42 @@ def conv2d_transpose_nchw(cfg, data, kernel, stride, padding, out_dtype): padding, (kernel_height, kernel_width)) out_width = (inp_width - 1) * stride_width + \ - kernel_width - pad_left - pad_right + kernel_width - pad_left - pad_right pad_left = kernel_width - 1 - pad_left pad_right = kernel_width - 1 - pad_right dilated_width = stride_width * (inp_width - 1) + 1 out_height = (inp_height - 1) * stride_height + \ - kernel_height - pad_top - pad_bottom + kernel_height - pad_top - pad_bottom pad_top = kernel_height - 1 - pad_top pad_bottom = kernel_height - 1 - pad_bottom dilated_height = stride_height * (inp_height - 1) + 1 # compute pad - data = tvm.compute( + data = te.compute( (batch, inp_channels, pad_top + dilated_height + pad_bottom, pad_left + dilated_width + pad_right), - lambda n, c, y, x: tvm.if_then_else( - tvm.all(x >= pad_left, - x < pad_left + dilated_width, - tvm.indexmod(x - pad_left, stride_width).equal(0), - y >= pad_top, - y < pad_top + dilated_height, - tvm.indexmod(y - pad_top, stride_height).equal(0)), + lambda n, c, y, x: tvm.tir.if_then_else( + tvm.tir.all(x >= pad_left, + x < pad_left + dilated_width, + tvm.tir.indexmod(x - pad_left, stride_width).equal(0), + y >= pad_top, + y < pad_top + 
dilated_height, + tvm.tir.indexmod(y - pad_top, stride_height).equal(0)), data[n, c, - tvm.indexdiv(y - pad_top, stride_height), - tvm.indexdiv(x - pad_left, stride_width)], - tvm.const(0., "float32")), + tvm.tir.indexdiv(y - pad_top, stride_height), + tvm.tir.indexdiv(x - pad_left, stride_width)], + tvm.tir.const(0., "float32")), name='data_pad') # compute transposed conv - dc = tvm.reduce_axis((0, inp_channels), name='dc') - dh = tvm.reduce_axis((0, kernel_height), name='dh') - dw = tvm.reduce_axis((0, kernel_width), name='dw') - data_out = tvm.compute( + dc = te.reduce_axis((0, inp_channels), name='dc') + dh = te.reduce_axis((0, kernel_height), name='dh') + dw = te.reduce_axis((0, kernel_width), name='dw') + data_out = te.compute( (batch, out_channels, out_height, out_width), - lambda b, c, h, w: tvm.sum( + lambda b, c, h, w: te.sum( data[b, dc, h + dh, w + dw].astype(out_dtype) * kernel[dc, c, @@ -119,8 +120,8 @@ def schedule_conv2d_transpose_nchw(cfg, outs): s: Schedule The computation schedule for conv2d transpose. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _fallback_schedule(N, F, Y, X): # pylint: disable=unused-argument @@ -181,7 +182,7 @@ def schedule_conv2d_transpose_nchw(cfg, outs): ##### space definition end ##### - if isinstance(kernel.op, tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and 'dilate' in kernel.op.tag: s[kernel].compute_inline() if conv.op in s.outputs: @@ -206,21 +207,21 @@ def schedule_conv2d_transpose_nchw(cfg, outs): bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x) s[output].reorder(bn, bf, by, bx, vn, vf, vy, vx, tn, tf, ty, tx, ni, fi, yi, xi) - s[output].bind(bn, tvm.thread_axis("blockIdx.z")) - s[output].bind(bf, tvm.thread_axis("blockIdx.y")) - s[output].bind(s[output].fuse(by, bx), tvm.thread_axis("blockIdx.x")) - s[output].bind(vn, tvm.thread_axis("vthread")) - s[output].bind(vf, tvm.thread_axis("vthread")) - s[output].bind(vy, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) + s[output].bind(bn, te.thread_axis("blockIdx.z")) + s[output].bind(bf, te.thread_axis("blockIdx.y")) + s[output].bind(s[output].fuse(by, bx), te.thread_axis("blockIdx.x")) + s[output].bind(vn, te.thread_axis("vthread")) + s[output].bind(vf, te.thread_axis("vthread")) + s[output].bind(vy, te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) cfg.define_knob("fuse_yx", [0, 1]) # fuse ty,tx or tn,tf if cfg["fuse_yx"].val: - s[output].bind(tn, tvm.thread_axis("threadIdx.z")) - s[output].bind(tf, tvm.thread_axis("threadIdx.y")) + s[output].bind(tn, te.thread_axis("threadIdx.z")) + s[output].bind(tf, te.thread_axis("threadIdx.y")) tyx = s[output].fuse(ty, tx) - s[output].bind(s[output].fuse(ty, tx), tvm.thread_axis("threadIdx.x")) + s[output].bind(s[output].fuse(ty, tx), 
te.thread_axis("threadIdx.x")) s[OL].compute_at(s[output], tyx) # number of threads @@ -228,9 +229,9 @@ def schedule_conv2d_transpose_nchw(cfg, outs): n_ty = cfg["tile_f"].size[2] n_tx = cfg["tile_y"].size[2] * cfg["tile_x"].size[2] else: - s[output].bind(s[output].fuse(tn, tf), tvm.thread_axis("threadIdx.z")) - s[output].bind(ty, tvm.thread_axis("threadIdx.y")) - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(s[output].fuse(tn, tf), te.thread_axis("threadIdx.z")) + s[output].bind(ty, te.thread_axis("threadIdx.y")) + s[output].bind(tx, te.thread_axis("threadIdx.x")) s[OL].compute_at(s[output], tx) # number of threads @@ -254,9 +255,9 @@ def schedule_conv2d_transpose_nchw(cfg, outs): tz, fused = s[load].split(fused, nparts=n_tz) ty, fused = s[load].split(fused, nparts=n_ty) tx, fused = s[load].split(fused, nparts=n_tx) - s[load].bind(tz, tvm.thread_axis("threadIdx.z")) - s[load].bind(ty, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.z")) + s[load].bind(ty, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val) s[output].pragma(kernel_scope, 'unroll_explicit', cfg['unroll_explicit'].val) diff --git a/topi/python/topi/cuda/conv2d_winograd.py b/topi/python/topi/cuda/conv2d_winograd.py index 6e09be9..881f63a 100644 --- a/topi/python/topi/cuda/conv2d_winograd.py +++ b/topi/python/topi/cuda/conv2d_winograd.py @@ -19,6 +19,7 @@ import logging import tvm +from tvm import te from tvm import autotvm from .. 
import nn @@ -75,50 +76,50 @@ def winograd_cuda(cfg, data, kernel, strides, padding, dilation, out_dtype, # transform kernel if not pre_computed: - r_kh = tvm.reduce_axis((0, KH), name='r_kh') - r_kw = tvm.reduce_axis((0, KW), name='r_kw') - kernel_pack = tvm.compute((alpha, alpha, CI, CO), lambda eps, nu, ci, co: - tvm.sum(kernel[co][ci][r_kh][r_kw] * - G[eps][r_kh] * G[nu][r_kw], - axis=[r_kh, r_kw]), name='kernel_pack') + r_kh = te.reduce_axis((0, KH), name='r_kh') + r_kw = te.reduce_axis((0, KW), name='r_kw') + kernel_pack = te.compute((alpha, alpha, CI, CO), lambda eps, nu, ci, co: + te.sum(kernel[co][ci][r_kh][r_kw] * + G[eps][r_kh] * G[nu][r_kw], + axis=[r_kh, r_kw]), name='kernel_pack') else: kernel_pack = kernel - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod # pack input tile - input_tile = tvm.compute((CI, P, alpha, alpha), lambda c, p, eps, nu: - data_pad[idxdiv(p, (nH * nW))][c][idxmod(idxdiv(p, nW), nH) * m + eps] - [idxmod(p, nW) * m + nu], name='d') + input_tile = te.compute((CI, P, alpha, alpha), lambda c, p, eps, nu: + data_pad[idxdiv(p, (nH * nW))][c][idxmod(idxdiv(p, nW), nH) * m + eps] + [idxmod(p, nW) * m + nu], name='d') # transform data - r_a = tvm.reduce_axis((0, alpha), 'r_a') - r_b = tvm.reduce_axis((0, alpha), 'r_a') - data_pack = tvm.compute((alpha, alpha, CI, P), lambda eps, nu, ci, p: - tvm.sum(input_tile[ci][p][r_a][r_b] * B[r_a][eps] * B[r_b][nu], - axis=[r_a, r_b]), name='data_pack') + r_a = te.reduce_axis((0, alpha), 'r_a') + r_b = te.reduce_axis((0, alpha), 'r_a') + data_pack = te.compute((alpha, alpha, CI, P), lambda eps, nu, ci, p: + te.sum(input_tile[ci][p][r_a][r_b] * B[r_a][eps] * B[r_b][nu], + axis=[r_a, r_b]), name='data_pack') # do batch gemm - ci = tvm.reduce_axis((0, CI), name='ci') - bgemm = tvm.compute((alpha, alpha, CO, P), lambda eps, nu, co, p: - tvm.sum(kernel_pack[eps][nu][ci][co] * - data_pack[eps][nu][ci][p], - axis=[ci]), name='bgemm') + ci = 
te.reduce_axis((0, CI), name='ci') + bgemm = te.compute((alpha, alpha, CO, P), lambda eps, nu, co, p: + te.sum(kernel_pack[eps][nu][ci][co] * + data_pack[eps][nu][ci][p], + axis=[ci]), name='bgemm') # inverse transform - r_a = tvm.reduce_axis((0, alpha), 'r_a') - r_b = tvm.reduce_axis((0, alpha), 'r_a') - inverse = tvm.compute((CO, P, m, m), lambda co, p, vh, vw: - tvm.sum(bgemm[r_a][r_b][co][p] * A[r_a][vh] * A[r_b][vw], - axis=[r_a, r_b]), name='inverse') + r_a = te.reduce_axis((0, alpha), 'r_a') + r_b = te.reduce_axis((0, alpha), 'r_a') + inverse = te.compute((CO, P, m, m), lambda co, p, vh, vw: + te.sum(bgemm[r_a][r_b][co][p] * A[r_a][vh] * A[r_b][vw], + axis=[r_a, r_b]), name='inverse') # output - output = tvm.compute((N, CO, H, W), lambda n, co, h, w: - inverse[co, - n * nH * nW + idxdiv(h, m) * nW + idxdiv(w, m), - idxmod(h, m), - idxmod(w, m)], - name='output', tag='conv2d_nchw_winograd') + output = te.compute((N, CO, H, W), lambda n, co, h, w: + inverse[co, + n * nH * nW + idxdiv(h, m) * nW + idxdiv(w, m), + idxmod(h, m), + idxmod(w, m)], + name='output', tag='conv2d_nchw_winograd') cfg.add_flop(2 * N * CO * H * W * CI * KH * KW) return output @@ -147,8 +148,8 @@ def schedule_winograd_cuda(cfg, s, output, pre_computed): fused = s[data_pack].fuse(c, p) bb, tt = s[data_pack].split(fused, 128) s[data_pack].reorder(bb, tt, pi, eps, nu) - s[data_pack].bind(bb, tvm.thread_axis("blockIdx.x")) - s[data_pack].bind(tt, tvm.thread_axis("threadIdx.x")) + s[data_pack].bind(bb, te.thread_axis("blockIdx.x")) + s[data_pack].bind(tt, te.thread_axis("threadIdx.x")) s[data_l].compute_at(s[data_pack], pi) s[input_tile].compute_at(s[data_pack], pi) @@ -172,12 +173,12 @@ def schedule_winograd_cuda(cfg, s, output, pre_computed): fused = s[kernel_pack].fuse(ci, co) bb, tt = s[kernel_pack].split(fused, 128) s[kernel_pack].reorder(bb, tt, eps, nu, r_a, r_b) - s[kernel_pack].bind(bb, tvm.thread_axis("blockIdx.x")) - s[kernel_pack].bind(tt, tvm.thread_axis("threadIdx.x")) + 
s[kernel_pack].bind(bb, te.thread_axis("blockIdx.x")) + s[kernel_pack].bind(tt, te.thread_axis("threadIdx.x")) else: kernel = kernel_pack - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() ##### space definition begin ##### @@ -213,15 +214,15 @@ def schedule_winograd_cuda(cfg, s, output, pre_computed): bz, vz, tz, zi = cfg["tile_b"].apply(s, C, b) by, vy, ty, yi = cfg["tile_y"].apply(s, C, y) bx, vx, tx, xi = cfg["tile_x"].apply(s, C, x) - s[C].bind(bz, tvm.thread_axis("blockIdx.z")) - s[C].bind(by, tvm.thread_axis("blockIdx.y")) - s[C].bind(bx, tvm.thread_axis("blockIdx.x")) - s[C].bind(vz, tvm.thread_axis("vthread")) - s[C].bind(vy, tvm.thread_axis("vthread")) - s[C].bind(vx, tvm.thread_axis("vthread")) - s[C].bind(tz, tvm.thread_axis("threadIdx.z")) - s[C].bind(ty, tvm.thread_axis("threadIdx.y")) - s[C].bind(tx, tvm.thread_axis("threadIdx.x")) + s[C].bind(bz, te.thread_axis("blockIdx.z")) + s[C].bind(by, te.thread_axis("blockIdx.y")) + s[C].bind(bx, te.thread_axis("blockIdx.x")) + s[C].bind(vz, te.thread_axis("vthread")) + s[C].bind(vy, te.thread_axis("vthread")) + s[C].bind(vx, te.thread_axis("vthread")) + s[C].bind(tz, te.thread_axis("threadIdx.z")) + s[C].bind(ty, te.thread_axis("threadIdx.y")) + s[C].bind(tx, te.thread_axis("threadIdx.x")) s[C].reorder(bgemm_scope, bz, by, bx, vz, vy, vx, tz, ty, tx, zi, yi, xi) # tile reduction axes @@ -241,9 +242,9 @@ def schedule_winograd_cuda(cfg, s, output, pre_computed): fused, tx = s[load].split(fused, cfg["tile_x"].size[2]) fused, ty = s[load].split(fused, cfg["tile_y"].size[2]) fused, tz = s[load].split(fused, cfg["tile_b"].size[2]) - s[load].bind(tz, tvm.thread_axis("threadIdx.z")) - s[load].bind(ty, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.z")) + s[load].bind(ty, te.thread_axis("threadIdx.y")) + 
s[load].bind(tx, te.thread_axis("threadIdx.x")) s[C].pragma(bgemm_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val) s[C].pragma(bgemm_scope, 'unroll_explicit', cfg['unroll_explicit'].val) @@ -264,8 +265,8 @@ def schedule_winograd_cuda(cfg, s, output, pre_computed): fused = s[output].fuse(n, co, ho, wo) bb, tt = s[output].split(fused, 128) - s[output].bind(bb, tvm.thread_axis("blockIdx.x")) - s[output].bind(tt, tvm.thread_axis("threadIdx.x")) + s[output].bind(bb, te.thread_axis("blockIdx.x")) + s[output].bind(tt, te.thread_axis("threadIdx.x")) if OL is not None: s[OL].compute_at(s[output], tt) @@ -286,7 +287,7 @@ def conv2d_nchw_winograd(cfg, data, kernel, strides, padding, dilation, out_dtyp @autotvm.register_topi_schedule("conv2d_nchw_winograd.cuda") def schedule_conv2d_nchw_winograd(cfg, outs): - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): if 'conv2d_nchw_winograd' in op.tag: @@ -306,7 +307,7 @@ def conv2d_nchw_winograd_without_weight_transform(cfg, data, kernel, strides, @autotvm.register_topi_schedule("conv2d_nchw_winograd_without_weight_transform.cuda") def schedule_conv2d_nchw_winograd_without_weight_transform(cfg, outs): """TOPI schedule callback""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): if 'conv2d_nchw_winograd' in op.tag: diff --git a/topi/python/topi/cuda/conv3d.py b/topi/python/topi/cuda/conv3d.py index 0a6a71c..cc13aa5 100644 --- a/topi/python/topi/cuda/conv3d.py +++ b/topi/python/topi/cuda/conv3d.py @@ -16,7 +16,7 @@ # under the License. 
# pylint: disable=invalid-name, unused-argument """Compute definition for conv3d with cuda backend""" -import tvm +from tvm import te from tvm import autotvm from tvm.contrib import cudnn @@ -34,10 +34,10 @@ def conv3d_ncdhw(cfg, data, kernel, strides, padding, dilation, out_dtype='float cfg: ConfigEntity The config for this template - data : tvm.Tensor + data : tvm.te.Tensor 5-D with shape [batch, in_channel, in_depth, in_height, in_width] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 5-D with shape [num_filter, in_channel, filter_depth, filter_height, filter_width] strides : int or a list/tuple of three ints @@ -54,7 +54,7 @@ def conv3d_ncdhw(cfg, data, kernel, strides, padding, dilation, out_dtype='float Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 5-D with shape [batch, out_channel, out_depth, out_height, out_width] """ return nn.conv3d_ncdhw(data, kernel, strides, padding, dilation, out_dtype) @@ -78,8 +78,8 @@ def schedule_conv3d_ncdhw(cfg, outs): s: Schedule The computation schedule for conv2d. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'conv3d_ncdhw': @@ -96,10 +96,10 @@ def conv3d_ndhwc(cfg, data, kernel, strides, padding, dilation, out_dtype='float Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 5-D with shape [batch, in_depth, in_height, in_width, in_channel] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 5-D with shape [filter_depth, filter_height, filter_width, in_channel, num_filter] stride : int or a list/tuple of three ints @@ -113,7 +113,7 @@ def conv3d_ndhwc(cfg, data, kernel, strides, padding, dilation, out_dtype='float Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 5-D with shape [batch, out_depth, out_height, out_width, out_channel] """ return nn.conv3d_ndhwc(data, kernel, strides, padding, dilation, out_dtype) @@ -137,8 +137,8 @@ def schedule_conv3d_ndhwc(cfg, outs): s: Schedule The computation schedule for conv2d. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'conv3d_ndhwc': @@ -159,10 +159,10 @@ def conv3d_cudnn(cfg, data, kernel, strides, padding, dilation, layout='NCDHW', cfg: ConfigEntity The config for this template - data : tvm.Tensor + data : tvm.te.Tensor 5-D with shape [batch, in_channel, in_depth, in_height, in_width] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 5-D with shape [num_filter, in_channel, filter_depth, filter_height, filter_width] strides : int or a list/tuple of three ints @@ -182,7 +182,7 @@ def conv3d_cudnn(cfg, data, kernel, strides, padding, dilation, layout='NCDHW', Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 5-D with shape [batch, out_channel, out_depth, out_height, out_width] """ if layout == 'NCDHW': diff --git a/topi/python/topi/cuda/conv3d_direct.py b/topi/python/topi/cuda/conv3d_direct.py index fa6c878..50b73d6 100644 --- a/topi/python/topi/cuda/conv3d_direct.py +++ b/topi/python/topi/cuda/conv3d_direct.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name """The templates for cuda conv3d operators""" import tvm +from tvm import te from tvm import autotvm from ..util import get_const_tuple @@ -57,7 +58,7 @@ def schedule_direct_conv3d_cuda(cfg, s, conv, layout, workload_name): pad_data, kernel = s[conv].op.input_tensors s[pad_data].compute_inline() - if isinstance(kernel.op, tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and 'dilate' in kernel.op.tag: s[kernel].compute_inline() if conv.op in s.outputs: @@ -84,16 +85,16 @@ def schedule_direct_conv3d_cuda(cfg, s, conv, layout, workload_name): bf = s[output].fuse(n, bf) s[output].reorder(bf, bd, by, bx, vf, vd, vy, vx, tf, td, ty, tx, fi, di, yi, xi) - s[output].bind(bf, tvm.thread_axis("blockIdx.z")) - 
s[output].bind(s[output].fuse(bd, by), tvm.thread_axis("blockIdx.y")) - s[output].bind(bx, tvm.thread_axis("blockIdx.x")) - s[output].bind(vf, tvm.thread_axis("vthread")) - s[output].bind(vd, tvm.thread_axis("vthread")) - s[output].bind(vy, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) - s[output].bind(s[output].fuse(td, tf), tvm.thread_axis("threadIdx.z")) - s[output].bind(ty, tvm.thread_axis("threadIdx.y")) - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(bf, te.thread_axis("blockIdx.z")) + s[output].bind(s[output].fuse(bd, by), te.thread_axis("blockIdx.y")) + s[output].bind(bx, te.thread_axis("blockIdx.x")) + s[output].bind(vf, te.thread_axis("vthread")) + s[output].bind(vd, te.thread_axis("vthread")) + s[output].bind(vy, te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) + s[output].bind(s[output].fuse(td, tf), te.thread_axis("threadIdx.z")) + s[output].bind(ty, te.thread_axis("threadIdx.y")) + s[output].bind(tx, te.thread_axis("threadIdx.x")) s[OL].compute_at(s[output], tx) # tile reduction axes @@ -116,9 +117,9 @@ def schedule_direct_conv3d_cuda(cfg, s, conv, layout, workload_name): td, fused = s[load].split(fused, nparts=cfg["tile_d"].size[2]) ty, fused = s[load].split(fused, nparts=cfg["tile_y"].size[2]) tx, fused = s[load].split(fused, nparts=cfg["tile_x"].size[2]) - s[load].bind(tz, tvm.thread_axis("threadIdx.z")) - s[load].bind(s[load].fuse(td, ty), tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.z")) + s[load].bind(s[load].fuse(td, ty), te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) # unroll s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val) diff --git a/topi/python/topi/cuda/deformable_conv2d.py b/topi/python/topi/cuda/deformable_conv2d.py index bdec4e1..8c31835 100644 --- a/topi/python/topi/cuda/deformable_conv2d.py +++ 
b/topi/python/topi/cuda/deformable_conv2d.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name,unused-argument """Schedule template of deformable conv2d with cuda backend""" import tvm +from tvm import te from tvm import autotvm from .. import nn from ..util import traverse_inline @@ -46,8 +47,8 @@ def schedule_deformable_conv2d_nchw(cfg, outs): s: Schedule The computation schedule for conv2d. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'deformable_conv2d_nchw': @@ -78,7 +79,7 @@ def _schedule_direct_cuda(cfg, s, conv): data_deform, kernel = s[conv].op.input_tensors s[data_deform].compute_inline() - if isinstance(kernel.op, tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and 'dilate' in kernel.op.tag: s[kernel].compute_inline() if conv.op in s.outputs: @@ -102,15 +103,15 @@ def _schedule_direct_cuda(cfg, s, conv): bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x) bf = s[output].fuse(n, bf) - s[output].bind(bf, tvm.thread_axis("blockIdx.z")) - s[output].bind(by, tvm.thread_axis("blockIdx.y")) - s[output].bind(bx, tvm.thread_axis("blockIdx.x")) - s[output].bind(vf, tvm.thread_axis("vthread")) - s[output].bind(vy, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) - s[output].bind(tf, tvm.thread_axis("threadIdx.z")) - s[output].bind(ty, tvm.thread_axis("threadIdx.y")) - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(bf, te.thread_axis("blockIdx.z")) + s[output].bind(by, te.thread_axis("blockIdx.y")) + s[output].bind(bx, te.thread_axis("blockIdx.x")) + s[output].bind(vf, te.thread_axis("vthread")) + s[output].bind(vy, te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) + s[output].bind(tf, te.thread_axis("threadIdx.z")) + 
s[output].bind(ty, te.thread_axis("threadIdx.y")) + s[output].bind(tx, te.thread_axis("threadIdx.x")) s[output].reorder(bf, by, bx, vf, vy, vx, tf, ty, tx, fi, yi, xi) s[OL].compute_at(s[output], tx) @@ -135,9 +136,9 @@ def _schedule_direct_cuda(cfg, s, conv): tz, fused = s[load].split(fused, nparts=cfg["tile_f"].size[2]) ty, fused = s[load].split(fused, nparts=cfg["tile_y"].size[2]) tx, fused = s[load].split(fused, nparts=cfg["tile_x"].size[2]) - s[load].bind(tz, tvm.thread_axis("threadIdx.z")) - s[load].bind(ty, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.z")) + s[load].bind(ty, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) # unroll s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val) diff --git a/topi/python/topi/cuda/dense.py b/topi/python/topi/cuda/dense.py index 93797a4..f5b6563 100644 --- a/topi/python/topi/cuda/dense.py +++ b/topi/python/topi/cuda/dense.py @@ -16,9 +16,8 @@ # under the License. 
# pylint: disable=invalid-name, unused-argument """Schedule for dense operator""" -from __future__ import absolute_import as _abs import logging -import tvm +from tvm import te import tvm.autotvm as autotvm from tvm.autotvm.task.space import SplitEntity from tvm.contrib import cublas @@ -45,9 +44,9 @@ def dense_cublas(cfg, data, weight, bias=None, out_dtype=None): matmul = cublas.matmul(data, weight, False, True) cfg.add_flop(batch * in_dim * out_dim * 2) if bias is not None: - matmul = tvm.compute((batch, out_dim), - lambda i, j: matmul[i, j] + bias[j], - tag=tag.BROADCAST) + matmul = te.compute((batch, out_dim), + lambda i, j: matmul[i, j] + bias[j], + tag=tag.BROADCAST) return matmul @@ -66,8 +65,8 @@ def dense_small_batch(cfg, data, weight, bias=None, out_dtype=None): @autotvm.register_topi_schedule("dense_small_batch.cuda") def schedule_dense_small_batch(cfg, outs): """Schedule float32/64 dense with small batch size""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'dense': @@ -91,11 +90,11 @@ def _schedule_dense_small_batch(cfg, s, C): else: Out = s.outputs[0].output(0) s[C].compute_at(s[Out], s[Out].op.axis[1]) - s[Out].bind(s[Out].op.axis[0], tvm.thread_axis("blockIdx.y")) - s[Out].bind(s[Out].op.axis[1], tvm.thread_axis("blockIdx.x")) + s[Out].bind(s[Out].op.axis[0], te.thread_axis("blockIdx.y")) + s[Out].bind(s[Out].op.axis[1], te.thread_axis("blockIdx.x")) tx = s[C].op.reduce_axis[0] - thread_x = tvm.thread_axis("threadIdx.x") + thread_x = te.thread_axis("threadIdx.x") s[C].bind(tx, thread_x) s[CF].compute_at(s[C], tx) s[C].set_store_predicate(thread_x.var.equal(0)) @@ -111,8 +110,8 @@ def dense_large_batch(cfg, data, weight, bias=None, out_dtype=None): @autotvm.register_topi_schedule("dense_large_batch.cuda") def schedule_dense_large_batch(cfg, 
outs): """Schedule float32/64 dense with large batch size""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'dense': @@ -185,12 +184,12 @@ def _schedule_dense_large_batch(cfg, s, C): s[CC].compute_at(s[C], tx) # Binding - s[C].bind(by, tvm.thread_axis("blockIdx.y")) - s[C].bind(bx, tvm.thread_axis("blockIdx.x")) - s[C].bind(tyz, tvm.thread_axis("vthread")) - s[C].bind(txz, tvm.thread_axis("vthread")) - s[C].bind(ty, tvm.thread_axis("threadIdx.y")) - s[C].bind(tx, tvm.thread_axis("threadIdx.x")) + s[C].bind(by, te.thread_axis("blockIdx.y")) + s[C].bind(bx, te.thread_axis("blockIdx.x")) + s[C].bind(tyz, te.thread_axis("vthread")) + s[C].bind(txz, te.thread_axis("vthread")) + s[C].bind(ty, te.thread_axis("threadIdx.y")) + s[C].bind(tx, te.thread_axis("threadIdx.x")) # Split reduction yo, xo = CC.op.axis @@ -207,8 +206,8 @@ def _schedule_dense_large_batch(cfg, s, C): ty, _ = s[AA].split(s[AA].op.axis[0], nparts=num_thread_x) _, xi = s[AA].split(s[AA].op.axis[1], factor=num_thread_x * 4) tx, xi = s[AA].split(xi, nparts=num_thread_x) - s[AA].bind(ty, tvm.thread_axis("threadIdx.y")) - s[AA].bind(tx, tvm.thread_axis("threadIdx.x")) + s[AA].bind(ty, te.thread_axis("threadIdx.y")) + s[AA].bind(tx, te.thread_axis("threadIdx.x")) s[AA].double_buffer() # Schedule for B' shared memory load @@ -216,8 +215,8 @@ def _schedule_dense_large_batch(cfg, s, C): ty, _ = s[BB].split(s[BB].op.axis[0], nparts=num_thread_y) _, xi = s[BB].split(s[BB].op.axis[1], factor=num_thread_y * 4) tx, xi = s[BB].split(xi, nparts=num_thread_y) - s[BB].bind(ty, tvm.thread_axis("threadIdx.y")) - s[BB].bind(tx, tvm.thread_axis("threadIdx.x")) + s[BB].bind(ty, te.thread_axis("threadIdx.y")) + s[BB].bind(tx, te.thread_axis("threadIdx.x")) s[BB].double_buffer() @@ -229,19 +228,19 @@ def dense_int8(cfg, 
data, weight, bias=None, out_dtype=None): batch, in_dim = get_const_tuple(data.shape) out_dim, _ = get_const_tuple(weight.shape) - k = tvm.reduce_axis((0, in_dim), name='k') + k = te.reduce_axis((0, in_dim), name='k') - matmul = tvm.compute((batch, out_dim), - lambda i, j: tvm.sum(data[i, k].astype(out_dtype) * - weight[j, k].astype(out_dtype), axis=[k]), - tag="dense_int8") + matmul = te.compute((batch, out_dim), + lambda i, j: te.sum(data[i, k].astype(out_dtype) * + weight[j, k].astype(out_dtype), axis=[k]), + tag="dense_int8") cfg.add_flop(batch * in_dim * out_dim * 2) if bias is not None: - matmul = tvm.compute((batch, out_dim), - lambda i, j: matmul[i, j] + bias[j].astype(out_dtype), - tag=tag.BROADCAST) + matmul = te.compute((batch, out_dim), + lambda i, j: matmul[i, j] + bias[j].astype(out_dtype), + tag=tag.BROADCAST) cfg.add_flop(batch * out_dim) return matmul @@ -250,8 +249,8 @@ def dense_int8(cfg, data, weight, bias=None, out_dtype=None): @autotvm.register_topi_schedule("dense_int8.cuda") def schedule_dense_int8(cfg, outs): """Dense schedule for int8 on CUDA""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if "dense_int8" in op.tag: @@ -302,12 +301,12 @@ def _schedule_dense_int8(cfg, s, output): bx, vx, tx, xi = cfg['tile_x'].apply(s, output, x) s[output].reorder(by, bx, vy, vx, ty, tx, yi, xi) - s[output].bind(by, tvm.thread_axis('blockIdx.y')) - s[output].bind(bx, tvm.thread_axis('blockIdx.x')) - s[output].bind(vy, tvm.thread_axis('vthread')) - s[output].bind(vx, tvm.thread_axis('vthread')) - s[output].bind(ty, tvm.thread_axis('threadIdx.y')) - s[output].bind(tx, tvm.thread_axis('threadIdx.x')) + s[output].bind(by, te.thread_axis('blockIdx.y')) + s[output].bind(bx, te.thread_axis('blockIdx.x')) + s[output].bind(vy, te.thread_axis('vthread')) + 
s[output].bind(vx, te.thread_axis('vthread')) + s[output].bind(ty, te.thread_axis('threadIdx.y')) + s[output].bind(tx, te.thread_axis('threadIdx.x')) n_ty = cfg['tile_y'].size[2] n_tx = cfg['tile_x'].size[2] @@ -325,8 +324,8 @@ def _schedule_dense_int8(cfg, s, output): fused, tx = s[load].split(fused, factor=n_tx) fused, ty = s[load].split(fused, factor=n_ty) - s[load].bind(tx, tvm.thread_axis('threadIdx.x')) - s[load].bind(ty, tvm.thread_axis('threadIdx.y')) + s[load].bind(tx, te.thread_axis('threadIdx.x')) + s[load].bind(ty, te.thread_axis('threadIdx.y')) s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val) s[output].pragma(kernel_scope, 'unroll_explicit', False) diff --git a/topi/python/topi/cuda/depthwise_conv2d.py b/topi/python/topi/cuda/depthwise_conv2d.py index 062f95f..db9da84 100644 --- a/topi/python/topi/cuda/depthwise_conv2d.py +++ b/topi/python/topi/cuda/depthwise_conv2d.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name, unused-argument """Schedule for depthwise_conv2d with auto fusion""" import tvm +from tvm import te from tvm import autotvm from ..util import traverse_inline from .. import tag @@ -43,8 +44,8 @@ def schedule_depthwise_conv2d_nchw(cfg, outs): s: Schedule The computation schedule for depthwise_conv2d nchw. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'depthwise_conv2d_nchw': @@ -75,7 +76,7 @@ def schedule_depthwise_conv2d_nchw(cfg, outs): ##### space definition end ##### s[pad_data].compute_inline() - if isinstance(kernel.op, tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and 'dilate' in kernel.op.tag: s[kernel].compute_inline() if conv.op in s.outputs: @@ -100,15 +101,15 @@ def schedule_depthwise_conv2d_nchw(cfg, outs): kernel_scope, n = s[output].split(n, nparts=1) bf = s[output].fuse(n, bf) - s[output].bind(bf, tvm.thread_axis("blockIdx.z")) - s[output].bind(by, tvm.thread_axis("blockIdx.y")) - s[output].bind(bx, tvm.thread_axis("blockIdx.x")) - s[output].bind(vf, tvm.thread_axis("vthread")) - s[output].bind(vy, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) - s[output].bind(tf, tvm.thread_axis("threadIdx.z")) - s[output].bind(ty, tvm.thread_axis("threadIdx.y")) - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(bf, te.thread_axis("blockIdx.z")) + s[output].bind(by, te.thread_axis("blockIdx.y")) + s[output].bind(bx, te.thread_axis("blockIdx.x")) + s[output].bind(vf, te.thread_axis("vthread")) + s[output].bind(vy, te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) + s[output].bind(tf, te.thread_axis("threadIdx.z")) + s[output].bind(ty, te.thread_axis("threadIdx.y")) + s[output].bind(tx, te.thread_axis("threadIdx.x")) s[output].reorder(bf, by, bx, vf, vy, vx, tf, ty, tx, fi, yi, xi) s[OL].compute_at(s[output], tx) @@ -123,9 +124,9 @@ def schedule_depthwise_conv2d_nchw(cfg, outs): fused, tx = s[load].split(fused, cfg["tile_x"].size[2]) fused, ty = s[load].split(fused, cfg["tile_y"].size[2]) fused, tz = s[load].split(fused, 
cfg["tile_f"].size[2]) - s[load].bind(tz, tvm.thread_axis("threadIdx.z")) - s[load].bind(ty, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.z")) + s[load].bind(ty, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val) s[output].pragma(kernel_scope, 'unroll_explicit', cfg['unroll_explicit'].val) @@ -147,8 +148,8 @@ def schedule_depthwise_conv2d_nhwc(outs): s: Schedule The computation schedule for depthwise_conv2d nhwc. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(temp, Filter, DepthwiseConv2d): s[temp].compute_inline() @@ -160,13 +161,13 @@ def schedule_depthwise_conv2d_nhwc(outs): Output = outs[0].op.output(0) s[DepthwiseConv2d].set_scope("local") - block_x = tvm.thread_axis("blockIdx.x") - thread_x = tvm.thread_axis("threadIdx.x") + block_x = te.thread_axis("blockIdx.x") + thread_x = te.thread_axis("threadIdx.x") b, h, w, c = s[Output].op.axis # num_thread here could be 728, it is larger than cuda.max_num_threads - num_thread = tvm.ir_pass.Simplify(temp.shape[3]).value + num_thread = tvm.tir.ir_pass.Simplify(temp.shape[3]).value target = tvm.target.Target.current() if target and (target.target_name not in ["cuda", "nvptx"]): num_thread = target.max_num_threads @@ -199,13 +200,13 @@ def schedule_depthwise_conv2d_nhwc(outs): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule depthwise_conv2d if OP.tag == 'depthwise_conv2d_nhwc': PaddedInput = 
OP.input_tensors[0] Filter = OP.input_tensors[1] - if isinstance(Filter.op, tvm.tensor.ComputeOp) and 'dilate' in Filter.op.tag: + if isinstance(Filter.op, tvm.te.ComputeOp) and 'dilate' in Filter.op.tag: s[Filter].compute_inline() DepthwiseConv2d = OP.output(0) _schedule(PaddedInput, Filter, DepthwiseConv2d) @@ -231,14 +232,14 @@ def schedule_depthwise_conv2d_backward_input_nhwc(outs): The computation schedule for depthwise_conv2d backward wrt input with layout nhwc. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(Padded_out_grad, In_grad): s[Padded_out_grad].compute_inline() - block_x = tvm.thread_axis("blockIdx.x") - thread_x = tvm.thread_axis("threadIdx.x") + block_x = te.thread_axis("blockIdx.x") + thread_x = te.thread_axis("threadIdx.x") _, h, w, c = In_grad.op.axis fused_hwc = s[In_grad].fuse(h, w, c) @@ -276,13 +277,13 @@ def schedule_depthwise_conv2d_backward_weight_nhwc(outs): The computation schedule for depthwise_conv2d backward wrt weight with layout nhwc. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(Weight_grad): - block_x = tvm.thread_axis("blockIdx.x") - thread_y = tvm.thread_axis("threadIdx.y") - thread_x = tvm.thread_axis("threadIdx.x") + block_x = te.thread_axis("blockIdx.x") + thread_y = te.thread_axis("threadIdx.y") + thread_x = te.thread_axis("threadIdx.x") db, dh, dw = Weight_grad.op.reduce_axis diff --git a/topi/python/topi/cuda/group_conv2d_nchw.py b/topi/python/topi/cuda/group_conv2d_nchw.py index 5abf298..c5cf72b 100644 --- a/topi/python/topi/cuda/group_conv2d_nchw.py +++ b/topi/python/topi/cuda/group_conv2d_nchw.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name """The template for cuda group_conv2d_nchw""" import tvm +from tvm import te from tvm import autotvm from .injective import schedule_injective_from_existing @@ -51,8 +52,8 @@ def schedule_group_conv2d_nchw(cfg, outs): s: Schedule The computation schedule for group conv2d. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == "group_conv2d_nchw": @@ -115,21 +116,21 @@ def _schedule_group_conv2d_nchw_direct(cfg, s, conv): bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x) s[output].reorder(bn, bg, bf, by, bx, vn, vg, vf, vy, vx, tn, tf, ty, tx, ni, fi, yi, xi) - s[output].bind(bn, tvm.thread_axis("blockIdx.z")) - s[output].bind(s[output].fuse(bg, bf), tvm.thread_axis("blockIdx.y")) - s[output].bind(s[output].fuse(by, bx), tvm.thread_axis("blockIdx.x")) - s[output].bind(vn, tvm.thread_axis("vthread")) - s[output].bind(vg, tvm.thread_axis("vthread")) - s[output].bind(vf, tvm.thread_axis("vthread")) - s[output].bind(vy, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) + s[output].bind(bn, te.thread_axis("blockIdx.z")) + s[output].bind(s[output].fuse(bg, bf), te.thread_axis("blockIdx.y")) + s[output].bind(s[output].fuse(by, bx), te.thread_axis("blockIdx.x")) + s[output].bind(vn, te.thread_axis("vthread")) + s[output].bind(vg, te.thread_axis("vthread")) + s[output].bind(vf, te.thread_axis("vthread")) + s[output].bind(vy, te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) cfg.define_knob("fuse_yx", [0, 1]) # fuse ty,tx or tn,tf if cfg["fuse_yx"].val: - s[output].bind(tn, tvm.thread_axis("threadIdx.z")) - s[output].bind(tf, tvm.thread_axis("threadIdx.y")) + s[output].bind(tn, te.thread_axis("threadIdx.z")) + s[output].bind(tf, te.thread_axis("threadIdx.y")) tyx = s[output].fuse(ty, tx) - s[output].bind(tyx, tvm.thread_axis("threadIdx.x")) + s[output].bind(tyx, te.thread_axis("threadIdx.x")) s[OL].compute_at(s[output], tyx) # number of threads @@ -137,9 +138,9 @@ def _schedule_group_conv2d_nchw_direct(cfg, s, conv): n_ty = cfg["tile_f"].size[2] n_tx = cfg["tile_y"].size[2] * 
cfg["tile_x"].size[2] else: - s[output].bind(s[output].fuse(tn, tf), tvm.thread_axis("threadIdx.z")) - s[output].bind(ty, tvm.thread_axis("threadIdx.y")) - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(s[output].fuse(tn, tf), te.thread_axis("threadIdx.z")) + s[output].bind(ty, te.thread_axis("threadIdx.y")) + s[output].bind(tx, te.thread_axis("threadIdx.x")) s[OL].compute_at(s[output], tx) # number of threads @@ -165,9 +166,9 @@ def _schedule_group_conv2d_nchw_direct(cfg, s, conv): fused, tx = s[load].split(fused, factor=n_tx) fused, ty = s[load].split(fused, factor=n_ty) fused, tz = s[load].split(fused, factor=n_tz) - s[load].bind(tz, tvm.thread_axis("threadIdx.z")) - s[load].bind(ty, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.z")) + s[load].bind(ty, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) # unroll s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val) @@ -185,11 +186,11 @@ def group_conv2d_NCHWc_int8(cfg, data, kernel, stride, padding, dilation, groups Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] or 5-D with shape [batch, in_channel_chunk, in_height, in_width, in_channel_block] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 4-D with shape [num_filter, in_channel // groups, filter_height, filter_width] or 6-D with shape [num_filter_chunk, in_channel_chunk // groups, filter_height, filter_width, num_filter_block, in_channel_block] @@ -211,7 +212,7 @@ def group_conv2d_NCHWc_int8(cfg, data, kernel, stride, padding, dilation, groups Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 5-D with shape [batch, out_channel, out_height, out_width, out_channel_block] """ ic_block_factor = 4 @@ -230,11 +231,11 @@ def group_conv2d_NCHWc_int8(cfg, data, kernel, stride, padding, dilation, groups assert out_channels 
% oc_block_factor == 0, \ "Number of output channels per group must divide {}".format(oc_block_factor) - packed_data = tvm.compute((batch, channels // ic_block_factor, height, width, - ic_block_factor), - lambda n, c, h, w, vc: data[n, c*ic_block_factor + vc, h, w], - name="packed_data") - packed_kernel = tvm.compute( + packed_data = te.compute((batch, channels // ic_block_factor, height, width, + ic_block_factor), + lambda n, c, h, w, vc: data[n, c*ic_block_factor + vc, h, w], + name="packed_data") + packed_kernel = te.compute( (out_channels // oc_block_factor, in_channels // ic_block_factor, kernel_h, kernel_w, oc_block_factor, ic_block_factor), lambda oc_chunk, ic_chunk, kh, kw, oc_block, ic_block: @@ -286,10 +287,10 @@ def group_conv2d_NCHWc_int8(cfg, data, kernel, stride, padding, dilation, groups oshape = (batch, oc_chunk, out_height, out_width, oc_block) - icc = tvm.reduce_axis((0, ic_chunk // groups), name='ic_chunk') - icb = tvm.reduce_axis((0, ic_block_factor), name='ic_block') - kh = tvm.reduce_axis((0, kernel_h), name='kh') - kw = tvm.reduce_axis((0, kernel_w), name='kw') + icc = te.reduce_axis((0, ic_chunk // groups), name='ic_chunk') + icb = te.reduce_axis((0, ic_block_factor), name='ic_block') + kh = te.reduce_axis((0, kernel_h), name='kh') + kw = te.reduce_axis((0, kernel_w), name='kw') # NOTE(kumasento): explanation of this snippet - # oc_chunk//groups and ic_chunk//groups give you the number of blocks, @@ -302,20 +303,20 @@ def group_conv2d_NCHWc_int8(cfg, data, kernel, stride, padding, dilation, groups # # Compared with a normal convolution, group convolution only sums # input channels from the group that an output channel resides in. 
- conv = tvm.compute( + conv = te.compute( oshape, lambda n, occ, oh, ow, ocb: - tvm.sum(pad_data[n, occ//(oc_chunk//groups)*(ic_chunk//groups)+icc, - oh*stride_h+kh*dilation_h, ow*stride_w+kw*dilation_w, icb] - .astype('int32') * - packed_kernel[occ, icc, kh, kw, ocb, icb].astype('int32'), - axis=[icc, kh, kw, icb])) + te.sum(pad_data[n, occ//(oc_chunk//groups)*(ic_chunk//groups)+icc, + oh*stride_h+kh*dilation_h, ow*stride_w+kw*dilation_w, icb] + .astype('int32') * + packed_kernel[occ, icc, kh, kw, ocb, icb].astype('int32'), + axis=[icc, kh, kw, icb])) # Type conversion - output = tvm.compute(oshape, lambda *index: conv(*index).astype(out_dtype), - tag='group_conv2d_NCHWc_int8') + output = te.compute(oshape, lambda *index: conv(*index).astype(out_dtype), + tag='group_conv2d_NCHWc_int8') num_flop = batch * oc_chunk * oc_block * out_height * out_width * \ - ic_chunk * ic_block * kernel_h * kernel_w * 2 // groups + ic_chunk * ic_block * kernel_h * kernel_w * 2 // groups cfg.add_flop(num_flop) return output @@ -339,8 +340,8 @@ def schedule_group_conv2d_NCHWc_int8(cfg, outs): s: Schedule The computation schedule for group conv2d. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == "group_conv2d_NCHWc_int8": @@ -361,7 +362,7 @@ def _schedule_group_conv2d_NCHWc_int8(cfg, s, output): conv = output.op.input_tensors[0] packed_data, packed_kernel = conv.op.input_tensors - if isinstance(packed_data.op, tvm.tensor.ComputeOp) and "pad" in packed_data.op.tag: + if isinstance(packed_data.op, tvm.te.ComputeOp) and "pad" in packed_data.op.tag: pad_data = packed_data packed_data = pad_data.op.input_tensors[0] else: @@ -374,7 +375,7 @@ def _schedule_group_conv2d_NCHWc_int8(cfg, s, output): s[packed_kernel].pragma( s[packed_kernel].op.axis[0], "debug_skip_region") else: - if isinstance(packed_kernel.op, tvm.tensor.ComputeOp) and \ + if isinstance(packed_kernel.op, tvm.te.ComputeOp) and \ packed_kernel.name == 'packed_kernel': # data and kernel are not pre-computed, schedule layout transform here schedule_injective_from_existing(s, packed_data) @@ -407,7 +408,7 @@ def _schedule_group_conv2d_NCHWc_int8(cfg, s, output): kernel_scope, n = s[output].split(n, nparts=1) g, f = s[output].split(f, nparts=groups) - s[output].bind(n, tvm.thread_axis('blockIdx.z')) + s[output].bind(n, te.thread_axis('blockIdx.z')) bn, vn, tn, ni = cfg["tile_n"].apply(s, output, n) bg, vg = cfg["tile_g"].apply(s, output, g) bf, vf, tf, fi = cfg["tile_f"].apply(s, output, f) @@ -416,20 +417,20 @@ def _schedule_group_conv2d_NCHWc_int8(cfg, s, output): s[output].reorder(bn, bg, bf, by, bx, vn, vg, vf, vy, vx, tn, tf, ty, tx, ni, fi, yi, xi) - s[output].bind(bn, tvm.thread_axis("blockIdx.z")) - s[output].bind(s[output].fuse(bg, bf), tvm.thread_axis("blockIdx.y")) - s[output].bind(s[output].fuse(by, bx), tvm.thread_axis("blockIdx.x")) - s[output].bind(vn, tvm.thread_axis("vthread")) - s[output].bind(vg, tvm.thread_axis("vthread")) - 
s[output].bind(vf, tvm.thread_axis("vthread")) - s[output].bind(vy, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) + s[output].bind(bn, te.thread_axis("blockIdx.z")) + s[output].bind(s[output].fuse(bg, bf), te.thread_axis("blockIdx.y")) + s[output].bind(s[output].fuse(by, bx), te.thread_axis("blockIdx.x")) + s[output].bind(vn, te.thread_axis("vthread")) + s[output].bind(vg, te.thread_axis("vthread")) + s[output].bind(vf, te.thread_axis("vthread")) + s[output].bind(vy, te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) cfg.define_knob("fuse_yx", [0, 1]) # fuse ty,tx or tn,tf if cfg["fuse_yx"].val: - s[output].bind(tn, tvm.thread_axis("threadIdx.z")) - s[output].bind(tf, tvm.thread_axis("threadIdx.y")) + s[output].bind(tn, te.thread_axis("threadIdx.z")) + s[output].bind(tf, te.thread_axis("threadIdx.y")) tyx = s[output].fuse(ty, tx) - s[output].bind(tyx, tvm.thread_axis("threadIdx.x")) + s[output].bind(tyx, te.thread_axis("threadIdx.x")) s[conv].compute_at(s[output], tyx) # number of threads @@ -437,10 +438,10 @@ def _schedule_group_conv2d_NCHWc_int8(cfg, s, output): n_ty = cfg["tile_f"].size[2] n_tx = cfg["tile_y"].size[2] * cfg["tile_x"].size[2] else: - s[output].bind(tn, tvm.thread_axis("threadIdx.z")) - s[output].bind(s[output].fuse(tn, tf), tvm.thread_axis("threadIdx.z")) - s[output].bind(ty, tvm.thread_axis("threadIdx.y")) - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(tn, te.thread_axis("threadIdx.z")) + s[output].bind(s[output].fuse(tn, tf), te.thread_axis("threadIdx.z")) + s[output].bind(ty, te.thread_axis("threadIdx.y")) + s[output].bind(tx, te.thread_axis("threadIdx.x")) s[conv].compute_at(s[output], tx) # number of threads @@ -476,9 +477,9 @@ def _schedule_group_conv2d_NCHWc_int8(cfg, s, output): fused, tx = s[load].split(fused, factor=n_tx) fused, ty = s[load].split(fused, factor=n_ty) fused, tz = s[load].split(fused, factor=n_tz) - s[load].bind(tz, 
tvm.thread_axis("threadIdx.z")) - s[load].bind(ty, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.z")) + s[load].bind(ty, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) # double buffer cfg.define_knob('AA_double_buffer', [0, 1]) diff --git a/topi/python/topi/cuda/injective.py b/topi/python/topi/cuda/injective.py index 1690407..303fe5f 100644 --- a/topi/python/topi/cuda/injective.py +++ b/topi/python/topi/cuda/injective.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name, unused-variable, """Schedule for composition of injective operator""" import tvm +from tvm import te from .. import util def schedule_injective_from_existing(sch, out): @@ -56,12 +57,12 @@ def schedule_injective_from_existing(sch, out): xo, xi = sch[out].split(fused, factor=num_thread * max_block) bx, tx = sch[out].split(xi, factor=num_thread) sch[out].reorder(bx, tx, xo) - sch[out].bind(bx, tvm.thread_axis("blockIdx.x")) - sch[out].bind(tx, tvm.thread_axis("threadIdx.x")) + sch[out].bind(bx, te.thread_axis("blockIdx.x")) + sch[out].bind(tx, te.thread_axis("threadIdx.x")) else: bx, tx = sch[out].split(fused, factor=num_thread) - sch[out].bind(tx, tvm.thread_axis("threadIdx.x")) - sch[out].bind(bx, tvm.thread_axis("blockIdx.x")) + sch[out].bind(tx, te.thread_axis("threadIdx.x")) + sch[out].bind(bx, te.thread_axis("blockIdx.x")) return sch @@ -79,10 +80,10 @@ def schedule_injective(outs): sch: Schedule The computation schedule for the op. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) - tvm.schedule.AutoInlineInjective(s) + tvm.te.schedule.AutoInlineInjective(s) for out in outs: if not util.is_empty_shape(out.shape): schedule_injective_from_existing(s, out) diff --git a/topi/python/topi/cuda/nms.py b/topi/python/topi/cuda/nms.py index 27a5272..e008dcd 100644 --- a/topi/python/topi/cuda/nms.py +++ b/topi/python/topi/cuda/nms.py @@ -19,20 +19,20 @@ """Non-maximum suppression operator""" import math import tvm +from tvm import te -from tvm import api -from tvm.intrin import if_then_else +from tvm.tir import if_then_else from .sort import argsort from .. import tag def cuda_atomic_add_rule(op): if op.dtype == "float32": - return tvm.call_pure_extern("float32", "atomicAdd", op.args[0], op.args[1]) + return tvm.tir.call_pure_extern("float32", "atomicAdd", op.args[0], op.args[1]) if op.dtype == "float64": - return tvm.call_pure_extern("float64", "atomicAdd", op.args[0], op.args[1]) + return tvm.tir.call_pure_extern("float64", "atomicAdd", op.args[0], op.args[1]) if op.dtype == "int32": - return tvm.call_pure_extern("int32", "atomicAdd", op.args[0], op.args[1]) + return tvm.tir.call_pure_extern("int32", "atomicAdd", op.args[0], op.args[1]) raise RuntimeError("only support int32, float32 and float64") @@ -41,7 +41,7 @@ tvm.target.intrin.register_intrin_rule( def atomic_add(x, y): - return tvm.call_pure_intrin(y.dtype, "atomic_add", x, y) + return tvm.tir.call_pure_intrin(y.dtype, "atomic_add", x, y) def get_valid_counts_ir(data, valid_count, flag, score_threshold, id_index, score_index): @@ -78,7 +78,7 @@ def get_valid_counts_ir(data, valid_count, flag, score_threshold, id_index, scor num_anchors = data.shape[1] elem_length = data.shape[2] - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() data = ib.buffer_ptr(data) 
@@ -86,22 +86,22 @@ def get_valid_counts_ir(data, valid_count, flag, score_threshold, id_index, scor flag = ib.buffer_ptr(flag) atomic_add_return = ib.allocate( valid_count.dtype, (1,), name='atomic_add_return', scope='local') - one_count = tvm.const(1, dtype=valid_count.dtype) - score_threshold = tvm.make.node( + one_count = tvm.tir.const(1, dtype=valid_count.dtype) + score_threshold = tvm.ir.make_node( "FloatImm", dtype="float32", value=score_threshold) - id_index = tvm.make.node("IntImm", dtype="int32", value=id_index) - score_index = tvm.make.node("IntImm", dtype="int32", value=score_index) + id_index = tvm.ir.make_node("IntImm", dtype="int32", value=id_index) + score_index = tvm.ir.make_node("IntImm", dtype="int32", value=score_index) max_threads = int(tvm.target.Target.current( allow_none=False).max_num_threads) nthread_tx = max_threads nthread_bx = batch_size * num_anchors // max_threads + 1 - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("blockIdx.x") ib.scope_attr(tx, "thread_extent", nthread_tx) ib.scope_attr(bx, "thread_extent", nthread_bx) tid = bx * max_threads + tx - idxd = tvm.indexdiv + idxd = tvm.tir.indexdiv # initialize valid_count with ib.if_scope(tid < batch_size): @@ -111,11 +111,12 @@ def get_valid_counts_ir(data, valid_count, flag, score_threshold, id_index, scor flag[tid] = 0 with ib.if_scope(tid < batch_size * num_anchors): i = idxd(tid, num_anchors) - with ib.if_scope(tvm.all(data[tid * elem_length + score_index] > score_threshold, - tvm.any(id_index < 0, data[tid * elem_length + id_index] >= 0))): + with ib.if_scope( + tvm.tir.all(data[tid * elem_length + score_index] > score_threshold, + tvm.tir.any(id_index < 0, data[tid * elem_length + id_index] >= 0))): flag[tid] = 1 - atomic_add_return[0] = atomic_add(tvm.call_pure_intrin("handle", "tvm_address_of", - valid_count[i]), one_count) + atomic_add_return[0] = atomic_add(tvm.tir.call_pure_intrin("handle", 
"tvm_address_of", + valid_count[i]), one_count) return ib.get() @@ -140,7 +141,7 @@ def flag_scan(flag, prefix_sum): batch_size = flag.shape[0] num_anchors = flag.shape[1] - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() flag = ib.buffer_ptr(flag) prefix_sum = ib.buffer_ptr(prefix_sum) @@ -149,13 +150,13 @@ def flag_scan(flag, prefix_sum): allow_none=False).max_num_threads) nthread_tx = max_threads nthread_bx = batch_size * num_anchors // max_threads + 1 - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("blockIdx.x") ib.scope_attr(tx, "thread_extent", nthread_tx) ib.scope_attr(bx, "thread_extent", nthread_bx) tid = bx * max_threads + tx - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod # initialize prefix_sum with ib.if_scope(tid < batch_size * num_anchors): @@ -200,9 +201,9 @@ def out_rewrite(data, flag, prefix_sum, valid_count, out): num_anchors = out.shape[1] elem_length = out.shape[2] - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() - one = tvm.const(1, dtype=out.dtype) + one = tvm.tir.const(1, dtype=out.dtype) data = ib.buffer_ptr(data) flag = ib.buffer_ptr(flag) valid_count = ib.buffer_ptr(valid_count) @@ -213,20 +214,20 @@ def out_rewrite(data, flag, prefix_sum, valid_count, out): allow_none=False).max_num_threads) nthread_tx = max_threads nthread_bx = batch_size * num_anchors // max_threads + 1 - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("blockIdx.x") ib.scope_attr(tx, "thread_extent", nthread_tx) ib.scope_attr(bx, "thread_extent", nthread_bx) tid = bx * max_threads + tx - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod with ib.if_scope(tid < batch_size * num_anchors): i = idxd(tid, num_anchors) j = idxm(tid, num_anchors) base_idx = i * num_anchors * elem_length 
- with ib.if_scope(tvm.all(flag[tid] > 0, prefix_sum[tid] >= 0, - prefix_sum[tid] < num_anchors)): + with ib.if_scope(tvm.tir.all(flag[tid] > 0, prefix_sum[tid] >= 0, + prefix_sum[tid] < num_anchors)): with ib.for_range(0, elem_length) as k: out[base_idx + prefix_sum[tid] * elem_length + k] = data[tid * elem_length + k] @@ -243,7 +244,7 @@ def get_valid_counts(data, score_threshold=0, id_index=0, score_index=1): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor Input data. 3-D tensor with shape [batch_size, num_anchors, elem_length]. score_threshold : optional, float @@ -257,28 +258,28 @@ def get_valid_counts(data, score_threshold=0, id_index=0, score_index=1): Returns ------- - valid_count : tvm.Tensor + valid_count : tvm.te.Tensor 1-D tensor for valid number of boxes. - out_tensor : tvm.Tensor + out_tensor : tvm.te.Tensor Rearranged data tensor. """ batch_size = data.shape[0] num_anchors = data.shape[1] - data_buf = api.decl_buffer( + data_buf = tvm.tir.decl_buffer( data.shape, data.dtype, "data_buf", data_alignment=8) - valid_count_buf = api.decl_buffer( + valid_count_buf = tvm.tir.decl_buffer( (batch_size,), "int32", "valid_count_buf", data_alignment=8) - temp_flag_buf = api.decl_buffer( + temp_flag_buf = tvm.tir.decl_buffer( (batch_size, num_anchors,), "int32", "temp_flag", data_alignment=8) - temp_partial_buf = api.decl_buffer( + temp_partial_buf = tvm.tir.decl_buffer( (batch_size, num_anchors), "int32", "temp_partial", data_alignment=8) - out_buf = api.decl_buffer( + out_buf = tvm.tir.decl_buffer( data.shape, data.dtype, "out_buf", data_alignment=8) valid_count, temp_flag = \ - tvm.extern([(batch_size,), (batch_size, num_anchors)], [data], - lambda ins, outs: get_valid_counts_ir( + te.extern([(batch_size,), (batch_size, num_anchors)], [data], + lambda ins, outs: get_valid_counts_ir( ins[0], outs[0], outs[1], score_threshold, id_index, score_index), dtype=["int32", "int32"], in_buffers=[data_buf], @@ -287,8 +288,8 @@ def get_valid_counts(data, 
score_threshold=0, id_index=0, score_index=1): tag="get_valid_counts_gpu") temp_partial = \ - tvm.extern([(batch_size, num_anchors)], [temp_flag], - lambda ins, outs: flag_scan( + te.extern([(batch_size, num_anchors)], [temp_flag], + lambda ins, outs: flag_scan( ins[0], outs[0]), dtype=["int32"], in_buffers=[temp_flag_buf], @@ -296,8 +297,8 @@ def get_valid_counts(data, score_threshold=0, id_index=0, score_index=1): name="flag_scan") out = \ - tvm.extern([data.shape], [data, temp_flag, temp_partial, valid_count], - lambda ins, outs: out_rewrite( + te.extern([data.shape], [data, temp_flag, temp_partial, valid_count], + lambda ins, outs: out_rewrite( ins[0], ins[1], ins[2], ins[3], outs[0]), dtype=[data.dtype], in_buffers=[data_buf, temp_flag_buf, @@ -357,22 +358,22 @@ def nms_ir(data, sorted_index, valid_count, out, box_indices, def calculate_overlap(out_tensor, box_a_idx, box_b_idx): """Calculate overlap of two boxes. """ - w = tvm.max(0.0, tvm.min(out_tensor[box_a_idx + 2], out_tensor[box_b_idx + 2]) - - tvm.max(out_tensor[box_a_idx], out_tensor[box_b_idx])) - h = tvm.max(0.0, tvm.min(out_tensor[box_a_idx + 3], out_tensor[box_b_idx + 3]) - - tvm.max(out_tensor[box_a_idx + 1], out_tensor[box_b_idx + 1])) + w = tvm.te.max(0.0, tvm.te.min(out_tensor[box_a_idx + 2], out_tensor[box_b_idx + 2]) + - tvm.te.max(out_tensor[box_a_idx], out_tensor[box_b_idx])) + h = tvm.te.max(0.0, tvm.te.min(out_tensor[box_a_idx + 3], out_tensor[box_b_idx + 3]) + - tvm.te.max(out_tensor[box_a_idx + 1], out_tensor[box_b_idx + 1])) i = w * h u = (out_tensor[box_a_idx + 2] - out_tensor[box_a_idx]) * \ (out_tensor[box_a_idx + 3] - out_tensor[box_a_idx + 1]) + \ (out_tensor[box_b_idx + 2] - out_tensor[box_b_idx]) * \ (out_tensor[box_b_idx + 3] - out_tensor[box_b_idx + 1]) - i - return tvm.expr.Select(u <= 0.0, 0.0, i / u) + return tvm.tir.Select(u <= 0.0, 0.0, i / u) batch_size = data.shape[0] num_anchors = data.shape[1] box_data_length = data.shape[2] - ib = tvm.ir_builder.create() + ib = 
tvm.tir.ir_builder.create() data = ib.buffer_ptr(data) sorted_index = ib.buffer_ptr(sorted_index) @@ -386,27 +387,27 @@ def nms_ir(data, sorted_index, valid_count, out, box_indices, tvm.target.Target.current(allow_none=False).max_num_threads) nthread_tx = max_threads nthread_bx = num_anchors // max_threads + 1 - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("blockIdx.x") ib.scope_attr(tx, "thread_extent", nthread_tx) ib.scope_attr(bx, "thread_extent", nthread_bx) j = bx * max_threads + tx - iou_threshold = tvm.make.node( + iou_threshold = tvm.ir.make_node( "FloatImm", dtype="float32", value=iou_threshold) - top_k = tvm.make.node("IntImm", dtype="int32", value=top_k) - coord_start = tvm.make.node("IntImm", dtype="int32", value=coord_start) - id_index = tvm.make.node("IntImm", dtype="int32", value=id_index) - score_index = tvm.make.node("IntImm", dtype="int32", value=score_index) - force_suppress = tvm.make.node( + top_k = tvm.ir.make_node("IntImm", dtype="int32", value=top_k) + coord_start = tvm.ir.make_node("IntImm", dtype="int32", value=coord_start) + id_index = tvm.ir.make_node("IntImm", dtype="int32", value=id_index) + score_index = tvm.ir.make_node("IntImm", dtype="int32", value=score_index) + force_suppress = tvm.ir.make_node( "IntImm", dtype="int32", value=1 if force_suppress else 0) with ib.for_range(0, batch_size, for_type="unroll") as i: base_idx = i * num_anchors * box_data_length - with ib.if_scope(tvm.all(iou_threshold > 0, valid_count[i] > 0)): + with ib.if_scope(tvm.tir.all(iou_threshold > 0, valid_count[i] > 0)): # Reorder output nkeep = if_then_else( - tvm.all(top_k > 0, top_k < valid_count[i]), + tvm.tir.all(top_k > 0, top_k < valid_count[i]), top_k, valid_count[i]) with ib.if_scope(j < nkeep): with ib.for_range(0, box_data_length) as k: @@ -415,7 +416,7 @@ def nms_ir(data, sorted_index, valid_count, out, box_indices, * box_data_length + k)] box_indices[i * 
num_anchors + j] = sorted_index[i * num_anchors + j] - with ib.if_scope(tvm.all(top_k > 0, top_k < valid_count[i])): + with ib.if_scope(tvm.tir.all(top_k > 0, top_k < valid_count[i])): with ib.if_scope(j < valid_count[i] - nkeep): with ib.for_range(0, box_data_length) as k: out[(base_idx + (j + nkeep) * box_data_length + k)] = -1.0 @@ -423,19 +424,21 @@ def nms_ir(data, sorted_index, valid_count, out, box_indices, # Apply nms with ib.for_range(0, valid_count[i]) as k: offset_k = k * box_data_length - with ib.if_scope(tvm.all(out[base_idx + offset_k + score_index] > 0, - tvm.any(id_index < 0, out[base_idx + - offset_k + id_index] >= 0))): + with ib.if_scope( + tvm.tir.all(out[base_idx + offset_k + score_index] > 0, + tvm.tir.any(id_index < 0, out[base_idx + + offset_k + id_index] >= 0))): with ib.if_scope(j < valid_count[i]): offset_j = j * box_data_length - with ib.if_scope(tvm.all(j > k, - out[base_idx + offset_j + - score_index] > 0, - tvm.any(id_index < 0, - out[base_idx + offset_j + id_index] >= 0), - tvm.any(force_suppress > 0, id_index < 0, - out[base_idx + offset_k + id_index] == - out[base_idx + offset_j + id_index]))): + with ib.if_scope( + tvm.tir.all(j > k, + out[base_idx + offset_j + + score_index] > 0, + tvm.tir.any(id_index < 0, + out[base_idx + offset_j + id_index] >= 0), + tvm.tir.any(force_suppress > 0, id_index < 0, + out[base_idx + offset_k + id_index] == + out[base_idx + offset_j + id_index]))): iou = calculate_overlap(out, base_idx + offset_j + coord_start, base_idx + offset_k + coord_start) with ib.if_scope(iou >= iou_threshold): @@ -495,7 +498,7 @@ def invalid_to_bottom_pre(data, flag, idx): num_anchors = data.shape[1] elem_length = data.shape[2] - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() data = ib.buffer_ptr(data) flag = ib.buffer_ptr(flag) @@ -505,8 +508,8 @@ def invalid_to_bottom_pre(data, flag, idx): tvm.target.Target.current(allow_none=False).max_num_threads)) nthread_tx = max_threads nthread_bx = num_anchors // 
max_threads + 1 - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("blockIdx.x") ib.scope_attr(tx, "thread_extent", nthread_tx) ib.scope_attr(bx, "thread_extent", nthread_bx) j = bx * max_threads + tx @@ -554,7 +557,7 @@ def invalid_to_bottom_ir(data, flag, idx, out): num_anchors = data.shape[1] elem_length = data.shape[2] - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() data = ib.buffer_ptr(data) flag = ib.buffer_ptr(flag) @@ -565,8 +568,8 @@ def invalid_to_bottom_ir(data, flag, idx, out): tvm.target.Target.current(allow_none=False).max_num_threads)) nthread_tx = max_threads nthread_bx = num_anchors // max_threads + 1 - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("blockIdx.x") ib.scope_attr(tx, "thread_extent", nthread_tx) ib.scope_attr(bx, "thread_extent", nthread_bx) j = bx * max_threads + tx @@ -591,12 +594,12 @@ def non_max_suppression(data, valid_count, max_output_size=-1, Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 3-D tensor with shape [batch_size, num_anchors, elem_length]. The last dimension should be in format of [class_id, score, box_left, box_top, box_right, box_bottom]. - valid_count : tvm.Tensor + valid_count : tvm.te.Tensor 1-D tensor for valid number of boxes. max_output_size : optional, int @@ -629,7 +632,7 @@ def non_max_suppression(data, valid_count, max_output_size=-1, Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor 3-D tensor with shape [batch_size, num_anchors, elem_length]. 
Example @@ -638,8 +641,8 @@ def non_max_suppression(data, valid_count, max_output_size=-1, # An example to use nms dshape = (1, 5, 6) - data = tvm.placeholder(dshape, name="data") - valid_count = tvm.placeholder((dshape[0],), dtype="int32", name="valid_count") + data = te.placeholder(dshape, name="data") + valid_count = te.placeholder((dshape[0],), dtype="int32", name="valid_count") iou_threshold = 0.7 force_suppress = True top_k = -1 @@ -659,63 +662,63 @@ def non_max_suppression(data, valid_count, max_output_size=-1, num_anchors = data.shape[1] valid_count_dtype = "int32" - valid_count_buf = api.decl_buffer(valid_count.shape, valid_count_dtype, - "valid_count_buf", data_alignment=4) + valid_count_buf = tvm.tir.decl_buffer(valid_count.shape, valid_count_dtype, + "valid_count_buf", data_alignment=4) score_axis = score_index score_shape = (batch_size, num_anchors) - score_tensor = tvm.compute( + score_tensor = te.compute( score_shape, lambda i, j: data[i, j, score_axis], tag=tag.ELEMWISE) sort_tensor = argsort( score_tensor, valid_count=valid_count, axis=1, is_ascend=False) - sort_tensor_buf = api.decl_buffer(sort_tensor.shape, sort_tensor.dtype, - "sort_tensor_buf", data_alignment=8) + sort_tensor_buf = tvm.tir.decl_buffer(sort_tensor.shape, sort_tensor.dtype, + "sort_tensor_buf", data_alignment=8) - data_buf = api.decl_buffer( + data_buf = tvm.tir.decl_buffer( data.shape, data.dtype, "data_buf", data_alignment=8) - out_buf = api.decl_buffer( + out_buf = tvm.tir.decl_buffer( data.shape, data.dtype, "out_buf", data_alignment=8) out, box_indices = \ - tvm.extern([data.shape, score_shape], - [data, sort_tensor, valid_count], - lambda ins, outs: nms_ir( - ins[0], ins[1], ins[2], outs[0], outs[1], - max_output_size, iou_threshold, force_suppress, - top_k, coord_start, id_index, score_index), - dtype=[data.dtype, "int32"], - in_buffers=[data_buf, sort_tensor_buf, valid_count_buf], - name="nms", - tag="nms") + te.extern([data.shape, score_shape], + [data, sort_tensor, 
valid_count], + lambda ins, outs: nms_ir( + ins[0], ins[1], ins[2], outs[0], outs[1], + max_output_size, iou_threshold, force_suppress, + top_k, coord_start, id_index, score_index), + dtype=[data.dtype, "int32"], + in_buffers=[data_buf, sort_tensor_buf, valid_count_buf], + name="nms", + tag="nms") if return_indices: return box_indices if invalid_to_bottom: - output_buf = api.decl_buffer( + output_buf = tvm.tir.decl_buffer( data.shape, data.dtype, "output_buf", data_alignment=8) - temp_flag_buf = api.decl_buffer( + temp_flag_buf = tvm.tir.decl_buffer( score_shape, valid_count_dtype, "temp_flag", data_alignment=8) - temp_idx_buf = api.decl_buffer( + temp_idx_buf = tvm.tir.decl_buffer( score_shape, valid_count_dtype, "temp_idx", data_alignment=8) - temp_flag, temp_idx = tvm.extern([score_shape, score_shape], [out], - lambda ins, outs: invalid_to_bottom_pre( - ins[0], outs[0], outs[1]), - dtype=["int32", "int32"], - in_buffers=[out_buf], - out_buffers=[ - temp_flag_buf, temp_idx_buf], - name="invalid_to_bottom_phase_one") - - output = tvm.extern([data.shape], [out, temp_flag, temp_idx], - lambda ins, outs: invalid_to_bottom_ir( - ins[0], ins[1], ins[2], outs[0]), - dtype=[data.dtype], - in_buffers=[out_buf, temp_flag_buf, temp_idx_buf], - out_buffers=[output_buf], - name="invalid_to_bottom", - tag="invalid_to_bottom") + temp_flag, temp_idx = te.extern([score_shape, score_shape], [out], + lambda ins, outs: invalid_to_bottom_pre( + ins[0], outs[0], outs[1]), + dtype=["int32", "int32"], + in_buffers=[out_buf], + out_buffers=[ + temp_flag_buf, temp_idx_buf], + name="invalid_to_bottom_phase_one") + + output = te.extern([data.shape], [out, temp_flag, temp_idx], + lambda ins, outs: invalid_to_bottom_ir( + ins[0], ins[1], ins[2], outs[0]), + dtype=[data.dtype], + in_buffers=[out_buf, temp_flag_buf, temp_idx_buf], + out_buffers=[output_buf], + name="invalid_to_bottom", + tag="invalid_to_bottom") return output return out diff --git a/topi/python/topi/cuda/pooling.py 
b/topi/python/topi/cuda/pooling.py index 2bebd39..26c18ee 100644 --- a/topi/python/topi/cuda/pooling.py +++ b/topi/python/topi/cuda/pooling.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name, unused-variable, unused-argument """Schedule for pooling operators""" import tvm +from tvm import te from .. import tag from ..util import traverse_inline @@ -35,15 +36,15 @@ def schedule_adaptive_pool(outs): s: Schedule The computation schedule for adaptive_pool. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(Pool): num_thread = 8 - block_x = tvm.thread_axis("blockIdx.x") - block_y = tvm.thread_axis("blockIdx.y") - thread_x = tvm.thread_axis((0, num_thread), "threadIdx.x") - thread_y = tvm.thread_axis((0, num_thread), "threadIdx.y") + block_x = te.thread_axis("blockIdx.x") + block_y = te.thread_axis("blockIdx.y") + thread_x = te.thread_axis((0, num_thread), "threadIdx.x") + thread_y = te.thread_axis((0, num_thread), "threadIdx.y") if Pool.op in s.outputs: Out = Pool OL = s.cache_write(Pool, "local") @@ -71,7 +72,7 @@ def schedule_adaptive_pool(outs): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule global_pool elif OP.tag.startswith('adaptive_pool'): @@ -103,10 +104,10 @@ def schedule_pool(outs, layout): s: Schedule The computation schedule for pool. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(PaddedInput, Pool): - if isinstance(PaddedInput.op, tvm.tensor.ComputeOp): + if isinstance(PaddedInput.op, tvm.te.ComputeOp): s[PaddedInput].compute_inline() num_thread = tvm.target.Target.current(allow_none=False).max_num_threads if Pool.op in s.outputs: @@ -117,8 +118,8 @@ def schedule_pool(outs, layout): s[Pool].set_scope("local") fused = s[Out].fuse(*s[Out].op.axis) bx, tx = s[Out].split(fused, factor=num_thread) - s[Out].bind(bx, tvm.thread_axis("blockIdx.x")) - s[Out].bind(tx, tvm.thread_axis("threadIdx.x")) + s[Out].bind(bx, te.thread_axis("blockIdx.x")) + s[Out].bind(tx, te.thread_axis("threadIdx.x")) if Pool.op in s.outputs: s[OL].compute_at(s[Out], tx) else: @@ -133,7 +134,7 @@ def schedule_pool(outs, layout): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule pool elif OP.tag.startswith('pool'): @@ -163,8 +164,8 @@ def schedule_pool_grad(outs): s: Schedule The computation schedule for pool_grad. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule_pool_grad(op): if op in s.outputs: @@ -174,15 +175,15 @@ def schedule_pool_grad(outs): fused = s[out].fuse(*s[out].op.axis) num_thread = tvm.target.Target.current(allow_none=False).max_num_threads bx, tx = s[out].split(fused, factor=num_thread) - s[out].bind(bx, tvm.thread_axis("blockIdx.x")) - s[out].bind(tx, tvm.thread_axis("threadIdx.x")) + s[out].bind(bx, te.thread_axis("blockIdx.x")) + s[out].bind(tx, te.thread_axis("threadIdx.x")) if tag.COMM_REDUCE_IDX in op.input_tensors[0].op.tag: max_pool_index = op.input_tensors[0] s[max_pool_index].compute_at(s[out], tx) pool_input = max_pool_index.op.input_tensors[0] - if isinstance(pool_input.op, tvm.tensor.ComputeOp): + if isinstance(pool_input.op, tvm.te.ComputeOp): # handle padding s[pool_input].compute_inline() if op not in s.outputs: diff --git a/topi/python/topi/cuda/rcnn/proposal.py b/topi/python/topi/cuda/rcnn/proposal.py index 489c354..3546448 100644 --- a/topi/python/topi/cuda/rcnn/proposal.py +++ b/topi/python/topi/cuda/rcnn/proposal.py @@ -18,6 +18,7 @@ """Proposal operator""" import math import tvm +from tvm import te from ...vision.rcnn import generate_anchor, reg_bbox, reg_iou from ...util import get_const_tuple, get_const_int @@ -28,16 +29,16 @@ def predict_bbox_ir(cls_prob_buf, bbox_pred_buf, im_info_buf, out_buf, scales, r Parameters ---------- - cls_prob_buf : tvm.schedule.Buffer + cls_prob_buf : tvm.te.schedule.Buffer 4-D with shape [batch, 2 * num_anchors, height, width] - bbox_pred_buf : tvm.schedule.Buffer + bbox_pred_buf : tvm.te.schedule.Buffer 4-D with shape [batch, 4 * num_anchors, height, width] - im_info_buf : tvm.schedule.Buffer + im_info_buf : tvm.te.schedule.Buffer 2-D with shape [batch, 3] - out_buf : tvm.schedule.Buffer + out_buf : 
tvm.te.schedule.Buffer 3-D with shape [batch, num_bbox, 5] The last dimension is in format of [w_start, h_start, w_end, h_end, score] @@ -67,10 +68,10 @@ def predict_bbox_ir(cls_prob_buf, bbox_pred_buf, im_info_buf, out_buf, scales, r max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads) nthread_tx = max_threads nthread_bx = (batch * height * width) // max_threads + 1 - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("blockIdx.x") tid = bx * max_threads + tx - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() ib.scope_attr(tx, "thread_extent", nthread_tx) ib.scope_attr(bx, "thread_extent", nthread_bx) @@ -79,8 +80,8 @@ def predict_bbox_ir(cls_prob_buf, bbox_pred_buf, im_info_buf, out_buf, scales, r p_im_info = ib.buffer_ptr(im_info_buf) p_out = ib.buffer_ptr(out_buf) - idxm = tvm.indexmod - idxd = tvm.indexdiv + idxm = tvm.tir.indexmod + idxd = tvm.tir.indexdiv with ib.if_scope(tid < batch * height * width): w = idxm(tid, width) @@ -104,10 +105,10 @@ def predict_bbox_ir(cls_prob_buf, bbox_pred_buf, im_info_buf, out_buf, scales, r regression_func = reg_iou if iou_loss else reg_bbox pred_x1, pred_y1, pred_x2, pred_y2 = regression_func(x1, y1, x2, y2, *delta) - pred_x1 = tvm.max(tvm.min(pred_x1, im_width - 1.0), 0.0) - pred_y1 = tvm.max(tvm.min(pred_y1, im_height - 1.0), 0.0) - pred_x2 = tvm.max(tvm.min(pred_x2, im_width - 1.0), 0.0) - pred_y2 = tvm.max(tvm.min(pred_y2, im_height - 1.0), 0.0) + pred_x1 = tvm.te.max(tvm.te.min(pred_x1, im_width - 1.0), 0.0) + pred_y1 = tvm.te.max(tvm.te.min(pred_y1, im_height - 1.0), 0.0) + pred_x2 = tvm.te.max(tvm.te.min(pred_x2, im_width - 1.0), 0.0) + pred_y2 = tvm.te.max(tvm.te.min(pred_y2, im_height - 1.0), 0.0) real_height = (im_height / feature_stride).astype('int32') real_width = (im_width / feature_stride).astype('int32') @@ -117,15 +118,15 @@ def predict_bbox_ir(cls_prob_buf, bbox_pred_buf, im_info_buf, 
out_buf, scales, r min_size = p_im_info[b * 3 + 2] * rpn_min_size pred_score = p_score[((b * num_anchors * 2 + num_anchors + k) * height + h) * width + w] - pred_score = tvm.expr.Select(tvm.any(h >= real_height, w >= real_width), - -1.0, pred_score) + pred_score = tvm.tir.Select(tvm.tir.any(h >= real_height, w >= real_width), + -1.0, pred_score) p_out[out_index * 5 + 0] = pred_x1 p_out[out_index * 5 + 1] = pred_y1 p_out[out_index * 5 + 2] = pred_x2 p_out[out_index * 5 + 3] = pred_y2 p_out[out_index * 5 + 4] = pred_score - with ib.if_scope(tvm.any(bbox_w < min_size, bbox_h < min_size)): + with ib.if_scope(tvm.tir.any(bbox_w < min_size, bbox_h < min_size)): p_out[out_index * 5 + 0] -= min_size / 2.0 p_out[out_index * 5 + 1] -= min_size / 2.0 p_out[out_index * 5 + 2] += min_size / 2.0 @@ -140,10 +141,10 @@ def argsort_ir(data_buf, out_index_buf): Parameters ---------- - data_buf : tvm.schedule.Buffer + data_buf : tvm.te.schedule.Buffer 2-D with shape [batch, num_bbox] - out_index_buf : tvm.schedule.Buffer + out_index_buf : tvm.te.schedule.Buffer 2-D with shape [batch, num_bbox]. Indices of data in sorted order. 
Returns @@ -153,20 +154,20 @@ def argsort_ir(data_buf, out_index_buf): """ batch, num_bbox = get_const_tuple(data_buf.shape) max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads) - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() p_data = ib.buffer_ptr(data_buf) index_out = ib.buffer_ptr(out_index_buf) nthread_tx = max_threads nthread_bx = (num_bbox + 1) // 2 // max_threads + 1 - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("vthread") + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("vthread") ib.scope_attr(tx, "thread_extent", nthread_tx) ib.scope_attr(bx, "virtual_thread", nthread_bx) tid = bx * nthread_tx + tx temp_data = ib.allocate("float32", (1,), name="temp_data", scope="local") temp_index = ib.allocate("int32", (1,), name="temp_index", scope="local") - idxm = tvm.indexmod + idxm = tvm.tir.indexmod with ib.for_range(0, batch, for_type="unroll") as b: start = b * num_bbox @@ -177,16 +178,16 @@ def argsort_ir(data_buf, out_index_buf): with ib.for_range(0, num_bbox) as k: offset = start + 2 * tid + idxm(k, 2) with ib.if_scope( - tvm.all(offset + 1 < num_bbox, p_data[offset] < p_data[offset + 1])): + tvm.tir.all(offset + 1 < num_bbox, p_data[offset] < p_data[offset + 1])): temp_data[0] = p_data[offset] p_data[offset] = p_data[offset + 1] p_data[offset + 1] = temp_data[0] temp_index[0] = index_out[offset] index_out[offset] = index_out[offset + 1] index_out[offset + 1] = temp_index[0] - ib.emit(tvm.make.Call(None, 'tvm_storage_sync', - tvm.convert(['shared']), - tvm.expr.Call.Intrinsic, None, 0)) + ib.emit(tvm.tir.Call(None, 'tvm_storage_sync', + tvm.runtime.convert(['shared']), + tvm.tir.Call.Intrinsic, None, 0)) return ib.get() @@ -195,11 +196,11 @@ def nms_ir(sorted_bbox_buf, out_buf, nms_threshold): Parameters ---------- - sorted_bbox_buf : tvm.schedule.Buffer + sorted_bbox_buf : tvm.te.schedule.Buffer 3-D with shape [batch, num_bbox, 5]. 
The last dimension is in format of [w_start, h_start, w_end, h_end, score]. - out_buf : tvm.schedule.Buffer + out_buf : tvm.te.schedule.Buffer 2-D with shape [batch, num_bbox]. Boolean mask of whether a bounding box should be removed. nms_threshold : float @@ -213,10 +214,10 @@ def nms_ir(sorted_bbox_buf, out_buf, nms_threshold): def calculate_overlap(out_tensor, box_a_idx, box_b_idx): """Calculate overlap of two boxes. """ - w = tvm.max(0.0, tvm.min(out_tensor[box_a_idx + 2], out_tensor[box_b_idx + 2]) - - tvm.max(out_tensor[box_a_idx], out_tensor[box_b_idx]) + 1.0) - h = tvm.max(0.0, tvm.min(out_tensor[box_a_idx + 3], out_tensor[box_b_idx + 3]) - - tvm.max(out_tensor[box_a_idx + 1], out_tensor[box_b_idx + 1]) + 1.0) + w = tvm.te.max(0.0, tvm.te.min(out_tensor[box_a_idx + 2], out_tensor[box_b_idx + 2]) + - tvm.te.max(out_tensor[box_a_idx], out_tensor[box_b_idx]) + 1.0) + h = tvm.te.max(0.0, tvm.te.min(out_tensor[box_a_idx + 3], out_tensor[box_b_idx + 3]) + - tvm.te.max(out_tensor[box_a_idx + 1], out_tensor[box_b_idx + 1]) + 1.0) i = w * h u = (out_tensor[box_a_idx + 2] - out_tensor[box_a_idx] + 1.0) * \ (out_tensor[box_a_idx + 3] - out_tensor[box_a_idx + 1] + 1.0) + \ @@ -226,9 +227,9 @@ def nms_ir(sorted_bbox_buf, out_buf, nms_threshold): batch, num_bbox = get_const_tuple(out_buf.shape) max_threads = int(math.sqrt(tvm.target.Target.current(allow_none=False).max_num_threads)) - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("blockIdx.x") - ib = tvm.ir_builder.create() + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("blockIdx.x") + ib = tvm.tir.ir_builder.create() p_data = ib.buffer_ptr(sorted_bbox_buf) p_out = ib.buffer_ptr(out_buf) nthread_tx = max_threads @@ -241,13 +242,13 @@ def nms_ir(sorted_bbox_buf, out_buf, nms_threshold): with ib.if_scope(i < num_bbox): p_out[base_idx + i] = False with ib.for_range(0, num_bbox - 1) as l: - with ib.if_scope(tvm.all(i < num_bbox, i > l, p_out[base_idx + l] == False)): + with ib.if_scope(tvm.tir.all(i 
< num_bbox, i > l, p_out[base_idx + l] == False)): iou = calculate_overlap(p_data, (base_idx + l) * 5, (base_idx + i) * 5) with ib.if_scope(iou > nms_threshold): p_out[base_idx + i] = True - ib.emit(tvm.make.Call(None, 'tvm_storage_sync', - tvm.convert(['shared']), - tvm.expr.Call.Intrinsic, None, 0)) + ib.emit(tvm.tir.Call(None, 'tvm_storage_sync', + tvm.runtime.convert(['shared']), + tvm.tir.Call.Intrinsic, None, 0)) return ib.get() @@ -256,14 +257,14 @@ def prepare_output_ir(sorted_bbox_buf, remove_mask_buf, out_buf): Parameters ---------- - sorted_bbox_buf : tvm.schedule.Buffer + sorted_bbox_buf : tvm.te.schedule.Buffer 3-D with shape [batch, num_bbox, 5]. The last dimension is in format of [w_start, h_start, w_end, h_end, score]. - remove_mask_buf : tvm.schedule.Buffer + remove_mask_buf : tvm.te.schedule.Buffer 2-D with shape [batch, num_bbox]. Boolean mask of whether a bounding box should be removed. - out_buf : tvm.schedule.Buffer + out_buf : tvm.te.schedule.Buffer 2-D with shape [batch * rpn_post_nms_top_n, 5]. The last dimension is in format of [batch_index, w_start, h_start, w_end, h_end]. 
@@ -275,8 +276,8 @@ def prepare_output_ir(sorted_bbox_buf, remove_mask_buf, out_buf): batch, num_bbox, _ = get_const_tuple(sorted_bbox_buf.shape) rpn_post_nms_top_n = get_const_int(out_buf.shape[0]) // batch nthread_tx = batch - tx = tvm.thread_axis("threadIdx.x") - ib = tvm.ir_builder.create() + tx = te.thread_axis("threadIdx.x") + ib = tvm.tir.ir_builder.create() ib.scope_attr(tx, "thread_extent", nthread_tx) i = ib.allocate('int32', (1,), 'i', scope='local') i[0] = 0 @@ -292,14 +293,14 @@ def prepare_output_ir(sorted_bbox_buf, remove_mask_buf, out_buf): with ib.if_scope(p_remove[b * num_bbox + j] == False): nkeep[0] += 1 with ib.if_scope(nkeep[0] > 0): - with ib.for_range(0, tvm.ceil( - tvm.const(rpn_post_nms_top_n, 'float32') / nkeep[0]).astype('int32')): + with ib.for_range(0, te.ceil( + tvm.tir.const(rpn_post_nms_top_n, 'float32') / nkeep[0]).astype('int32')): with ib.for_range(0, num_bbox) as j: offset_j = (b * num_bbox + j) * 5 offset_i = (b * rpn_post_nms_top_n + i[0]) * 5 - with ib.if_scope(tvm.all(i[0] < rpn_post_nms_top_n, - p_remove[(b*num_bbox+j)] == False)): - p_out[offset_i] = tvm.expr.Cast('float32', b) + with ib.if_scope(tvm.tir.all(i[0] < rpn_post_nms_top_n, + p_remove[(b*num_bbox+j)] == False)): + p_out[offset_i] = tvm.tir.Cast('float32', b) with ib.for_range(0, 4, for_type='unroll') as k: p_out[offset_i + k + 1] = p_sorted_bbox[offset_j + k] i[0] = i[0] + 1 @@ -314,13 +315,13 @@ def proposal(cls_prob, bbox_pred, im_info, scales, ratios, feature_stride, thres Parameters ---------- - cls_prob : tvm.Tensor + cls_prob : tvm.te.Tensor 4-D with shape [batch, 2 * num_anchors, height, width] - bbox_pred : tvm.Tensor + bbox_pred : tvm.te.Tensor 4-D with shape [batch, 4 * num_anchors, height, width] - im_info : tvm.Tensor + im_info : tvm.te.Tensor 2-D with shape [batch, 3] scales : list/tuple of float @@ -350,7 +351,7 @@ def proposal(cls_prob, bbox_pred, im_info, scales, ratios, feature_stride, thres Returns ------- - out : tvm.Tensor + out : 
tvm.te.Tensor 2-D tensor with shape [batch * rpn_post_nms_top_n, 5]. The last dimension is in format of [batch_index, w_start, h_start, w_end, h_end]. """ @@ -360,20 +361,20 @@ def proposal(cls_prob, bbox_pred, im_info, scales, ratios, feature_stride, thres num_bbox = height * width * num_anchors rpn_pre_nms_top_n = min(rpn_pre_nms_top_n, num_bbox) if rpn_pre_nms_top_n > 0 else num_bbox - bbox = tvm.extern((batch, num_bbox, 5), [cls_prob, bbox_pred, im_info], lambda ins, outs: - predict_bbox_ir(ins[0], ins[1], ins[2], outs[0], scales, ratios, - feature_stride, rpn_min_size, iou_loss), - dtype=bbox_pred.dtype) - score = tvm.compute((batch, num_bbox), lambda b, i: bbox[b, i, 4], tag='bbox_score') - sorted_index = tvm.extern([score.shape], [score], - lambda ins, outs: argsort_ir(ins[0], outs[0]), - dtype='int32') - sorted_bbox = tvm.compute((batch, rpn_pre_nms_top_n, 5), - lambda b, i, j: bbox[b, sorted_index[b, i], j], tag='sorted_bbox') - nms_remove_mask = tvm.extern((batch, rpn_pre_nms_top_n), [sorted_bbox], - lambda ins, outs: nms_ir(ins[0], outs[0], threshold), - dtype='bool') - nms_out = tvm.extern((batch * rpn_post_nms_top_n, 5), [sorted_bbox, nms_remove_mask], - lambda ins, outs: prepare_output_ir(ins[0], ins[1], outs[0]), - dtype=sorted_bbox.dtype) + bbox = te.extern((batch, num_bbox, 5), [cls_prob, bbox_pred, im_info], lambda ins, outs: + predict_bbox_ir(ins[0], ins[1], ins[2], outs[0], scales, ratios, + feature_stride, rpn_min_size, iou_loss), + dtype=bbox_pred.dtype) + score = te.compute((batch, num_bbox), lambda b, i: bbox[b, i, 4], tag='bbox_score') + sorted_index = te.extern([score.shape], [score], + lambda ins, outs: argsort_ir(ins[0], outs[0]), + dtype='int32') + sorted_bbox = te.compute((batch, rpn_pre_nms_top_n, 5), + lambda b, i, j: bbox[b, sorted_index[b, i], j], tag='sorted_bbox') + nms_remove_mask = te.extern((batch, rpn_pre_nms_top_n), [sorted_bbox], + lambda ins, outs: nms_ir(ins[0], outs[0], threshold), + dtype='bool') + nms_out = 
te.extern((batch * rpn_post_nms_top_n, 5), [sorted_bbox, nms_remove_mask], + lambda ins, outs: prepare_output_ir(ins[0], ins[1], outs[0]), + dtype=sorted_bbox.dtype) return nms_out diff --git a/topi/python/topi/cuda/reduction.py b/topi/python/topi/cuda/reduction.py index 0b9d588..d885c09 100644 --- a/topi/python/topi/cuda/reduction.py +++ b/topi/python/topi/cuda/reduction.py @@ -18,6 +18,7 @@ """Schedule for reduce operators""" from __future__ import absolute_import as _abs import tvm +from tvm import te from .. import tag from .injective import schedule_injective_from_existing @@ -39,13 +40,13 @@ def _schedule_reduce(op, sch, is_idx_reduce=False): # without it, CL_INVALID_WORK_GROUP_SIZE occurred when running test_topi_reduce.py # don't know why num_thread = 16 - block_x = tvm.thread_axis("blockIdx.x") - thread_x = tvm.thread_axis((0, num_thread), "threadIdx.x") - thread_y = tvm.thread_axis((0, num_thread), "threadIdx.y") + block_x = te.thread_axis("blockIdx.x") + thread_x = te.thread_axis((0, num_thread), "threadIdx.x") + thread_y = te.thread_axis((0, num_thread), "threadIdx.y") else: all_reduce = True num_thread = tvm.target.Target.current(allow_none=False).max_num_threads - thread_x = tvm.thread_axis((0, num_thread), "threadIdx.x") + thread_x = te.thread_axis((0, num_thread), "threadIdx.x") # Fuse and refactor the reduce axis fused_reduce = sch[data_out].fuse(*[sch[data_out].op.reduce_axis[i] @@ -79,7 +80,7 @@ def _schedule_reduce(op, sch, is_idx_reduce=False): else: if is_idx_reduce: spatial_axis = sch[real_output].fuse(*(sch[real_output].op.axis)) - sch[real_output].bind(spatial_axis, tvm.thread_axis("blockIdx.x")) + sch[real_output].bind(spatial_axis, te.thread_axis("blockIdx.x")) sch[temp_idx_input].compute_at(sch[real_output], spatial_axis) sch[temp_val_input].compute_at(sch[real_output], @@ -102,13 +103,13 @@ def schedule_reduce(outs): sch: Schedule The computation schedule for the op. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - sch = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + sch = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def traverse_before_reduce(operator): """Internal traverse function""" - if isinstance(operator, tvm.tensor.PlaceholderOp): + if isinstance(operator, tvm.te.PlaceholderOp): return if tag.is_injective(operator.tag): sch[operator].compute_inline() diff --git a/topi/python/topi/cuda/softmax.py b/topi/python/topi/cuda/softmax.py index afd11ea..ded3ff9 100644 --- a/topi/python/topi/cuda/softmax.py +++ b/topi/python/topi/cuda/softmax.py @@ -16,7 +16,7 @@ # under the License. # pylint: disable=invalid-name, unused-variable, trailing-whitespace """Schedule for softmax operator""" -import tvm +from tvm import te from .injective import schedule_injective_from_existing @@ -34,8 +34,8 @@ def schedule_softmax(outs): sch: Schedule The computation schedule for the op. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) softmax = outs[0] op_tag = softmax.op.tag @@ -60,8 +60,8 @@ def schedule_softmax(outs): s = schedule_injective_from_existing(s, op.output(0)) else: num_thread = 64 - block_x = tvm.thread_axis("blockIdx.x") - thread_x = tvm.thread_axis((0, num_thread), "threadIdx.x") + block_x = te.thread_axis("blockIdx.x") + thread_x = te.thread_axis((0, num_thread), "threadIdx.x") if exp is not None: s[exp].bind(exp.op.axis[0], block_x) diff --git a/topi/python/topi/cuda/sort.py b/topi/python/topi/cuda/sort.py index 88ca9d8..f9e535e 100644 --- a/topi/python/topi/cuda/sort.py +++ b/topi/python/topi/cuda/sort.py @@ -17,8 +17,8 @@ # pylint: disable=invalid-name, no-member, too-many-locals, too-many-arguments, too-many-statements, singleton-comparison, unused-argument """Argsort operator """ import tvm +from tvm import te -from tvm import api from .injective import schedule_injective_from_existing from ..math import identity from ..transform import strided_slice @@ -38,8 +38,8 @@ def _schedule_sort(outs): s: Schedule The computation schedule for the op. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def traverse(op): @@ -86,7 +86,7 @@ def sort_ir(data, values_out, axis, is_ascend, indices_out=None): elif i > axis: axis_mul_after *= value max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads) - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() data = ib.buffer_ptr(data) values_out = ib.buffer_ptr(values_out) if indices_out is not None: @@ -94,8 +94,8 @@ def sort_ir(data, values_out, axis, is_ascend, indices_out=None): nthread_tx = max_threads nthread_bx = shape[axis] // max_threads + 1 - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("vthread") + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("vthread") ib.scope_attr(tx, "thread_extent", nthread_tx) ib.scope_attr(bx, "virtual_thread", nthread_bx) tid = bx * nthread_tx + tx @@ -110,12 +110,12 @@ def sort_ir(data, values_out, axis, is_ascend, indices_out=None): values_out[base_idx + tid * axis_mul_after] = data[base_idx + tid * axis_mul_after] if indices_out is not None: indices_out[base_idx + tid * axis_mul_after] = \ - tvm.generic.cast(tid, indices_out.dtype) - ib.emit(tvm.make.Call(None, 'tvm_storage_sync', - tvm.convert(['shared']), - tvm.expr.Call.Intrinsic, None, 0)) - idxd = tvm.indexdiv - idxm = tvm.indexmod + tvm.tir.generic.cast(tid, indices_out.dtype) + ib.emit(tvm.tir.Call(None, 'tvm_storage_sync', + tvm.runtime.convert(['shared']), + tvm.tir.Call.Intrinsic, None, 0)) + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod with ib.for_range(0, axis_mul_before) as i: with ib.for_range(0, axis_mul_after) as j: @@ -126,11 +126,11 @@ def sort_ir(data, values_out, axis, is_ascend, indices_out=None): with ib.if_scope(tid < idxd(current_sort_num + 1, 2)): offset = base_idx + (2 * tid + idxm(k, 2)) * 
axis_mul_after if is_ascend: - cond = tvm.all(2 * tid + idxm(k, 2) + 1 < current_sort_num, - values_out[offset] > values_out[offset + axis_mul_after]) + cond = tvm.tir.all(2 * tid + idxm(k, 2) + 1 < current_sort_num, + values_out[offset] > values_out[offset + axis_mul_after]) else: - cond = tvm.all(2 * tid + idxm(k, 2) + 1 < current_sort_num, - values_out[offset] < values_out[offset + axis_mul_after]) + cond = tvm.tir.all(2 * tid + idxm(k, 2) + 1 < current_sort_num, + values_out[offset] < values_out[offset + axis_mul_after]) with ib.if_scope(cond): temp_data[0] = values_out[offset] values_out[offset] = values_out[offset + axis_mul_after] @@ -139,9 +139,9 @@ def sort_ir(data, values_out, axis, is_ascend, indices_out=None): temp_index[0] = indices_out[offset] indices_out[offset] = indices_out[offset + axis_mul_after] indices_out[offset + axis_mul_after] = temp_index[0] - ib.emit(tvm.make.Call(None, 'tvm_storage_sync', - tvm.convert(['shared']), - tvm.expr.Call.Intrinsic, None, 0)) + ib.emit(tvm.tir.Call(None, 'tvm_storage_sync', + tvm.runtime.convert(['shared']), + tvm.tir.Call.Intrinsic, None, 0)) return ib.get() @@ -185,23 +185,23 @@ def sort_nms_ir(data, valid_count, output, axis, is_ascend): elif i > axis: axis_mul_after *= value max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads) - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() data = ib.buffer_ptr(data) valid_count = ib.buffer_ptr(valid_count) output = ib.buffer_ptr(output) nthread_tx = max_threads nthread_bx = size // max_threads + 1 - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("vthread") + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("vthread") ib.scope_attr(tx, "thread_extent", nthread_tx) ib.scope_attr(bx, "virtual_thread", nthread_bx) tid = bx * nthread_tx + tx temp_data = ib.allocate("float32", (1,), name="temp_data", scope="local") temp_index = ib.allocate("int32", (1,), name="temp_index", scope="local") - is_ascend = 
tvm.make.node("IntImm", dtype="int32", value=is_ascend) + is_ascend = tvm.ir.make_node("IntImm", dtype="int32", value=is_ascend) - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod with ib.for_range(0, axis_mul_before) as i: with ib.for_range(0, axis_mul_after) as j: @@ -213,27 +213,27 @@ def sort_nms_ir(data, valid_count, output, axis, is_ascend): with ib.for_range(0, current_sort_num) as k: with ib.if_scope(tid < idxd(current_sort_num + 1, 2)): offset = base_idx + (2 * tid + idxm(k, 2)) * axis_mul_after - with ib.if_scope(tvm.all(is_ascend == 1, \ - 2 * tid + idxm(k, 2) + 1 < current_sort_num, \ - data[offset] > data[offset + axis_mul_after])): + with ib.if_scope(tvm.tir.all(is_ascend == 1, \ + 2 * tid + idxm(k, 2) + 1 < current_sort_num, \ + data[offset] > data[offset + axis_mul_after])): temp_data[0] = data[offset] data[offset] = data[offset + axis_mul_after] data[offset + axis_mul_after] = temp_data[0] temp_index[0] = output[offset] output[offset] = output[offset + axis_mul_after] output[offset + axis_mul_after] = temp_index[0] - with ib.if_scope(tvm.all(is_ascend == 0, \ - 2 * tid + idxm(k, 2) + 1 < current_sort_num, \ - data[offset] < data[offset + axis_mul_after])): + with ib.if_scope(tvm.tir.all(is_ascend == 0, \ + 2 * tid + idxm(k, 2) + 1 < current_sort_num, \ + data[offset] < data[offset + axis_mul_after])): temp_data[0] = data[offset] data[offset] = data[offset + axis_mul_after] data[offset + axis_mul_after] = temp_data[0] temp_index[0] = output[offset] output[offset] = output[offset + axis_mul_after] output[offset + axis_mul_after] = temp_index[0] - ib.emit(tvm.make.Call(None, 'tvm_storage_sync', - tvm.convert(['shared']), - tvm.expr.Call.Intrinsic, None, 0)) + ib.emit(tvm.tir.Call(None, 'tvm_storage_sync', + tvm.runtime.convert(['shared']), + tvm.tir.Call.Intrinsic, None, 0)) return ib.get() @@ -243,10 +243,10 @@ def argsort(data, valid_count=None, axis=-1, is_ascend=1, dtype="float32"): Parameters 
---------- - data: tvm.Tensor + data: tvm.te.Tensor The input array. - valid_count : tvm.Tensor, optional + valid_count : tvm.te.Tensor, optional The number of valid elements to be sorted. axis : int, optional @@ -260,35 +260,35 @@ def argsort(data, valid_count=None, axis=-1, is_ascend=1, dtype="float32"): Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor The output of this function. """ if valid_count is not None: sorted_data = identity(data) - sorted_data_buf = api.decl_buffer(data.shape, data.dtype, "sorted_data_buf", - data_alignment=8) - valid_count_buf = api.decl_buffer(valid_count.shape, valid_count.dtype, - "valid_count_buf", data_alignment=4) - out_buf = api.decl_buffer(data.shape, "int32", "out_buf", data_alignment=4) - out = tvm.extern([data.shape], - [sorted_data, valid_count], - lambda ins, outs: sort_nms_ir( - ins[0], ins[1], outs[0], axis, is_ascend), - dtype="int32", - in_buffers=[sorted_data_buf, valid_count_buf], - out_buffers=[out_buf], - name="argsort_nms_gpu", - tag="argsort_nms_gpu") + sorted_data_buf = tvm.tir.decl_buffer(data.shape, data.dtype, "sorted_data_buf", + data_alignment=8) + valid_count_buf = tvm.tir.decl_buffer(valid_count.shape, valid_count.dtype, + "valid_count_buf", data_alignment=4) + out_buf = tvm.tir.decl_buffer(data.shape, "int32", "out_buf", data_alignment=4) + out = te.extern([data.shape], + [sorted_data, valid_count], + lambda ins, outs: sort_nms_ir( + ins[0], ins[1], outs[0], axis, is_ascend), + dtype="int32", + in_buffers=[sorted_data_buf, valid_count_buf], + out_buffers=[out_buf], + name="argsort_nms_gpu", + tag="argsort_nms_gpu") else: - value_buf = api.decl_buffer(data.shape, data.dtype, "value_buf", data_alignment=8) - indices_buf = api.decl_buffer(data.shape, dtype, "out_buf", data_alignment=8) - out = tvm.extern([data.shape, data.shape], - [data], - lambda ins, outs: sort_ir( - ins[0], outs[0], axis, is_ascend, indices_out=outs[1]), - out_buffers=[value_buf, indices_buf], - name="argsort_gpu", - 
tag="argsort_gpu")[1] + value_buf = tvm.tir.decl_buffer(data.shape, data.dtype, "value_buf", data_alignment=8) + indices_buf = tvm.tir.decl_buffer(data.shape, dtype, "out_buf", data_alignment=8) + out = te.extern([data.shape, data.shape], + [data], + lambda ins, outs: sort_ir( + ins[0], outs[0], axis, is_ascend, indices_out=outs[1]), + out_buffers=[value_buf, indices_buf], + name="argsort_gpu", + tag="argsort_gpu")[1] return out def schedule_argsort(outs): @@ -312,7 +312,7 @@ def topk(data, k=1, axis=-1, ret_type="both", is_ascend=False, dtype="int64"): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor The input tensor. k : int, optional @@ -335,31 +335,31 @@ def topk(data, k=1, axis=-1, ret_type="both", is_ascend=False, dtype="int64"): Returns ------- - out : tvm.Tensor or List[tvm.Tensor] + out : tvm.te.Tensor or List[tvm.te.Tensor] The computed result. """ assert ret_type in ["both", "values", "indices"] ndim = len(data.shape) axis = axis + ndim if axis < 0 else axis assert 0 <= axis < ndim - values_buf = api.decl_buffer(data.shape, data.dtype, "values_buf", data_alignment=8) - indices_buf = api.decl_buffer(data.shape, dtype, "indices_buf", data_alignment=8) + values_buf = tvm.tir.decl_buffer(data.shape, data.dtype, "values_buf", data_alignment=8) + indices_buf = tvm.tir.decl_buffer(data.shape, dtype, "indices_buf", data_alignment=8) if ret_type == "values": - output = tvm.extern([data.shape], - [data], - lambda ins, outs: sort_ir( - ins[0], outs[0], axis, is_ascend), - out_buffers=[values_buf], - name="topk_gpu", - tag="topk_gpu") + output = te.extern([data.shape], + [data], + lambda ins, outs: sort_ir( + ins[0], outs[0], axis, is_ascend), + out_buffers=[values_buf], + name="topk_gpu", + tag="topk_gpu") else: - output = tvm.extern([data.shape, data.shape], - [data], - lambda ins, outs: sort_ir( - ins[0], outs[0], axis, is_ascend, indices_out=outs[1]), - out_buffers=[values_buf, indices_buf], - name="topk_gpu", - tag="topk_gpu") + output = 
te.extern([data.shape, data.shape], + [data], + lambda ins, outs: sort_ir( + ins[0], outs[0], axis, is_ascend, indices_out=outs[1]), + out_buffers=[values_buf, indices_buf], + name="topk_gpu", + tag="topk_gpu") if k < 1: if ret_type == "indices": return output[1] diff --git a/topi/python/topi/cuda/ssd/multibox.py b/topi/python/topi/cuda/ssd/multibox.py index 0b3f50b..30784f4 100644 --- a/topi/python/topi/cuda/ssd/multibox.py +++ b/topi/python/topi/cuda/ssd/multibox.py @@ -16,12 +16,10 @@ # under the License. # pylint: disable=invalid-name, no-member, too-many-locals, too-many-arguments, too-many-statements, too-many-function-args """SSD multibox operators""" -from __future__ import absolute_import as _abs import math import tvm - -from tvm import api -from tvm.intrin import if_then_else, exp +from tvm import te +from tvm.tir import if_then_else, exp import topi @@ -58,11 +56,11 @@ def multibox_prior_ir(data, out, sizes, ratios, steps, offsets): """ max_threads = int(math.sqrt( tvm.target.Target.current(allow_none=False).max_num_threads)) - tx = tvm.thread_axis("threadIdx.x") - ty = tvm.thread_axis("threadIdx.y") - bx = tvm.thread_axis("blockIdx.x") - by = tvm.thread_axis("blockIdx.y") - ib = tvm.ir_builder.create() + tx = te.thread_axis("threadIdx.x") + ty = te.thread_axis("threadIdx.y") + bx = te.thread_axis("blockIdx.x") + by = te.thread_axis("blockIdx.y") + ib = tvm.tir.ir_builder.create() p_out = ib.buffer_ptr(out) in_height = data.shape[2] in_width = data.shape[3] @@ -115,7 +113,7 @@ def multibox_prior(data, sizes=(1,), ratios=(1,), steps=(-1, -1), Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, c_in, h_in, w_in]] sizes : tuple of float @@ -135,17 +133,17 @@ def multibox_prior(data, sizes=(1,), ratios=(1,), steps=(-1, -1), Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor 3-D tensor with shape [1, h_in * w_in * (num_sizes + num_ratios - 1), 4] """ num_sizes = len(sizes) num_ratios = len(ratios) oshape = ( 1, 
data.shape[2] * data.shape[3] * (num_sizes + num_ratios - 1), 4) - out = tvm.extern(oshape, [data], lambda ins, outs: - multibox_prior_ir( - ins[0], outs[0], sizes, ratios, steps, offsets), - tag="multibox_prior") + out = te.extern(oshape, [data], lambda ins, outs: + multibox_prior_ir( + ins[0], outs[0], sizes, ratios, steps, offsets), + tag="multibox_prior") if clip: out = topi.clip(out, 0, 1) return out @@ -182,7 +180,7 @@ def transform_loc_pre(cls_prob, valid_count, temp_valid_count, temp_cls_id, temp num_classes = cls_prob.shape[1] num_anchors = cls_prob.shape[2] - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() cls_prob = ib.buffer_ptr(cls_prob) cls_id = ib.buffer_ptr(temp_cls_id) @@ -190,18 +188,18 @@ def transform_loc_pre(cls_prob, valid_count, temp_valid_count, temp_cls_id, temp temp_valid_count = ib.buffer_ptr(temp_valid_count) score = ib.buffer_ptr(temp_score) - threshold = tvm.make.node("FloatImm", dtype="float32", value=threshold) + threshold = tvm.ir.make_node("FloatImm", dtype="float32", value=threshold) max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads) nthread_tx = max_threads nthread_bx = (batch_size * num_anchors) // max_threads + 1 - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") + bx = te.thread_axis("blockIdx.x") ib.scope_attr(tx, "thread_extent", nthread_tx) ib.scope_attr(bx, "thread_extent", nthread_bx) tid = bx * max_threads + tx - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod with ib.if_scope(tid < batch_size * num_anchors): i = idxd(tid, num_anchors) @@ -212,8 +210,8 @@ def transform_loc_pre(cls_prob, valid_count, temp_valid_count, temp_cls_id, temp with ib.for_range(0, num_classes - 1) as k: temp = cls_prob[i * num_classes * num_anchors + (k + 1) * num_anchors + j] cls_id[tid] = if_then_else(temp > score[tid], k + 1, cls_id[tid]) - score[tid] = tvm.max(temp, score[tid]) - with 
ib.if_scope(tvm.all(cls_id[tid] > 0, score[tid] < threshold)): + score[tid] = tvm.te.max(temp, score[tid]) + with ib.if_scope(tvm.tir.all(cls_id[tid] > 0, score[tid] < threshold)): cls_id[tid] = 0 with ib.if_scope(cls_id[tid] > 0): temp_valid_count[tid] = 1 @@ -224,7 +222,7 @@ def transform_loc_pre(cls_prob, valid_count, temp_valid_count, temp_cls_id, temp with ib.for_range(0, num_anchors) as k: with ib.if_scope(k > 0): temp_valid_count[tid * num_anchors + k] += \ - temp_valid_count[tid * num_anchors + k - 1] + temp_valid_count[tid * num_anchors + k - 1] valid_count[i] = temp_valid_count[tid * num_anchors + num_anchors - 1] return ib.get() @@ -289,12 +287,12 @@ def transform_loc_ir(loc_pred, anchor, temp_valid_count, temp_cls_id, temp_score oy = py * vy * ah + ay ow = exp(pw * vw) * aw / 2.0 oh = exp(ph * vh) * ah / 2.0 - return tvm.if_then_else(clip, tvm.max(0.0, tvm.min(1.0, ox - ow)), ox - ow), \ - tvm.if_then_else(clip, tvm.max(0.0, tvm.min(1.0, oy - oh)), oy - oh), \ - tvm.if_then_else(clip, tvm.max(0.0, tvm.min(1.0, ox + ow)), ox + ow), \ - tvm.if_then_else(clip, tvm.max(0.0, tvm.min(1.0, oy + oh)), oy + oh) + return tvm.tir.if_then_else(clip, tvm.te.max(0.0, tvm.te.min(1.0, ox - ow)), ox - ow), \ + tvm.tir.if_then_else(clip, tvm.te.max(0.0, tvm.te.min(1.0, oy - oh)), oy - oh), \ + tvm.tir.if_then_else(clip, tvm.te.max(0.0, tvm.te.min(1.0, ox + ow)), ox + ow), \ + tvm.tir.if_then_else(clip, tvm.te.max(0.0, tvm.te.min(1.0, oy + oh)), oy + oh) - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() loc_pred = ib.buffer_ptr(loc_pred) anchor = ib.buffer_ptr(anchor) @@ -306,14 +304,14 @@ def transform_loc_ir(loc_pred, anchor, temp_valid_count, temp_cls_id, temp_score max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads) nthread_tx = max_threads nthread_bx = (batch_size * num_anchors) // max_threads + 1 - tx = tvm.thread_axis("threadIdx.x") - bx = tvm.thread_axis("blockIdx.x") + tx = te.thread_axis("threadIdx.x") + bx = 
te.thread_axis("blockIdx.x") ib.scope_attr(tx, "thread_extent", nthread_tx) ib.scope_attr(bx, "thread_extent", nthread_bx) tid = bx * max_threads + tx - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod with ib.if_scope(tid < batch_size * num_anchors): i = idxd(tid, num_anchors) @@ -348,13 +346,13 @@ def multibox_transform_loc(cls_prob, loc_pred, anchor, clip=True, \ Parameters ---------- - cls_prob : tvm.Tensor + cls_prob : tvm.te.Tensor Class probabilities. - loc_pred : tvm.Tensor + loc_pred : tvm.te.Tensor Location regression predictions. - anchor : tvm.Tensor + anchor : tvm.te.Tensor Prior anchor boxes. clip : boolean @@ -368,12 +366,12 @@ def multibox_transform_loc(cls_prob, loc_pred, anchor, clip=True, \ Returns ------- - ret : tuple of tvm.Tensor composed of + ret : tuple of tvm.te.Tensor composed of - out : tvm.Tensor + out : tvm.te.Tensor 3-D tensor with shape (batch_size, num_anchors, 6) - valid_count : tvm.Tensor + valid_count : tvm.te.Tensor 1-D tensor with shape (batch_size,), number of valid anchor boxes. 
""" batch_size = cls_prob.shape[0] @@ -383,40 +381,40 @@ def multibox_transform_loc(cls_prob, loc_pred, anchor, clip=True, \ valid_count_dtype = "int32" out_loc_dtype = loc_pred.dtype - valid_count_buf = api.decl_buffer((batch_size,), valid_count_dtype, - "valid_count_buf", data_alignment=4) - loc_pred_buf = api.decl_buffer(loc_pred.shape, loc_pred.dtype, - "loc_pred_buf", data_alignment=8) - anchor_buf = api.decl_buffer(anchor.shape, anchor.dtype, - "anchor_buf", data_alignment=8) + valid_count_buf = tvm.tir.decl_buffer((batch_size,), valid_count_dtype, + "valid_count_buf", data_alignment=4) + loc_pred_buf = tvm.tir.decl_buffer(loc_pred.shape, loc_pred.dtype, + "loc_pred_buf", data_alignment=8) + anchor_buf = tvm.tir.decl_buffer(anchor.shape, anchor.dtype, + "anchor_buf", data_alignment=8) - temp_valid_count_buf = api.decl_buffer( + temp_valid_count_buf = tvm.tir.decl_buffer( (batch_size, num_anchors,), valid_count_dtype, "temp_valid_count", data_alignment=8) - temp_cls_id_buf = api.decl_buffer( + temp_cls_id_buf = tvm.tir.decl_buffer( (batch_size, num_anchors,), valid_count_dtype, "temp_cls_id", data_alignment=8) - temp_score_buf = api.decl_buffer( + temp_score_buf = tvm.tir.decl_buffer( (batch_size, num_anchors,), cls_prob.dtype, "temp_score", data_alignment=8) valid_count, temp_valid_count, temp_cls_id, temp_score = \ - tvm.extern([(batch_size,), (batch_size, num_anchors,), (batch_size, num_anchors,), \ - (batch_size, num_anchors,)], [cls_prob], - lambda ins, outs: transform_loc_pre( - ins[0], outs[0], outs[1], outs[2], outs[3], threshold), - dtype=[valid_count_dtype, valid_count_dtype, valid_count_dtype, cls_prob.dtype], - out_buffers=[valid_count_buf, temp_valid_count_buf, \ - temp_cls_id_buf, temp_score_buf], - tag="multibox_transform_loc_phase_one") + te.extern([(batch_size,), (batch_size, num_anchors,), (batch_size, num_anchors,), \ + (batch_size, num_anchors,)], [cls_prob], + lambda ins, outs: transform_loc_pre( + ins[0], outs[0], outs[1], outs[2], 
outs[3], threshold), + dtype=[valid_count_dtype, valid_count_dtype, valid_count_dtype, cls_prob.dtype], + out_buffers=[valid_count_buf, temp_valid_count_buf, \ + temp_cls_id_buf, temp_score_buf], + tag="multibox_transform_loc_phase_one") out_loc = \ - tvm.extern([oshape], - [loc_pred, anchor, temp_valid_count, temp_cls_id, temp_score], - lambda ins, outs: transform_loc_ir( - ins[0], ins[1], ins[2], ins[3], ins[4], outs[0], clip, variances, \ - batch_size, num_anchors), - in_buffers=[loc_pred_buf, anchor_buf, temp_valid_count_buf, \ - temp_cls_id_buf, temp_score_buf], - dtype=[out_loc_dtype], - tag="multibox_transform_loc") + te.extern([oshape], + [loc_pred, anchor, temp_valid_count, temp_cls_id, temp_score], + lambda ins, outs: transform_loc_ir( + ins[0], ins[1], ins[2], ins[3], ins[4], outs[0], clip, variances, \ + batch_size, num_anchors), + in_buffers=[loc_pred_buf, anchor_buf, temp_valid_count_buf, \ + temp_cls_id_buf, temp_score_buf], + dtype=[out_loc_dtype], + tag="multibox_transform_loc") return [out_loc, valid_count] @@ -427,13 +425,13 @@ def multibox_detection(cls_prob, loc_pred, anchor, clip=True, threshold=0.01, nm Parameters ---------- - cls_prob : tvm.Tensor + cls_prob : tvm.te.Tensor Class probabilities. - loc_pred : tvm.Tensor + loc_pred : tvm.te.Tensor Location regression predictions. - anchor : tvm.Tensor + anchor : tvm.te.Tensor Prior anchor boxes. 
clip : boolean @@ -456,7 +454,7 @@ def multibox_detection(cls_prob, loc_pred, anchor, clip=True, threshold=0.01, nm Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor 3-D tensor with shape (batch_size, num_anchors, 6) """ inter_out = multibox_transform_loc(cls_prob, loc_pred, anchor, diff --git a/topi/python/topi/cuda/tensor_intrin.py b/topi/python/topi/cuda/tensor_intrin.py index 8f46d27..468e2cd 100644 --- a/topi/python/topi/cuda/tensor_intrin.py +++ b/topi/python/topi/cuda/tensor_intrin.py @@ -17,6 +17,7 @@ """Tensor intrinsics on CUDA.""" #pylint: disable=invalid-name import tvm +from tvm import te def dp4a(x_scope='local', y_scope='local', z_scope='local'): @@ -39,12 +40,12 @@ def dp4a(x_scope='local', y_scope='local', z_scope='local'): """ n = 4 # dp4a requires operands packed by 4 - x = tvm.placeholder((n,), name='x', dtype='int8') - y = tvm.placeholder((n,), name='y', dtype='int8') + x = te.placeholder((n,), name='x', dtype='int8') + y = te.placeholder((n,), name='y', dtype='int8') - k = tvm.reduce_axis((0, n), name='rc') + k = te.reduce_axis((0, n), name='rc') - z = tvm.compute((1,), lambda i: tvm.sum( + z = te.compute((1,), lambda i: te.sum( x[k].astype('int32') * y[k].astype('int32'), axis=[k])) def _intrin_func(ins, outs): @@ -55,24 +56,24 @@ def dp4a(x_scope='local', y_scope='local', z_scope='local'): if index == 1: return zz.vstore(0, 0) - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() vec_x = xx.vload(0, dtype='int8x4') vec_y = yy.vload(0, dtype='int8x4') prev_z = 0 if index == 0 else zz.vload(0) - new_z = tvm.call_pure_extern('int32', '__dp4a', vec_x, vec_y, prev_z) + new_z = tvm.tir.call_pure_extern('int32', '__dp4a', vec_x, vec_y, prev_z) ib.emit(zz.vstore(0, new_z)) return ib.get() return _instr(0), _instr(1), _instr(2) # body, reset, update - with tvm.build_config(data_alignment=4, offset_factor=1) as cfg: + with tvm.target.build_config(data_alignment=4, offset_factor=1) as cfg: scopes = {x: x_scope, y: y_scope, z: 
z_scope} - binds = {t: tvm.decl_buffer(t.shape, t.dtype, t.op.name, - data_alignment=cfg.data_alignment, - offset_factor=cfg.offset_factor, - scope=scopes[t]) for t in [x, y, z]} + binds = {t: tvm.tir.decl_buffer(t.shape, t.dtype, t.op.name, + data_alignment=cfg.data_alignment, + offset_factor=cfg.offset_factor, + scope=scopes[t]) for t in [x, y, z]} - return tvm.decl_tensor_intrin(z.op, _intrin_func, binds=binds) + return te.decl_tensor_intrin(z.op, _intrin_func, binds=binds) diff --git a/topi/python/topi/cuda/vision.py b/topi/python/topi/cuda/vision.py index 8666c22..eb49328 100644 --- a/topi/python/topi/cuda/vision.py +++ b/topi/python/topi/cuda/vision.py @@ -18,6 +18,7 @@ """Schedule for vision operators""" from __future__ import absolute_import as _abs import tvm +from tvm import te from .. import cpp from .. import tag from .pooling import schedule_pool @@ -25,8 +26,8 @@ from .injective import schedule_injective_from_existing def _default_schedule(outs): """Default schedule for gpu.""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def traverse(op): if tag.is_broadcast(op.tag) or op.tag in ['bbox_score', 'sorted_bbox']: diff --git a/topi/python/topi/generic/__init__.py b/topi/python/topi/generic/__init__.py index b9db156..bf45bc3 100644 --- a/topi/python/topi/generic/__init__.py +++ b/topi/python/topi/generic/__init__.py @@ -28,7 +28,7 @@ Example # create schedule that dispatches to topi.cuda.schedule_injective with tvm.target.create("cuda"): - s = tvm.generic.schedule_injective(outs) + s = topi.generic.schedule_injective(outs) """ from __future__ import absolute_import as _abs diff --git a/topi/python/topi/generic/conv2d.py b/topi/python/topi/generic/conv2d.py index 08bb06c..69984a1 100644 --- a/topi/python/topi/generic/conv2d.py +++
b/topi/python/topi/generic/conv2d.py @@ -17,8 +17,7 @@ # pylint: disable=invalid-name, unused-variable, too-many-locals # pylint: disable=unused-argument, redefined-builtin """Generic convolution schedules""" -from __future__ import absolute_import as _abs -import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import SplitEntity, OtherOptionEntity from ..util import get_const_tuple @@ -123,7 +122,7 @@ def schedule_conv_NCHWc_cpu_common_int8(s, cfg, data_vec, kernel_vec, conv_out, _, _, _, _, oc_bn = get_const_tuple(conv_out.shape) # schedule pad - if isinstance(s[data_vec].op, tvm.tensor.ComputeOp) \ + if isinstance(s[data_vec].op, te.tensor.ComputeOp) \ and "pad" in data_vec.op.tag: batch, ic_chunk, ih, iw, ic_block = s[data_vec].op.axis parallel_axis = s[data_vec].fuse(batch, ic_chunk, ih) @@ -136,7 +135,7 @@ def schedule_conv_NCHWc_cpu_common_int8(s, cfg, data_vec, kernel_vec, conv_out, # this part will be folded during Relay fold_constant pass. s[data_vec].pragma(s[data_vec].op.axis[0], "debug_skip_region") s[kernel_vec].pragma(s[kernel_vec].op.axis[0], "debug_skip_region") - elif isinstance(kernel_vec.op, tvm.tensor.ComputeOp) and \ + elif isinstance(kernel_vec.op, te.tensor.ComputeOp) and \ kernel_vec.name == 'kernel_vec': # data and kernel are not pre-computed, schedule layout transform here. # this should only be used by x86 conv2d_nchw, which is for @@ -213,7 +212,7 @@ def schedule_conv_NCHWc_cpu_1x1_int8(s, cfg, data_vec, kernel_vec, conv_out, _, _, _, _, oc_bn = get_const_tuple(conv_out.shape) # schedule pad - if isinstance(s[data_vec].op, tvm.tensor.ComputeOp) \ + if isinstance(s[data_vec].op, te.tensor.ComputeOp) \ and "pad" in data_vec.op.tag: batch, ic_chunk, ih, iw, ic_block = s[data_vec].op.axis parallel_axis = s[data_vec].fuse(batch, ic_chunk, ih) @@ -226,7 +225,7 @@ def schedule_conv_NCHWc_cpu_1x1_int8(s, cfg, data_vec, kernel_vec, conv_out, # this part will be folded during Relay fold_constant pass. 
s[data_vec].pragma(s[data_vec].op.axis[0], "debug_skip_region") s[kernel_vec].pragma(s[kernel_vec].op.axis[0], "debug_skip_region") - elif isinstance(kernel_vec.op, tvm.tensor.ComputeOp) and \ + elif isinstance(kernel_vec.op, te.tensor.ComputeOp) and \ kernel_vec.name == 'kernel_vec': # data and kernel are not pre-computed, schedule layout transform here. # this should only be used by x86 conv2d_nchw, which is for diff --git a/topi/python/topi/generic/extern.py b/topi/python/topi/generic/extern.py index 977c537..3b4feb7 100644 --- a/topi/python/topi/generic/extern.py +++ b/topi/python/topi/generic/extern.py @@ -16,8 +16,6 @@ # under the License. # pylint: disable=invalid-name """generic declaration and schedules.""" -from __future__ import absolute_import as _abs - import tvm from .. import cpp diff --git a/topi/python/topi/generic/injective.py b/topi/python/topi/generic/injective.py index 6f1013c..50de798 100644 --- a/topi/python/topi/generic/injective.py +++ b/topi/python/topi/generic/injective.py @@ -19,6 +19,7 @@ from __future__ import absolute_import as _abs import tvm +from tvm import te def schedule_injective_from_existing(sch, out): """Schedule for injective op from existing schedule. 
@@ -55,10 +56,10 @@ def schedule_injective(outs): target = tvm.target.Target.current(allow_none=False) if target.target_name != "llvm": raise RuntimeError("schedule_injective not registered for '%s'" % target) - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs x = outs[0] - s = tvm.create_schedule([x.op for x in outs]) - tvm.schedule.AutoInlineInjective(s) + s = te.create_schedule([x.op for x in outs]) + te.schedule.AutoInlineInjective(s) schedule_injective_from_existing(s, x) return s diff --git a/topi/python/topi/generic/nn.py b/topi/python/topi/generic/nn.py index ba50a8b..25b5501 100644 --- a/topi/python/topi/generic/nn.py +++ b/topi/python/topi/generic/nn.py @@ -16,19 +16,19 @@ # under the License. # pylint: disable=invalid-name,unused-argument """Generic nn operators""" -from __future__ import absolute_import as _abs import tvm +from tvm import te def _default_schedule(outs, auto_inline): """Default schedule for llvm.""" target = tvm.target.Target.current(allow_none=False) - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs if target.target_name not in ("llvm", "c"): raise RuntimeError("schedule not registered for '%s'" % target) - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) if auto_inline: x = outs[0] - tvm.schedule.AutoInlineInjective(s) + te.schedule.AutoInlineInjective(s) s[x].fuse(s[x].op.axis) return s @@ -187,7 +187,7 @@ def schedule_conv2d_winograd_weight_transform(outs): """ # Typically this is computed in PreCompute pass # so we make a schedule here for cpu llvm - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) output = outs[0] _, G = s[output].op.input_tensors s[G].compute_inline() @@ -230,7 +230,7 @@ def schedule_conv2d_winograd_nnpack_weight_transform(outs): The computation schedule for the op. 
""" # Typically this is computed in PreCompute pass - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) return s diff --git a/topi/python/topi/generic/vision.py b/topi/python/topi/generic/vision.py index d6e80df..3935250 100644 --- a/topi/python/topi/generic/vision.py +++ b/topi/python/topi/generic/vision.py @@ -18,18 +18,19 @@ """Generic vision operators""" from __future__ import absolute_import as _abs import tvm +from tvm import te from .. import cpp def _default_schedule(outs, auto_inline): """Default schedule for llvm.""" target = tvm.target.Target.current(allow_none=False) - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs if target.target_name != "llvm": raise RuntimeError("schedule not registered for '%s'" % target) - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) if auto_inline: x = outs[0] - tvm.schedule.AutoInlineInjective(s) + te.schedule.AutoInlineInjective(s) s[x].fuse(s[x].op.axis) return s diff --git a/topi/python/topi/generic_op_impl.py b/topi/python/topi/generic_op_impl.py index b4b719f..f4695d3 100644 --- a/topi/python/topi/generic_op_impl.py +++ b/topi/python/topi/generic_op_impl.py @@ -16,8 +16,8 @@ # under the License. """Implementation of generic operators in the presence of Tensor""" # pylint: disable=invalid-name, too-many-arguments -from __future__ import absolute_import as _abs import tvm +from tvm import te from . import broadcast as _broadcast from . import math as _math @@ -75,11 +75,11 @@ def _make_bop(broadcast_bop, orig_bop): Returns ------- - ret : tvm.Tensor (if at least one operand is non-zero-rank Tensor) + ret : tvm.te.Tensor (if at least one operand is non-zero-rank Tensor) tvm.Expr (otherwise) The result of {op} operation. 
""" - if not isinstance(lhs, tvm.tensor.Tensor) and not isinstance(rhs, tvm.tensor.Tensor): + if not isinstance(lhs, te.tensor.Tensor) and not isinstance(rhs, te.tensor.Tensor): return orig_bop(lhs, rhs) return broadcast_bop(lhs, rhs) _tensor_bop_impl.__doc__ = _tensor_bop_impl.__doc__.format(op=name) @@ -90,12 +90,12 @@ def _bind_generic_ops(): """Bind generic operators for Tensor.""" # Check __op_priority__ to make sure the binding happens only once. __op_priority__ = 1 - if __op_priority__ > tvm.generic.__op_priority__: - tvm.generic.__op_priority__ = __op_priority__ - tvm.generic.add = _make_bop(_broadcast.add, tvm.generic.add) - tvm.generic.subtract = _make_bop(_broadcast.subtract, tvm.generic.subtract) - tvm.generic.multiply = _make_bop(_broadcast.multiply, tvm.generic.multiply) - tvm.generic.divide = _make_bop(_broadcast.divide, tvm.generic.divide) - tvm.generic.cast = _math.cast + if __op_priority__ > tvm.tir.generic.__op_priority__: + tvm.tir.generic.__op_priority__ = __op_priority__ + tvm.tir.generic.add = _make_bop(_broadcast.add, tvm.tir.generic.add) + tvm.tir.generic.subtract = _make_bop(_broadcast.subtract, tvm.tir.generic.subtract) + tvm.tir.generic.multiply = _make_bop(_broadcast.multiply, tvm.tir.generic.multiply) + tvm.tir.generic.divide = _make_bop(_broadcast.divide, tvm.tir.generic.divide) + tvm.tir.generic.cast = _math.cast _bind_generic_ops() diff --git a/topi/python/topi/hls/injective.py b/topi/python/topi/hls/injective.py index d4ccf41..6d0c6f4 100644 --- a/topi/python/topi/hls/injective.py +++ b/topi/python/topi/hls/injective.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name, unused-variable, """Schedule for composition of injective operator""" import tvm +from tvm import te def schedule_injective_from_existing(sch, out): """Schedule for injective op from existing schedule. 
@@ -35,7 +36,7 @@ def schedule_injective_from_existing(sch, out): """ fused = sch[out].fuse(*sch[out].op.axis) px, x = sch[out].split(fused, nparts=1) - sch[out].bind(px, tvm.thread_axis("pipeline")) + sch[out].bind(px, te.thread_axis("pipeline")) return sch def schedule_injective(outs): @@ -52,9 +53,9 @@ def schedule_injective(outs): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) - tvm.schedule.AutoInlineInjective(s) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) + tvm.te.schedule.AutoInlineInjective(s) for out in outs: schedule_injective_from_existing(s, out) return s diff --git a/topi/python/topi/hls/nn.py b/topi/python/topi/hls/nn.py index 06cf329..3d7ff82 100644 --- a/topi/python/topi/hls/nn.py +++ b/topi/python/topi/hls/nn.py @@ -18,13 +18,14 @@ """HLS nn operators""" from __future__ import absolute_import as _abs import tvm +from tvm import te from .. 
import tag def _schedule_conv2d(outs): - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) - tvm.schedule.AutoInlineInjective(s) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) + tvm.te.schedule.AutoInlineInjective(s) def traverse(OP): """Internal traverse function""" @@ -33,7 +34,7 @@ def _schedule_conv2d(outs): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp): + if isinstance(tensor.op, tvm.te.ComputeOp): traverse(tensor.op) # schedule conv2d elif OP.tag.find("conv2d") >= 0: @@ -47,7 +48,7 @@ def _schedule_conv2d(outs): traverse(outs[0].op) px, x = s[outs[0]].split(outs[0].op.axis[0], nparts=1) - s[outs[0]].bind(px, tvm.thread_axis("pipeline")) + s[outs[0]].bind(px, te.thread_axis("pipeline")) return s @@ -199,9 +200,9 @@ def schedule_reduce(outs): sch: Schedule The computation schedule for the op. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) - tvm.schedule.AutoInlineInjective(s) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) + tvm.te.schedule.AutoInlineInjective(s) def traverse(OP): """Internal traverse function""" @@ -210,7 +211,7 @@ def schedule_reduce(outs): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp): + if isinstance(tensor.op, tvm.te.ComputeOp): traverse(tensor.op) elif OP.tag in ["comm_reduce", "comm_reduce_idx"]: if OP.tag == "comm_reduce": @@ -227,7 +228,7 @@ def schedule_reduce(outs): fused = s[outs[0]].fuse() px, x = s[outs[0]].split(fused, nparts=1) - s[outs[0]].bind(px, tvm.thread_axis("pipeline")) + s[outs[0]].bind(px, te.thread_axis("pipeline")) return s @@ -245,9 +246,9 @@ def schedule_softmax(outs): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) - tvm.schedule.AutoInlineInjective(s) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) + tvm.te.schedule.AutoInlineInjective(s) softmax = outs[0] @@ -271,7 +272,7 @@ def schedule_softmax(outs): s[max_elem].compute_at(s[softmax], s[softmax].op.axis[1]) px, x = s[softmax].split(softmax.op.axis[0], nparts=1) - s[softmax].bind(px, tvm.thread_axis("pipeline")) + s[softmax].bind(px, te.thread_axis("pipeline")) return s @@ -289,9 +290,9 @@ def schedule_dense(outs): sch: Schedule The computation schedule for the op. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) - tvm.schedule.AutoInlineInjective(s) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) + tvm.te.schedule.AutoInlineInjective(s) def traverse(OP): """Internal traverse function""" @@ -300,7 +301,7 @@ def schedule_dense(outs): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp): + if isinstance(tensor.op, tvm.te.ComputeOp): traverse(tensor.op) # schedule dense elif OP.tag == 'dense': @@ -314,7 +315,7 @@ def schedule_dense(outs): traverse(outs[0].op) px, x = s[outs[0]].split(outs[0].op.axis[0], nparts=1) - s[outs[0]].bind(px, tvm.thread_axis("pipeline")) + s[outs[0]].bind(px, te.thread_axis("pipeline")) return s @@ -332,9 +333,9 @@ def schedule_pool(outs, layout): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) - tvm.schedule.AutoInlineInjective(s) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) + tvm.te.schedule.AutoInlineInjective(s) def traverse(OP): """Internal traverse function""" @@ -343,7 +344,7 @@ def schedule_pool(outs, layout): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp): + if isinstance(tensor.op, tvm.te.ComputeOp): traverse(tensor.op) # schedule pool elif OP.tag.startswith('pool'): @@ -357,7 +358,7 @@ def schedule_pool(outs, layout): traverse(outs[0].op) px, x = s[outs[0]].split(outs[0].op.axis[0], nparts=1) - s[outs[0]].bind(px, tvm.thread_axis("pipeline")) + s[outs[0]].bind(px, te.thread_axis("pipeline")) return s @@ -375,9 +376,9 @@ def schedule_adaptive_pool(outs): sch: Schedule The computation schedule for the op. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) - tvm.schedule.AutoInlineInjective(s) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) + tvm.te.schedule.AutoInlineInjective(s) def traverse(OP): """Internal traverse function""" @@ -386,7 +387,7 @@ def schedule_adaptive_pool(outs): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp): + if isinstance(tensor.op, tvm.te.ComputeOp): traverse(tensor.op) # schedule global_pool elif OP.tag.startswith('adaptive_pool'): @@ -400,5 +401,5 @@ def schedule_adaptive_pool(outs): traverse(outs[0].op) px, x = s[outs[0]].split(outs[0].op.axis[0], nparts=1) - s[outs[0]].bind(px, tvm.thread_axis("pipeline")) + s[outs[0]].bind(px, te.thread_axis("pipeline")) return s diff --git a/topi/python/topi/image/resize.py b/topi/python/topi/image/resize.py index 0c02867..d901bab 100644 --- a/topi/python/topi/image/resize.py +++ b/topi/python/topi/image/resize.py @@ -18,6 +18,7 @@ """TVM operator input resize compute.""" from __future__ import absolute_import import tvm +from tvm import te from topi.util import nchw_pack_layout, nchw_xc_layout from .. 
import tag @@ -42,8 +43,8 @@ def get_2d_indices(indices, layout='NCHW'): def get_2d_pixel(data, layout, boxes, image_height, image_width, n, c, y, x, cc, ib, ic): """ Get 2d pixel """ if boxes is None: - y = tvm.max(tvm.min(y, image_height - 1), 0) - x = tvm.max(tvm.min(x, image_width - 1), 0) + y = tvm.te.max(tvm.te.min(y, image_height - 1), 0) + x = tvm.te.max(tvm.te.min(x, image_width - 1), 0) if layout == 'NHWC': return data(n, y, x, c).astype('float') if layout == 'NCHW': @@ -70,7 +71,7 @@ def resize_nearest_neighbor(indices, data, image_height, image_width, indices : tuple The indices of input data - data : tvm.Tensor + data : tvm.te.Tensor inputs is a 4-D tensor with shape [batch, channel, in_height, in_width] or [batch, in_height, in_width, channel] @@ -87,11 +88,11 @@ def resize_nearest_neighbor(indices, data, image_height, image_width, target_width : integer The target resized image width - boxes : tvm.Tensor, optional + boxes : tvm.te.Tensor, optional A 2-D tensor of shape [num_boxes, 4]. Each row of the tensor specifies the coordinates of a box. - box_indices : tvm.Tensor, optional + box_indices : tvm.te.Tensor, optional A 1-D tensor of shape [num_boxes], box_indices[i] specifies the data that the i-th box refers to. @@ -150,29 +151,29 @@ def resize_nearest_neighbor(indices, data, image_height, image_width, in_x = w_scale * x if coordinate_transformation_mode == "align_corners" or boxes is not None: - closest_x_index = tvm.round(in_x).astype("int32") - closest_y_index = tvm.round(in_y).astype("int32") + closest_x_index = te.round(in_x).astype("int32") + closest_y_index = te.round(in_y).astype("int32") else: # Add epsilon to floor to prevent gpu rounding errors. 
epsilon = 1e-5 - closest_y_index = tvm.floor(in_y + epsilon).astype('int32') - closest_x_index = tvm.floor(in_x + epsilon).astype('int32') + closest_y_index = te.floor(in_y + epsilon).astype('int32') + closest_x_index = te.floor(in_x + epsilon).astype('int32') value = get_2d_pixel(data, layout, boxes, image_height, image_width, box_idx, c, closest_y_index, closest_x_index, cc, inum, ic) if extrapolation_value is not None: - out = tvm.if_then_else(in_y < 0, - extrapolation_value, - tvm.if_then_else(in_y > image_height - 1, - extrapolation_value, - value)) + out = tvm.tir.if_then_else(in_y < 0, + extrapolation_value, + tvm.tir.if_then_else(in_y > image_height - 1, + extrapolation_value, + value)) # use extrapolation_value if in_x is out of boundary - value = tvm.if_then_else(in_x < 0, - extrapolation_value, - tvm.if_then_else(in_x > image_width - 1, - extrapolation_value, - out)) + value = tvm.tir.if_then_else(in_x < 0, + extrapolation_value, + tvm.tir.if_then_else(in_x > image_width - 1, + extrapolation_value, + out)) return _cast_output(value, data.dtype, out_dtype=out_dtype) @@ -191,7 +192,7 @@ def resize_bilinear(indices, data, image_height, image_width, indices : tuple The indices of input data - data : tvm.Tensor + data : tvm.te.Tensor inputs is a 4-D tensor with shape [batch, channel, in_height, in_width] or [batch, in_height, in_width, channel] @@ -208,11 +209,11 @@ def resize_bilinear(indices, data, image_height, image_width, target_width : integer The target resized image width - boxes : tvm.Tensor, optional + boxes : tvm.te.Tensor, optional A 2-D tensor of shape [num_boxes, 4]. Each row of the tensor specifies the coordinates of a box. - box_indices : tvm.Tensor, optional + box_indices : tvm.te.Tensor, optional A 1-D tensor of shape [num_boxes], box_indices[i] specifies the data that the i-th box refers to. 
@@ -279,12 +280,12 @@ def resize_bilinear(indices, data, image_height, image_width, in_y = h_scale * y in_x = w_scale * x - top_y_index = tvm.floor(in_y).astype('int32') - bottom_y_index = tvm.ceil(in_y).astype('int32') + top_y_index = te.floor(in_y).astype('int32') + bottom_y_index = te.ceil(in_y).astype('int32') y_lerp = in_y - top_y_index - left_x_index = tvm.floor(in_x).astype('int32') - right_x_index = tvm.ceil(in_x).astype('int32') + left_x_index = te.floor(in_x).astype('int32') + right_x_index = te.ceil(in_x).astype('int32') x_lerp = in_x - left_x_index top_left = get_2d_pixel(data, layout, boxes, image_height, image_width, @@ -302,16 +303,16 @@ def resize_bilinear(indices, data, image_height, image_width, # use extrapolation_value if in_y/in_x is out of boundary if extrapolation_value is not None: - out = tvm.if_then_else(in_y < 0, - extrapolation_value, - tvm.if_then_else(in_y > image_height - 1, - extrapolation_value, - value)) - value = tvm.if_then_else(in_x < 0, - extrapolation_value, - tvm.if_then_else(in_x > image_width - 1, - extrapolation_value, - out)) + out = tvm.tir.if_then_else(in_y < 0, + extrapolation_value, + tvm.tir.if_then_else(in_y > image_height - 1, + extrapolation_value, + value)) + value = tvm.tir.if_then_else(in_x < 0, + extrapolation_value, + tvm.tir.if_then_else(in_x > image_width - 1, + extrapolation_value, + out)) return _cast_output(value, data.dtype, out_dtype=out_dtype) @@ -329,7 +330,7 @@ def resize_bicubic(indices, data, image_height, image_width, indices : tuple The indices of input data - data : tvm.Tensor + data : tvm.te.Tensor inputs is a 4-D tensor with shape [batch, channel, in_height, in_width] or [batch, in_height, in_width, channel] @@ -346,11 +347,11 @@ def resize_bicubic(indices, data, image_height, image_width, target_width : integer The target resized image width - boxes : tvm.Tensor, optional + boxes : tvm.te.Tensor, optional A 2-D tensor of shape [num_boxes, 4]. 
Each row of the tensor specifies the coordinates of a box. - box_indices : tvm.Tensor, optional + box_indices : tvm.te.Tensor, optional A 1-D tensor of shape [num_boxes], box_indices[i] specifies the data that the i-th box refers to. @@ -421,11 +422,11 @@ def resize_bicubic(indices, data, image_height, image_width, in_y = h_scale * y in_x = w_scale * x - xint = tvm.floor(in_x).astype('int32') - xfract = in_x - tvm.floor(in_x) + xint = te.floor(in_x).astype('int32') + xfract = in_x - te.floor(in_x) - yint = tvm.floor(in_y).astype('int32') - yfract = in_y - tvm.floor(in_y) + yint = te.floor(in_y).astype('int32') + yfract = in_y - te.floor(in_y) # 1st row p00 = _get_pixel(data, layout, boxes, image_height, image_width, @@ -476,16 +477,16 @@ def resize_bicubic(indices, data, image_height, image_width, # use extrapolation_value if in_y/in_x is out of boundary if extrapolation_value is not None: - out = tvm.if_then_else(in_y < 0, - extrapolation_value, - tvm.if_then_else(in_y > image_height - 1, - extrapolation_value, - value)) - value = tvm.if_then_else(in_x < 0, - extrapolation_value, - tvm.if_then_else(in_x > image_width - 1, - extrapolation_value, - out)) + out = tvm.tir.if_then_else(in_y < 0, + extrapolation_value, + tvm.tir.if_then_else(in_y > image_height - 1, + extrapolation_value, + value)) + value = tvm.tir.if_then_else(in_x < 0, + extrapolation_value, + tvm.tir.if_then_else(in_x > image_width - 1, + extrapolation_value, + out)) return _cast_output(value, data.dtype, out_dtype=out_dtype) @@ -495,7 +496,7 @@ def resize(data, size, layout="NCHW", method="bilinear", Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor inputs is a 4-D tensor with shape [batch, channel, in_height, in_width] or [batch, in_height, in_width, channel] @@ -520,7 +521,7 @@ def resize(data, size, layout="NCHW", method="bilinear", Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [batch, channel, in_height*scale, in_width*scale] or [batch, 
in_height*scale, in_width*scale, channel] or 5-D with shape [batch, channel-major, in_height*scale, in_width*scale, channel-minor] @@ -548,21 +549,21 @@ def resize(data, size, layout="NCHW", method="bilinear", return resize_nearest_neighbor(indices, data, in_h, in_w, size[0], size[1], layout=layout, coordinate_transformation_mode= \ - coordinate_transformation_mode, + coordinate_transformation_mode, out_dtype=out_dtype) def _bilinear(*indices): return resize_bilinear(indices, data, in_h, in_w, size[0], size[1], layout=layout, coordinate_transformation_mode= \ - coordinate_transformation_mode, + coordinate_transformation_mode, out_dtype=out_dtype) def _bicubic(*indices): return resize_bicubic(indices, data, in_h, in_w, size[0], size[1], layout, coordinate_transformation_mode= \ - coordinate_transformation_mode, + coordinate_transformation_mode, out_dtype=out_dtype) # Determine which interpolation method to use then run it. @@ -575,7 +576,7 @@ def resize(data, size, layout="NCHW", method="bilinear", else: raise ValueError('%s method is not supported.' % method) - return tvm.compute(output_shape, compute_func, name='resize', tag=tag.INJECTIVE) + return te.compute(output_shape, compute_func, name='resize', tag=tag.INJECTIVE) def crop_and_resize(data, boxes, box_indices, crop_size, layout="NCHW", @@ -584,16 +585,16 @@ def crop_and_resize(data, boxes, box_indices, crop_size, layout="NCHW", Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor inputs is a 4-D tensor with shape [batch, channel, in_height, in_width] or [batch, in_height, in_width, channel] - boxes : tvm.Tensor + boxes : tvm.te.Tensor A 2-D tensor of shape [num_boxes, 4]. Each row of the tensor specifies the coordinates of a box. - box_indices : tvm.Tensor + box_indices : tvm.te.Tensor A 1-D tensor of shape [num_boxes], box_indices[i] specifies the data that the i-th box refers to. 
@@ -614,7 +615,7 @@ def crop_and_resize(data, boxes, box_indices, crop_size, layout="NCHW", Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [num_boxes, channel, crop_height, crop_width] or [num_boxes, crop_height, crop_width, channel] """ @@ -656,7 +657,7 @@ def crop_and_resize(data, boxes, box_indices, crop_size, layout="NCHW", else: raise ValueError('%s method is not supported.' % method) - return tvm.compute(output_shape, compute_func, name='crop_and_resize', tag=tag.INJECTIVE) + return te.compute(output_shape, compute_func, name='crop_and_resize', tag=tag.INJECTIVE) @@ -665,7 +666,7 @@ def resize3d(data, size, layout="NCDHW", method="nearest_neighbor", """Perform resize operation on the data. Parameters ---------- - inputs: tvm.Tensor + inputs: tvm.te.Tensor inputs is a 5-D tensor with shape [batch, channel, in_depth, in_height, in_width] or [batch, in_depth, in_height, in_width, channel] @@ -684,7 +685,7 @@ def resize3d(data, size, layout="NCDHW", method="nearest_neighbor", Type to return. If left None will be same as input type. 
Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 5-D with shape [batch, channel, in_depth*scale, in_height*scale, in_width*scale] or [batch, in_depth*scale, in_height*scale, in_width*scale, channel] or 5-D with shape [batch, channel-major, in_depth*scale, in_height*scale, in_width*scale, @@ -716,9 +717,9 @@ def resize3d(data, size, layout="NCDHW", method="nearest_neighbor", coordinate_transformation_mode)) def _get_pixel(n, c, z, y, x, cc): - z = tvm.max(tvm.min(z, in_d - 1), 0) - y = tvm.max(tvm.min(y, in_h - 1), 0) - x = tvm.max(tvm.min(x, in_w - 1), 0) + z = tvm.te.max(tvm.te.min(z, in_d - 1), 0) + y = tvm.te.max(tvm.te.min(y, in_h - 1), 0) + x = tvm.te.max(tvm.te.min(x, in_w - 1), 0) if layout == 'NDHWC': return data(n, z, y, x, c).astype('float') if layout == 'NCDHW': @@ -754,15 +755,15 @@ def resize3d(data, size, layout="NCDHW", method="nearest_neighbor", in_x = x_ratio * x if coordinate_transformation_mode == "align_corners": - zint = tvm.round(in_z).astype('int32') - yint = tvm.round(in_y).astype('int32') - xint = tvm.round(in_x).astype('int32') + zint = te.round(in_z).astype('int32') + yint = te.round(in_y).astype('int32') + xint = te.round(in_x).astype('int32') elif coordinate_transformation_mode in ["asymmetric", "half_pixel"]: # Add epsilon to floor to prevent gpu rounding errors. 
epsilon = 1e-5 - zint = tvm.floor(in_z + epsilon).astype('int32') - yint = tvm.floor(in_y + epsilon).astype('int32') - xint = tvm.floor(in_x + epsilon).astype('int32') + zint = te.floor(in_z + epsilon).astype('int32') + yint = te.floor(in_y + epsilon).astype('int32') + xint = te.floor(in_x + epsilon).astype('int32') else: raise ValueError("Unsupported coordinate_transformation_mode: {}".format( coordinate_transformation_mode)) @@ -785,14 +786,14 @@ def resize3d(data, size, layout="NCDHW", method="nearest_neighbor", in_y = y_ratio * y in_x = x_ratio * x - zint = tvm.floor(in_z).astype('int32') - zfract = in_z - tvm.floor(in_z) + zint = te.floor(in_z).astype('int32') + zfract = in_z - te.floor(in_z) - xint = tvm.floor(in_x).astype('int32') - xfract = in_x - tvm.floor(in_x) + xint = te.floor(in_x).astype('int32') + xfract = in_x - te.floor(in_x) - yint = tvm.floor(in_y).astype('int32') - yfract = in_y - tvm.floor(in_y) + yint = te.floor(in_y).astype('int32') + yfract = in_y - te.floor(in_y) p000 = _get_pixel(n, c, zint, yint, xint, cc) p001 = _get_pixel(n, c, zint, yint, xint + 1, cc) @@ -820,4 +821,4 @@ def resize3d(data, size, layout="NCDHW", method="nearest_neighbor", else: raise ValueError('%s method is not supported.' 
% method) - return tvm.compute(output_shape, compute_func, name='resize3d', tag=tag.INJECTIVE) + return te.compute(output_shape, compute_func, name='resize3d', tag=tag.INJECTIVE) diff --git a/topi/python/topi/intel_graphics/conv2d.py b/topi/python/topi/intel_graphics/conv2d.py index 8993063..e4ea196 100644 --- a/topi/python/topi/intel_graphics/conv2d.py +++ b/topi/python/topi/intel_graphics/conv2d.py @@ -20,6 +20,7 @@ from __future__ import absolute_import as _abs import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import SplitEntity, OtherOptionEntity @@ -132,14 +133,14 @@ def tile_and_bind3d(s, tensor, z, y, x, z_factor=2, y_factor=None, x_factor=None xo, xi = s[tensor].split(x, x_factor) s[tensor].reorder(zo, yo, xo, zi, yi, xi) - thread_z = tvm.thread_axis((0, z_factor), "threadIdx.z") - thread_y = tvm.thread_axis((0, y_factor), "threadIdx.y") - thread_x = tvm.thread_axis((0, x_factor), "threadIdx.x") - s[tensor].bind(zo, tvm.thread_axis("blockIdx.z")) + thread_z = te.thread_axis((0, z_factor), "threadIdx.z") + thread_y = te.thread_axis((0, y_factor), "threadIdx.y") + thread_x = te.thread_axis((0, x_factor), "threadIdx.x") + s[tensor].bind(zo, te.thread_axis("blockIdx.z")) s[tensor].bind(zi, thread_z) - s[tensor].bind(yo, tvm.thread_axis("blockIdx.y")) + s[tensor].bind(yo, te.thread_axis("blockIdx.y")) s[tensor].bind(yi, thread_y) - s[tensor].bind(xo, tvm.thread_axis("blockIdx.x")) + s[tensor].bind(xo, te.thread_axis("blockIdx.x")) s[tensor].bind(xi, thread_x) return xi, thread_z, thread_y, thread_x @@ -151,11 +152,11 @@ def _pack_data(data, kernel, ic_bn, oc_bn): ic_chunk = ic // ic_bn oc_chunk = oc // oc_bn - data = tvm.compute((n, ic_chunk, ih, iw, ic_bn), - lambda bs, c, h, w, vc: data[bs, c*ic_bn + vc, h, w], - name="data_vec") + data = te.compute((n, ic_chunk, ih, iw, ic_bn), + lambda bs, c, h, w, vc: data[bs, c*ic_bn + vc, h, w], + name="data_vec") - kernel = tvm.compute( + kernel = te.compute( (oc_chunk, ic_chunk, kh, kw, 
ic_bn, oc_bn), lambda occ, icc, k_h, k_w, icb, ocb: kernel[occ * oc_bn + ocb, @@ -172,10 +173,10 @@ def conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation, layout, Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 5-D with shape [num_filter, in_channel, filter_height, filter_width, nnum_filter_vec] stride : int or a list/tuple of two ints @@ -189,7 +190,7 @@ def conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation, layout, Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ if len(data.shape) == 5: @@ -215,9 +216,9 @@ def conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation, layout, _create_schedule_template(cfg, data_shape, kernel_shape, strides, padding, dilation) if cfg.is_fallback: - _get_default_config(cfg, tvm.placeholder((batch, in_channel, ih, iw), dtype=data.dtype), - tvm.placeholder((num_filter, in_channel, kernel_height, kernel_width), - dtype=kernel.dtype), + _get_default_config(cfg, te.placeholder((batch, in_channel, ih, iw), dtype=data.dtype), + te.placeholder((num_filter, in_channel, kernel_height, kernel_width), + dtype=kernel.dtype), strides, padding, out_dtype) ic_bn = cfg["tile_ic"].val if hasattr(cfg["tile_ic"], "val") else cfg["tile_ic"].size[-1] @@ -232,9 +233,9 @@ def conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation, layout, out_width = simplify((iw - kernel_width + pad_left + pad_right) // stride_w + 1) oshape = (batch, out_channel // oc_bn, out_height, out_width, oc_bn) - rc = tvm.reduce_axis((0, in_channel), name='rc') - ry = tvm.reduce_axis((0, kernel_height), name='ry') - rx = tvm.reduce_axis((0, kernel_width), name='rx') + rc = te.reduce_axis((0, in_channel), name='rc') + ry = te.reduce_axis((0, kernel_height), name='ry') + rx = te.reduce_axis((0, kernel_width), name='rx') block_h = cfg["block_oh"].val block_w = 
cfg["block_ow"].val @@ -261,17 +262,17 @@ def conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation, layout, else: temp = data - conv = tvm.compute( + conv = te.compute( cshape, lambda nn, ff, yy, xx, ff_v: \ - tvm.sum( - temp[nn, rc//ic_bn, yy * stride_h + ry, xx * stride_w + rx, rc%ic_bn]. \ - astype(out_dtype) * - kernel[ff, rc//ic_bn, ry, rx, rc%ic_bn, ff_v].astype(out_dtype), - axis=[rc, ry, rx]), tag="conv2d_NCHWc", name='conv2d_NCHWc') + te.sum( + temp[nn, rc//ic_bn, yy * stride_h + ry, xx * stride_w + rx, rc%ic_bn]. \ + astype(out_dtype) * + kernel[ff, rc//ic_bn, ry, rx, rc%ic_bn, ff_v].astype(out_dtype), + axis=[rc, ry, rx]), tag="conv2d_NCHWc", name='conv2d_NCHWc') if DOUNPACK: - output = tvm.compute( + output = te.compute( oshape, lambda nn, ff, yy, xx, ff_v: conv[nn][ff][yy][xx][ff_v], @@ -297,8 +298,8 @@ def schedule_conv2d_NCHWc(cfg, outs): s: Schedule The computation schedule for conv2d_nchw. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): """inline all one-to-one-mapping operators except the last stage (output)""" @@ -344,7 +345,7 @@ def _schedule_cl_spatialpack_NCHWc(cfg, s, op): # this part will be folded during Relay fold_constant pass. s[data].pragma(s[data].op.axis[0], "debug_skip_region") s[kernel].pragma(s[kernel].op.axis[0], "debug_skip_region") - elif isinstance(kernel.op, tvm.tensor.ComputeOp) and kernel.name == "kernel_vec": + elif isinstance(kernel.op, tvm.te.ComputeOp) and kernel.name == "kernel_vec": # data and kernel are not pre-computed, schedule layout transform here. 
# TODO(@Laurawly): Add schedule for data and kernel pack pass @@ -356,9 +357,9 @@ def _schedule_cl_spatialpack_NCHWc(cfg, s, op): z_factor = 1 y_factor = 1 x_factor = 16 - thread_z = tvm.thread_axis((0, z_factor), "threadIdx.z") - thread_y = tvm.thread_axis((0, y_factor), "threadIdx.y") - thread_x = tvm.thread_axis((0, x_factor), "threadIdx.x") + thread_z = te.thread_axis((0, z_factor), "threadIdx.z") + thread_y = te.thread_axis((0, y_factor), "threadIdx.y") + thread_x = te.thread_axis((0, x_factor), "threadIdx.x") _, co, oh, ow, vc = s[conv].op.axis ooh, ioh = s[conv].split(oh, factor=OUTPUT_BLOCK_HEIGHT) oow, iow = s[conv].split(ow, factor=OUTPUT_BLOCK_WIDTH) @@ -371,9 +372,9 @@ def _schedule_cl_spatialpack_NCHWc(cfg, s, op): s[conv].bind(oohi, thread_z) s[conv].bind(oowi, thread_y) s[conv].bind(vci, thread_x) - s[conv].bind(ooho, tvm.thread_axis("blockIdx.z")) - s[conv].bind(oowo, tvm.thread_axis("blockIdx.y")) - s[conv].bind(coi, tvm.thread_axis("blockIdx.x")) + s[conv].bind(ooho, te.thread_axis("blockIdx.z")) + s[conv].bind(oowo, te.thread_axis("blockIdx.y")) + s[conv].bind(coi, te.thread_axis("blockIdx.x")) # schedule conv_L s[conv_L].compute_at(s[conv], vci) @@ -424,9 +425,9 @@ def conv2d_nchw(data, kernel, stride, padding, dilation, out_dtype='float32'): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 4-D with shape [num_filter, in_channel, filter_height, filter_width] stride : int or a list/tuple of two ints stride size, or [stride_height, stride_width] @@ -434,7 +435,7 @@ def conv2d_nchw(data, kernel, stride, padding, dilation, out_dtype='float32'): padding size, or [pad_height, pad_width] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ assert data.shape[0].value == 1, "only support batch size=1 convolution on intel gpu" @@ -456,8 +457,8 @@ def 
schedule_conv2d_nchw(outs): s: Schedule The computation schedule for conv2d_nchw. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): """inline all one-to-one-mapping operators except the last stage (output)""" @@ -483,9 +484,9 @@ def _decl_cl_spatialpack(data, kernel, stride, padding, out_dtype='float16'): out_width = simplify((in_width - kernel_w + pad_left + pad_right) // stride_w + 1) oshape = (batch, out_channel, out_height, out_width) - rc = tvm.reduce_axis((0, in_channel), name='rc') - ry = tvm.reduce_axis((0, kernel_h), name='ry') - rx = tvm.reduce_axis((0, kernel_w), name='rx') + rc = te.reduce_axis((0, in_channel), name='rc') + ry = te.reduce_axis((0, kernel_h), name='ry') + rx = te.reduce_axis((0, kernel_w), name='rx') if stride_h == 2: if num_filter + kernel_h == 515: @@ -529,20 +530,20 @@ def _decl_cl_spatialpack(data, kernel, stride, padding, out_dtype='float16'): cshape = (batch, out_channel // nv, c_h, c_w, nv) kvshape = (num_filter // nv, channel, kernel_h, kernel_w, nv) - kernel_vec = tvm.compute( + kernel_vec = te.compute( kvshape, lambda co, ci, kh, kw, vc: kernel[co*nv + vc][ci][kh][kw], name='kernel_vec') - conv = tvm.compute( + conv = te.compute( cshape, lambda nn, ff, yy, xx, vc: \ - tvm.sum( - temp[nn, rc, yy * stride_h + ry, xx * stride_w + rx].astype(out_dtype) * - kernel_vec[ff, rc, ry, rx, vc].astype(out_dtype), - axis=[rc, ry, rx]), name='conv', attrs=attrs) + te.sum( + temp[nn, rc, yy * stride_h + ry, xx * stride_w + rx].astype(out_dtype) * + kernel_vec[ff, rc, ry, rx, vc].astype(out_dtype), + axis=[rc, ry, rx]), name='conv', attrs=attrs) - output = tvm.compute( + output = te.compute( oshape, lambda nn, ff, yy, xx: conv[nn][ff//nv][yy][xx][ff%nv], @@ -573,9 +574,9 @@ def _schedule_cl_spatialpack(s, op): z_factor = 1 y_factor = 1 x_factor = 16 
- thread_z = tvm.thread_axis((0, z_factor), "threadIdx.z") - thread_y = tvm.thread_axis((0, y_factor), "threadIdx.y") - thread_x = tvm.thread_axis((0, x_factor), "threadIdx.x") + thread_z = te.thread_axis((0, z_factor), "threadIdx.z") + thread_y = te.thread_axis((0, y_factor), "threadIdx.y") + thread_x = te.thread_axis((0, x_factor), "threadIdx.x") _, co, oh, ow, vc = s[conv].op.axis ooh, ioh = s[conv].split(oh, factor=OUTPUT_BLOCK_HEIGHT) oow, iow = s[conv].split(ow, factor=OUTPUT_BLOCK_WIDTH) @@ -588,9 +589,9 @@ def _schedule_cl_spatialpack(s, op): s[conv].bind(oohi, thread_z) s[conv].bind(oowi, thread_y) s[conv].bind(vci, thread_x) - s[conv].bind(ooho, tvm.thread_axis("blockIdx.z")) - s[conv].bind(oowo, tvm.thread_axis("blockIdx.y")) - s[conv].bind(coi, tvm.thread_axis("blockIdx.x")) + s[conv].bind(ooho, te.thread_axis("blockIdx.z")) + s[conv].bind(oowo, te.thread_axis("blockIdx.y")) + s[conv].bind(coi, te.thread_axis("blockIdx.x")) # schedule conv_L s[conv_L].compute_at(s[conv], vci) diff --git a/topi/python/topi/intel_graphics/conv2d_alter_op.py b/topi/python/topi/intel_graphics/conv2d_alter_op.py index e95e59f..bbe5e7f 100644 --- a/topi/python/topi/intel_graphics/conv2d_alter_op.py +++ b/topi/python/topi/intel_graphics/conv2d_alter_op.py @@ -18,6 +18,7 @@ """Conv2D alter op and legalize functions for x86""" import tvm +from tvm import te from tvm import relay from tvm import autotvm @@ -74,10 +75,10 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): new_attrs['out_layout'] = 'NCHW%dc' % oc_bn # Store altered operator's config - new_data = tvm.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn), - dtype=data_dtype) - new_kernel = tvm.placeholder((out_channel//oc_bn, in_channel//ic_bn, - kh, kw, ic_bn, oc_bn), dtype=kernel_dtype) + new_data = te.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn), + dtype=data_dtype) + new_kernel = te.placeholder((out_channel//oc_bn, in_channel//ic_bn, + kh, kw, ic_bn, oc_bn), 
dtype=kernel_dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, new_attrs["data_layout"], new_attrs["out_layout"], out_dtype], "conv2d_NCHWc.intel_graphics") diff --git a/topi/python/topi/intel_graphics/depthwise_conv2d.py b/topi/python/topi/intel_graphics/depthwise_conv2d.py index 17f1943..a549413 100644 --- a/topi/python/topi/intel_graphics/depthwise_conv2d.py +++ b/topi/python/topi/intel_graphics/depthwise_conv2d.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name """Schedule for depthwise_conv2d with auto fusion""" import tvm +from tvm import te from tvm import autotvm from ..util import traverse_inline from .. import tag @@ -44,8 +45,8 @@ def schedule_depthwise_conv2d_nchw(cfg, outs): s: Schedule The computation schedule for depthwise_conv2d nchw. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'depthwise_conv2d_nchw': @@ -75,7 +76,7 @@ def schedule_depthwise_conv2d_nchw(cfg, outs): ##### space definition end ##### s[pad_data].compute_inline() - if isinstance(kernel.op, tvm.tensor.ComputeOp) and 'dilate' in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and 'dilate' in kernel.op.tag: s[kernel].compute_inline() if conv.op in s.outputs: @@ -100,15 +101,15 @@ def schedule_depthwise_conv2d_nchw(cfg, outs): kernel_scope, n = s[output].split(n, nparts=1) bf = s[output].fuse(n, bf) - s[output].bind(bf, tvm.thread_axis("blockIdx.z")) - s[output].bind(by, tvm.thread_axis("blockIdx.y")) - s[output].bind(bx, tvm.thread_axis("blockIdx.x")) - s[output].bind(vf, tvm.thread_axis("vthread")) - s[output].bind(vy, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) - s[output].bind(tf, tvm.thread_axis("threadIdx.z")) - s[output].bind(ty, tvm.thread_axis("threadIdx.y")) - 
s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(bf, te.thread_axis("blockIdx.z")) + s[output].bind(by, te.thread_axis("blockIdx.y")) + s[output].bind(bx, te.thread_axis("blockIdx.x")) + s[output].bind(vf, te.thread_axis("vthread")) + s[output].bind(vy, te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) + s[output].bind(tf, te.thread_axis("threadIdx.z")) + s[output].bind(ty, te.thread_axis("threadIdx.y")) + s[output].bind(tx, te.thread_axis("threadIdx.x")) s[output].reorder(bf, by, bx, vf, vy, vx, tf, ty, tx, fi, yi, xi) s[OL].compute_at(s[output], tx) @@ -123,9 +124,9 @@ def schedule_depthwise_conv2d_nchw(cfg, outs): fused, tx = s[load].split(fused, cfg["tile_x"].size[2]) fused, ty = s[load].split(fused, cfg["tile_y"].size[2]) fused, tz = s[load].split(fused, cfg["tile_f"].size[2]) - s[load].bind(tz, tvm.thread_axis("threadIdx.z")) - s[load].bind(ty, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.z")) + s[load].bind(ty, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val) s[output].pragma(kernel_scope, 'unroll_explicit', cfg['unroll_explicit'].val) @@ -148,8 +149,8 @@ def schedule_depthwise_conv2d_nhwc(outs): s: Schedule The computation schedule for depthwise_conv2d nhwc. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(temp, Filter, DepthwiseConv2d): s[temp].compute_inline() @@ -161,13 +162,13 @@ def schedule_depthwise_conv2d_nhwc(outs): Output = outs[0].op.output(0) s[DepthwiseConv2d].set_scope("local") - block_x = tvm.thread_axis("blockIdx.x") - thread_x = tvm.thread_axis("threadIdx.x") + block_x = te.thread_axis("blockIdx.x") + thread_x = te.thread_axis("threadIdx.x") b, h, w, c = s[Output].op.axis # num_thread here could be 728, it is larger than cuda.max_num_threads - num_thread = tvm.ir_pass.Simplify(temp.shape[3]).value + num_thread = tvm.tir.ir_pass.Simplify(temp.shape[3]).value target = tvm.target.Target.current() if target and (target.target_name not in ["cuda", "nvptx"]): num_thread = target.max_num_threads @@ -206,7 +207,7 @@ def schedule_depthwise_conv2d_nhwc(outs): if OP.tag == 'depthwise_conv2d_nhwc': PaddedInput = OP.input_tensors[0] Filter = OP.input_tensors[1] - if isinstance(Filter.op, tvm.tensor.ComputeOp) and 'dilate' in Filter.op.tag: + if isinstance(Filter.op, tvm.te.ComputeOp) and 'dilate' in Filter.op.tag: s[Filter].compute_inline() DepthwiseConv2d = OP.output(0) _schedule(PaddedInput, Filter, DepthwiseConv2d) @@ -232,14 +233,14 @@ def schedule_depthwise_conv2d_backward_input_nhwc(outs): The computation schedule for depthwise_conv2d backward wrt input with layout nhwc. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(Padded_out_grad, In_grad): s[Padded_out_grad].compute_inline() - block_x = tvm.thread_axis("blockIdx.x") - thread_x = tvm.thread_axis("threadIdx.x") + block_x = te.thread_axis("blockIdx.x") + thread_x = te.thread_axis("threadIdx.x") _, h, w, c = In_grad.op.axis fused_hwc = s[In_grad].fuse(h, w, c) @@ -277,13 +278,13 @@ def schedule_depthwise_conv2d_backward_weight_nhwc(outs): The computation schedule for depthwise_conv2d backward wrt weight with layout nhwc. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(Weight_grad): - block_x = tvm.thread_axis("blockIdx.x") - thread_y = tvm.thread_axis("threadIdx.y") - thread_x = tvm.thread_axis("threadIdx.x") + block_x = te.thread_axis("blockIdx.x") + thread_y = te.thread_axis("threadIdx.y") + thread_x = te.thread_axis("threadIdx.x") db, dh, dw = Weight_grad.op.reduce_axis diff --git a/topi/python/topi/mali/conv2d.py b/topi/python/topi/mali/conv2d.py index f774e76..d195928 100644 --- a/topi/python/topi/mali/conv2d.py +++ b/topi/python/topi/mali/conv2d.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name,unused-variable,unused-argument,no-else-return """conv2d schedule on ARM Mali GPU""" import tvm +from tvm import te from tvm import relay from tvm import autotvm from tvm.autotvm.task.space import get_factors @@ -38,10 +39,10 @@ def conv2d_nchw_spatial_pack(cfg, data, kernel, strides, padding, dilation, out_ cfg: ConfigEntity The config for this template - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 4-D 
with shape [num_filter, in_channel, filter_height, filter_width] or pre-packed 5-D with shape [num_filter_chunk, in_channel, filter_height, filter_width, num_filter_block] @@ -60,7 +61,7 @@ def conv2d_nchw_spatial_pack(cfg, data, kernel, strides, padding, dilation, out_ Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ return conv2d_spatial_pack_nchw(cfg, data, kernel, strides, padding, @@ -83,7 +84,7 @@ def schedule_conv2d_nchw_spatial_pack(cfg, outs): s: Schedule The computation schedule for conv2d """ - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): # schedule conv2d @@ -100,7 +101,7 @@ def schedule_conv2d_nchw_spatial_pack(cfg, outs): kernel = kernel_vec.op.input_tensors[0] else: kernel = kernel_vec - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() _schedule_spatial_pack(cfg, s, output, conv, data_vec, kernel_vec) @@ -121,12 +122,12 @@ def _schedule_spatial_pack(cfg, s, output, conv, data_vec, kernel_vec): BW, TW, VW = cfg["tile_ow"].size # schedule padding - if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag: + if isinstance(data.op, tvm.te.ComputeOp) and "pad" in data.op.tag: data_pad = data s[data_pad].compute_inline() # schedule data packing - if isinstance(data_vec.op, tvm.tensor.ComputeOp) and data_vec.op.name == 'data_vec_undilated': + if isinstance(data_vec.op, tvm.te.ComputeOp) and data_vec.op.name == 'data_vec_undilated': _, h, w, ci, _, _, vh, vw = s[data_vec].op.axis else: _, h, w, ci, vh, vw = s[data_vec].op.axis @@ -136,7 +137,7 @@ def _schedule_spatial_pack(cfg, s, output, conv, data_vec, kernel_vec): if vw.dom.extent.value < max_unroll: s[data_vec].unroll(vw) - if isinstance(kernel_vec.op, tvm.tensor.ComputeOp) and kernel_vec.name == 'kernel_vec': + if 
isinstance(kernel_vec.op, tvm.te.ComputeOp) and kernel_vec.name == 'kernel_vec': if autotvm.GLOBAL_SCOPE.in_tuning: # kernel packing will be pre-computed during compilation, so we skip # this part to make tuning records correct @@ -147,8 +148,8 @@ def _schedule_spatial_pack(cfg, s, output, conv, data_vec, kernel_vec): fused = s[kernel_vec].fuse(co, ci, kh, kw, vc) fused, vec = s[kernel_vec].split(fused, VC) bb, tt = s[kernel_vec].split(fused, max_threads) - s[kernel_vec].bind(bb, tvm.thread_axis("blockIdx.x")) - s[kernel_vec].bind(tt, tvm.thread_axis("threadIdx.x")) + s[kernel_vec].bind(bb, te.thread_axis("blockIdx.x")) + s[kernel_vec].bind(tt, te.thread_axis("threadIdx.x")) if VC in vec_size: s[kernel_vec].vectorize(vec) @@ -199,7 +200,7 @@ def conv2d_nchw_winograd(cfg, data, kernel, strides, padding, dilation, out_dtyp @autotvm.register_topi_schedule("conv2d_nchw_winograd.mali") def schedule_conv2d_nchw_winograd(cfg, outs): - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): if 'winograd_conv2d_output' in op.tag: @@ -271,54 +272,55 @@ def _decl_winograd(cfg, data, kernel, strides, padding, dilation, out_dtype, til assert CO % bna == 0 and P_round % bnb == 0 # pack input tile - input_tile = tvm.compute((CI, P_round // bnb, alpha, alpha, bnb), lambda ci, b, eps, nu, bb: \ - tvm.if_then_else( - b * bnb + bb < P, - data_pad[(b*bnb+bb) // (nH*nW)][ci][(b*bnb+bb) // nW % nH * m + eps] - [(b*bnb+bb) % nW * m + nu], tvm.const(0, data_pad.dtype)), name='d') + input_tile = te.compute( + (CI, P_round // bnb, alpha, alpha, bnb), lambda ci, b, eps, nu, bb: \ + tvm.tir.if_then_else( + b * bnb + bb < P, + data_pad[(b*bnb+bb) // (nH*nW)][ci][(b*bnb+bb) // nW % nH * m + eps] + [(b*bnb+bb) % nW * m + nu], tvm.tir.const(0, data_pad.dtype)), name='d') # transform kernel if pre_computed: U = kernel else: - r_kh = tvm.reduce_axis((0, KH), 'r_kh') - r_kw = tvm.reduce_axis((0, KW), 'r_kw') - U = tvm.compute((alpha, alpha, CO 
// bna, CI, bna), lambda eps, nu, co, ci, vco: - tvm.sum(kernel[co * bna + vco][ci][r_kh][r_kw] * G[eps][r_kh] * G[nu][r_kw], - axis=[r_kh, r_kw]), name='U') + r_kh = te.reduce_axis((0, KH), 'r_kh') + r_kw = te.reduce_axis((0, KW), 'r_kw') + U = te.compute((alpha, alpha, CO // bna, CI, bna), lambda eps, nu, co, ci, vco: + te.sum(kernel[co * bna + vco][ci][r_kh][r_kw] * G[eps][r_kh] * G[nu][r_kw], + axis=[r_kh, r_kw]), name='U') # transform image - r_a = tvm.reduce_axis((0, alpha), 'r_a') - r_b = tvm.reduce_axis((0, alpha), 'r_b') - V = tvm.compute((alpha, alpha, P_round // bnb, CI, bnb), lambda eps, nu, p, ci, vp: - tvm.sum(input_tile[ci][p][r_a][r_b][vp] * B[r_a][eps] * B[r_b][nu], - axis=[r_a, r_b]), name='V') + r_a = te.reduce_axis((0, alpha), 'r_a') + r_b = te.reduce_axis((0, alpha), 'r_b') + V = te.compute((alpha, alpha, P_round // bnb, CI, bnb), lambda eps, nu, p, ci, vp: + te.sum(input_tile[ci][p][r_a][r_b][vp] * B[r_a][eps] * B[r_b][nu], + axis=[r_a, r_b]), name='V') - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod # batch gemm - ci = tvm.reduce_axis((0, CI), name='c') - M = tvm.compute((alpha, alpha, CO, P_round), lambda eps, nu, co, p: - tvm.sum(U[eps][nu][idxdiv(co, bna)][ci][idxmod(co, bna)] * - V[eps][nu][idxdiv(p, bnb)][ci][idxmod(p, bnb)], axis=ci), name='M') + ci = te.reduce_axis((0, CI), name='c') + M = te.compute((alpha, alpha, CO, P_round), lambda eps, nu, co, p: + te.sum(U[eps][nu][idxdiv(co, bna)][ci][idxmod(co, bna)] * + V[eps][nu][idxdiv(p, bnb)][ci][idxmod(p, bnb)], axis=ci), name='M') - r_a = tvm.reduce_axis((0, alpha), 'r_a') - r_b = tvm.reduce_axis((0, alpha), 'r_b') - Y = tvm.compute((CO, P, m, m), lambda co, p, vh, vw: - tvm.sum(M[r_a][r_b][co][p] * A[r_a][vh] * A[r_b][vw], - axis=[r_a, r_b]), name='Y') + r_a = te.reduce_axis((0, alpha), 'r_a') + r_b = te.reduce_axis((0, alpha), 'r_b') + Y = te.compute((CO, P, m, m), lambda co, p, vh, vw: + te.sum(M[r_a][r_b][co][p] * A[r_a][vh] * 
A[r_b][vw], + axis=[r_a, r_b]), name='Y') # unpack output - output = tvm.compute((N, CO, H, W), lambda n, co, h, w: - Y[co, n * nH * nW + idxdiv(h, m) * nW + idxdiv(w, m), - idxmod(h, m), idxmod(w, m)] - # The following hack term is used to make the padding in batch gemm ("M") - # effective, otherwise the padding will be eliminated by bound inference. - # Use `tvm.expr.Mul` instead of `*` to avoid issues in const folding. - + tvm.expr.Mul(tvm.const(0, out_dtype), - M[alpha-1][alpha-1][CO-1][P_round-1]), - name='output', tag='winograd_conv2d_output') + output = te.compute((N, CO, H, W), lambda n, co, h, w: + Y[co, n * nH * nW + idxdiv(h, m) * nW + idxdiv(w, m), + idxmod(h, m), idxmod(w, m)] + # The following hack term is used to make the padding in batch gemm ("M") + # effective, otherwise the padding will be eliminated by bound inference. + # Use `tvm.tir.Mul` instead of `*` to avoid issues in const folding. + + tvm.tir.Mul(tvm.tir.const(0, out_dtype), + M[alpha-1][alpha-1][CO-1][P_round-1]), + name='output', tag='winograd_conv2d_output') # we have to manually assign effective GFLOP for winograd cfg.add_flop(2 * N * CO * H * W * KH * KW * CI) @@ -339,7 +341,7 @@ def _schedule_winograd(cfg, s, op): s[data_pad].compute_inline() # transform kernel - if isinstance(U.op, tvm.tensor.ComputeOp): + if isinstance(U.op, tvm.te.ComputeOp): kernel, G = s[U].op.input_tensors s[G].compute_inline() eps, nu, co, ci, vco, = s[U].op.axis @@ -355,7 +357,7 @@ def _schedule_winograd(cfg, s, op): tile_and_bind(s, U, co, ci, 1, 256) # dilation - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() # transform image @@ -370,8 +372,8 @@ def _schedule_winograd(cfg, s, op): fused = s[V].fuse(p, ci) bb, tt = cfg['tile_t1'].apply(s, V, fused) - s[V].bind(bb, tvm.thread_axis('blockIdx.x')) - s[V].bind(tt, tvm.thread_axis('threadIdx.x')) + s[V].bind(bb, 
te.thread_axis('blockIdx.x')) + s[V].bind(tt, te.thread_axis('threadIdx.x')) eps, nu, p, ci, vp = s[VL].op.axis r_a, r_b = s[VL].op.reduce_axis @@ -416,8 +418,8 @@ def _schedule_winograd(cfg, s, op): s[output].unroll(wi) fused = s[output].fuse(n, co, h, w) bb, tt = cfg['tile_t2'].apply(s, output, fused) - s[output].bind(bb, tvm.thread_axis('blockIdx.x')) - s[output].bind(tt, tvm.thread_axis('threadIdx.x')) + s[output].bind(bb, te.thread_axis('blockIdx.x')) + s[output].bind(tt, te.thread_axis('threadIdx.x')) s[Y].compute_at(s[output], tt) @@ -451,7 +453,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): data, kernel = tinfos out_dtype = out_type.dtype - idxd = tvm.indexdiv + idxd = tvm.tir.indexdiv if topi_tmpl == "conv2d_nchw_spatial_pack.mali": assert data_layout == "NCHW" and kernel_layout == "OIHW" @@ -462,7 +464,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): new_attrs['kernel_layout'] = 'OIHW%do' % VC new_data = data - new_kernel = tvm.placeholder((idxd(CO, VC), CI, KH, KW, VC), dtype=kernel.dtype) + new_kernel = te.placeholder((idxd(CO, VC), CI, KH, KW, VC), dtype=kernel.dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, out_dtype], "conv2d_nchw_spatial_pack.mali") @@ -488,10 +490,10 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): new_attrs['tile_size'] = tile_size new_data = data - new_kernel = tvm.placeholder((KH + tile_size - 1, - KW + tile_size -1, - idxd(CO, VC), CI, VC), - kernel.dtype) + new_kernel = te.placeholder((KH + tile_size - 1, + KW + tile_size -1, + idxd(CO, VC), CI, VC), + kernel.dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, out_dtype], 'conv2d_nchw_winograd.mali') @@ -508,10 +510,10 @@ def tile_and_bind(s, tensor, y, x, y_factor, x_factor=None): """ tile and bind to GPU threads """ x_factor = x_factor or y_factor yo, xo, yi, xi = s[tensor].tile(y, x, y_factor, x_factor) - s[tensor].bind(xo, 
tvm.thread_axis("blockIdx.x")) - s[tensor].bind(xi, tvm.thread_axis("threadIdx.x")) - s[tensor].bind(yo, tvm.thread_axis("blockIdx.y")) - s[tensor].bind(yi, tvm.thread_axis("threadIdx.y")) + s[tensor].bind(xo, te.thread_axis("blockIdx.x")) + s[tensor].bind(xi, te.thread_axis("threadIdx.x")) + s[tensor].bind(yo, te.thread_axis("blockIdx.y")) + s[tensor].bind(yi, te.thread_axis("threadIdx.y")) return yo, xo, yi, xi @@ -522,11 +524,11 @@ def tile_and_bind3d(s, tensor, z, y, x, z_factor=2, y_factor=None, x_factor=None zo, zi = s[tensor].split(z, z_factor) yo, yi = s[tensor].split(y, y_factor) xo, xi = s[tensor].split(x, x_factor) - s[tensor].bind(zo, tvm.thread_axis("blockIdx.z")) - s[tensor].bind(zi, tvm.thread_axis("threadIdx.z")) - s[tensor].bind(yo, tvm.thread_axis("blockIdx.y")) - s[tensor].bind(yi, tvm.thread_axis("threadIdx.y")) - s[tensor].bind(xo, tvm.thread_axis("blockIdx.x")) - s[tensor].bind(xi, tvm.thread_axis("threadIdx.x")) + s[tensor].bind(zo, te.thread_axis("blockIdx.z")) + s[tensor].bind(zi, te.thread_axis("threadIdx.z")) + s[tensor].bind(yo, te.thread_axis("blockIdx.y")) + s[tensor].bind(yi, te.thread_axis("threadIdx.y")) + s[tensor].bind(xo, te.thread_axis("blockIdx.x")) + s[tensor].bind(xi, te.thread_axis("threadIdx.x")) s[tensor].reorder(zo, yo, xo, zi, yi, xi) return zo, yo, xo, zi, yi, xi diff --git a/topi/python/topi/mali/dense.py b/topi/python/topi/mali/dense.py index 3b233e9..8ec5d19 100644 --- a/topi/python/topi/mali/dense.py +++ b/topi/python/topi/mali/dense.py @@ -16,10 +16,7 @@ # under the License. # pylint: disable=invalid-name,unused-variable """dense schedule on ARM Mali GPU""" - -from __future__ import absolute_import as _abs - -import tvm +from tvm import te from tvm import autotvm from .. import nn @@ -50,8 +47,8 @@ def schedule_dense(cfg, outs): s: Schedule The computation schedule for dense. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'dense': @@ -82,10 +79,10 @@ def schedule_dense(cfg, outs): by, ty, yi = cfg['tile_y'].apply(s, output, y) bx, tx, xi = cfg['tile_x'].apply(s, output, x) - s[output].bind(by, tvm.thread_axis('blockIdx.y')) - s[output].bind(bx, tvm.thread_axis('blockIdx.x')) - s[output].bind(ty, tvm.thread_axis('threadIdx.y')) - s[output].bind(tx, tvm.thread_axis('threadIdx.x')) + s[output].bind(by, te.thread_axis('blockIdx.y')) + s[output].bind(bx, te.thread_axis('blockIdx.x')) + s[output].bind(ty, te.thread_axis('threadIdx.y')) + s[output].bind(tx, te.thread_axis('threadIdx.x')) if cfg['tile_y'].size[-1] < max_unroll: s[output].unroll(yi) @@ -113,6 +110,6 @@ def fuse_and_bind(s, tensor, axis=None, num_thread=None): axis = axis or s[tensor].op.axis fused = s[tensor].fuse(*axis) bx, tx = s[tensor].split(fused, num_thread) - s[tensor].bind(bx, tvm.thread_axis("blockIdx.x")) - s[tensor].bind(tx, tvm.thread_axis("threadIdx.x")) + s[tensor].bind(bx, te.thread_axis("blockIdx.x")) + s[tensor].bind(tx, te.thread_axis("threadIdx.x")) return bx, tx diff --git a/topi/python/topi/mali/depthwise_conv2d.py b/topi/python/topi/mali/depthwise_conv2d.py index 4ff17e5..785128c 100644 --- a/topi/python/topi/mali/depthwise_conv2d.py +++ b/topi/python/topi/mali/depthwise_conv2d.py @@ -18,6 +18,7 @@ """depthwise_conv2d schedule on ARM Mali GPU""" import tvm +from tvm import te from tvm import autotvm from .. import nn @@ -47,8 +48,8 @@ def schedule_depthwise_conv2d_nchw(cfg, outs): s: Schedule The computation schedule for depthwise_conv2d nchw. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(pad_data, kernel, conv): """schedule depthwise_conv2d""" @@ -75,7 +76,7 @@ def schedule_depthwise_conv2d_nchw(cfg, outs): tile_and_bind3d(s, pad_data, c, y, x, cfg["tile_c"].size[1], 1, 1) # schedule dilation - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() # schedule conv @@ -93,12 +94,12 @@ def schedule_depthwise_conv2d_nchw(cfg, outs): bx, tx, xi = cfg['tile_x'].apply(s, output, x) bc = s[output].fuse(n, bc) - s[output].bind(bc, tvm.thread_axis("blockIdx.z")) - s[output].bind(tc, tvm.thread_axis("threadIdx.z")) - s[output].bind(by, tvm.thread_axis("blockIdx.y")) - s[output].bind(ty, tvm.thread_axis("threadIdx.y")) - s[output].bind(bx, tvm.thread_axis("blockIdx.x")) - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(bc, te.thread_axis("blockIdx.z")) + s[output].bind(tc, te.thread_axis("threadIdx.z")) + s[output].bind(by, te.thread_axis("blockIdx.y")) + s[output].bind(ty, te.thread_axis("threadIdx.y")) + s[output].bind(bx, te.thread_axis("blockIdx.x")) + s[output].bind(tx, te.thread_axis("threadIdx.x")) di, dj = s[OL].op.reduce_axis s[OL].unroll(di) @@ -134,10 +135,10 @@ def tile_and_bind3d(s, tensor, z, y, x, z_factor=2, y_factor=None, x_factor=None zo, zi = s[tensor].split(z, z_factor) yo, yi = s[tensor].split(y, y_factor) xo, xi = s[tensor].split(x, x_factor) - s[tensor].bind(zo, tvm.thread_axis("blockIdx.z")) - s[tensor].bind(zi, tvm.thread_axis("threadIdx.z")) - s[tensor].bind(yo, tvm.thread_axis("blockIdx.y")) - s[tensor].bind(yi, tvm.thread_axis("threadIdx.y")) - s[tensor].bind(xo, tvm.thread_axis("blockIdx.x")) - s[tensor].bind(xi, tvm.thread_axis("threadIdx.x")) + 
s[tensor].bind(zo, te.thread_axis("blockIdx.z")) + s[tensor].bind(zi, te.thread_axis("threadIdx.z")) + s[tensor].bind(yo, te.thread_axis("blockIdx.y")) + s[tensor].bind(yi, te.thread_axis("threadIdx.y")) + s[tensor].bind(xo, te.thread_axis("blockIdx.x")) + s[tensor].bind(xi, te.thread_axis("threadIdx.x")) return zo, zi, yo, yi, xo, xi diff --git a/topi/python/topi/math.py b/topi/python/topi/math.py index 148d53a..5b6b9ab 100644 --- a/topi/python/topi/math.py +++ b/topi/python/topi/math.py @@ -16,181 +16,181 @@ # under the License. """Elementwise operators""" # pylint: disable=redefined-builtin -from __future__ import absolute_import as _abs import tvm +from tvm import te from . import tag from . import cpp -@tvm.tag_scope(tag=tag.ELEMWISE) +@tvm.te.tag_scope(tag=tag.ELEMWISE) def identity(x): """Take identity of input x. Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ # pylint: disable=unnecessary-lambda - return tvm.compute(x.shape, lambda *i: x(*i)) + return te.compute(x.shape, lambda *i: x(*i)) -@tvm.tag_scope(tag=tag.ELEMWISE) +@tvm.te.tag_scope(tag=tag.ELEMWISE) def negative(x): """Take negation of input x. Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ # pylint: disable=unnecessary-lambda - return tvm.compute(x.shape, lambda *i: -x(*i)) + return te.compute(x.shape, lambda *i: -x(*i)) -@tvm.tag_scope(tag=tag.ELEMWISE) +@tvm.te.tag_scope(tag=tag.ELEMWISE) def exp(x): """Take exponential of input x. Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.exp(x(*i))) + return te.compute(x.shape, lambda *i: te.exp(x(*i))) -@tvm.tag_scope(tag=tag.ELEMWISE) +@tvm.te.tag_scope(tag=tag.ELEMWISE) def erf(x): """Take gauss error function of input x. 
Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.erf(x(*i))) + return te.compute(x.shape, lambda *i: te.erf(x(*i))) -@tvm.tag_scope(tag=tag.ELEMWISE) +@tvm.te.tag_scope(tag=tag.ELEMWISE) def tanh(x): """Take hyperbolic tanh of input x. Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.tanh(x(*i))) + return te.compute(x.shape, lambda *i: te.tanh(x(*i))) -@tvm.tag_scope(tag=tag.ELEMWISE) +@tvm.te.tag_scope(tag=tag.ELEMWISE) def cos(x): """Take cos of input x. Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.cos(x(*i))) + return te.compute(x.shape, lambda *i: te.cos(x(*i))) -@tvm.tag_scope(tag=tag.ELEMWISE) +@tvm.te.tag_scope(tag=tag.ELEMWISE) def sin(x): """Take sin of input x. Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.sin(x(*i))) + return te.compute(x.shape, lambda *i: te.sin(x(*i))) -@tvm.tag_scope(tag=tag.ELEMWISE) +@tvm.te.tag_scope(tag=tag.ELEMWISE) def atan(x): """Take atan of input x. Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.atan(x(*i))) + return te.compute(x.shape, lambda *i: te.atan(x(*i))) -@tvm.tag_scope(tag=tag.ELEMWISE) +@tvm.te.tag_scope(tag=tag.ELEMWISE) def floor(x): """Take floor of input x. Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. 
""" - return tvm.compute(x.shape, lambda *i: tvm.floor(x(*i))) + return te.compute(x.shape, lambda *i: te.floor(x(*i))) -@tvm.tag_scope(tag=tag.ELEMWISE) +@tvm.te.tag_scope(tag=tag.ELEMWISE) def ceil(x): """Take ceil of input x. Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.ceil(x(*i))) + return te.compute(x.shape, lambda *i: te.ceil(x(*i))) def sign(x): @@ -198,199 +198,199 @@ def sign(x): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ return cpp.sign(x) -@tvm.tag_scope(tag=tag.ELEMWISE) +@tvm.te.tag_scope(tag=tag.ELEMWISE) def trunc(x): """Take truncated value of the input of x, element-wise. Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.trunc(x(*i))) + return te.compute(x.shape, lambda *i: te.trunc(x(*i))) -@tvm.tag_scope(tag=tag.ELEMWISE) +@tvm.te.tag_scope(tag=tag.ELEMWISE) def abs(x): """Take absolute value of the input of x, element-wise. Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.abs(x(*i))) + return te.compute(x.shape, lambda *i: te.abs(x(*i))) -@tvm.tag_scope(tag=tag.ELEMWISE) +@tvm.te.tag_scope(tag=tag.ELEMWISE) def isnan(x): """Check if value of x is NaN, element-wise. Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. 
""" - return tvm.compute(x.shape, lambda *i: tvm.isnan(x(*i))) + return te.compute(x.shape, lambda *i: te.isnan(x(*i))) -@tvm.tag_scope(tag=tag.ELEMWISE) +@tvm.te.tag_scope(tag=tag.ELEMWISE) def round(x): """Round elements of x to nearest integer. Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.round(x(*i))) + return te.compute(x.shape, lambda *i: te.round(x(*i))) -@tvm.tag_scope(tag=tag.ELEMWISE) +@tvm.te.tag_scope(tag=tag.ELEMWISE) def log(x): """Take logarithm of input x. Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.log(x(*i))) + return te.compute(x.shape, lambda *i: te.log(x(*i))) -@tvm.tag_scope(tag=tag.ELEMWISE) +@tvm.te.tag_scope(tag=tag.ELEMWISE) def sqrt(x): """Take square root of input x. Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.sqrt(x(*i))) + return te.compute(x.shape, lambda *i: te.sqrt(x(*i))) -@tvm.tag_scope(tag=tag.ELEMWISE) +@tvm.te.tag_scope(tag=tag.ELEMWISE) def rsqrt(x): """Take inverse square root of input x. Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.rsqrt(x(*i))) + return te.compute(x.shape, lambda *i: te.rsqrt(x(*i))) -@tvm.tag_scope(tag=tag.ELEMWISE) +@tvm.te.tag_scope(tag=tag.ELEMWISE) def sigmoid(x): """Take sigmoid tanh of input x. Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. 
""" - return tvm.compute(x.shape, lambda *i: tvm.sigmoid(x(*i))) + return te.compute(x.shape, lambda *i: te.sigmoid(x(*i))) -@tvm.tag_scope(tag=tag.ELEMWISE) +@tvm.te.tag_scope(tag=tag.ELEMWISE) def left_shift(x, n): """Take n bits left shift of input x. Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. n : int Number of bits. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: x(*i) << n) + return te.compute(x.shape, lambda *i: x(*i) << n) -@tvm.tag_scope(tag=tag.ELEMWISE) +@tvm.te.tag_scope(tag=tag.ELEMWISE) def right_shift(x, n): """Take n bits right shift of input x. Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. n : int Number of bits. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: x(*i) >> n) + return te.compute(x.shape, lambda *i: x(*i) >> n) -@tvm.tag_scope(tag=tag.ELEMWISE) +@tvm.te.tag_scope(tag=tag.ELEMWISE) def clip(x, a_min, a_max): """Clip (limit) the values in an array. Given an interval, values outside the interval are clipped to the interval edges. Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. a_min : int or float Minimum value. @@ -399,15 +399,15 @@ def clip(x, a_min, a_max): Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ def _compute(*indices): value = x(*indices) - const_min = tvm.const(a_min, value.dtype) - const_max = tvm.const(a_max, value.dtype) - return tvm.max(tvm.min(value, const_max), const_min) - return tvm.compute(x.shape, _compute) + const_min = tvm.tir.const(a_min, value.dtype) + const_max = tvm.tir.const(a_max, value.dtype) + return tvm.te.max(tvm.te.min(value, const_max), const_min) + return te.compute(x.shape, _compute) def cast(x, dtype): @@ -415,7 +415,7 @@ def cast(x, dtype): Parameters ---------- - x : tvm.Tensor or Expr + x : tvm.te.Tensor or Expr Input argument. 
dtype : str @@ -423,11 +423,11 @@ def cast(x, dtype): Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - if isinstance(x, tvm.tensor.Tensor): - return tvm.compute( + if isinstance(x, te.tensor.Tensor): + return te.compute( x.shape, lambda *i: x(*i).astype(dtype), tag=tag.ELEMWISE) # pylint: disable=import-outside-toplevel from tvm.tir import _ffi_api @@ -439,7 +439,7 @@ def reinterpret(x, dtype): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. dtype : str @@ -447,7 +447,7 @@ def reinterpret(x, dtype): Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ return cpp.reinterpret(x, dtype) @@ -458,12 +458,12 @@ def fast_exp(x): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ return cpp.fast_exp(x, x.dtype, tag.ELEMWISE) diff --git a/topi/python/topi/nn/batch_matmul.py b/topi/python/topi/nn/batch_matmul.py index d69562c..0d9f351 100644 --- a/topi/python/topi/nn/batch_matmul.py +++ b/topi/python/topi/nn/batch_matmul.py @@ -16,8 +16,7 @@ # under the License. 
"""Binary Neural Network (BNN) Operators""" # pylint: disable=invalid-name -from __future__ import absolute_import as _abs -import tvm +from tvm import te from ..util import get_const_tuple def batch_matmul(x, y): @@ -26,15 +25,15 @@ def batch_matmul(x, y): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor 3-D with shape [batch, M, K] - y : tvm.Tensor + y : tvm.te.Tensor 3-D with shape [batch, N, K] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 3-D with shape [batch, M, N] """ assert len(x.shape) == 3 and len(y.shape) == 3, "only support 3-dim batch_matmul" @@ -44,7 +43,7 @@ def batch_matmul(x, y): assert x_shape[2] == y_shape[2], "shapes of x and y is inconsistant" batch, M, K = x.shape N = y.shape[1] - k = tvm.reduce_axis((0, K), name='k') - return tvm.compute((batch, M, N), - lambda b, i, j: tvm.sum(x[b, i, k] * y[b, j, k], axis=k), - tag='batch_matmul') + k = te.reduce_axis((0, K), name='k') + return te.compute((batch, M, N), + lambda b, i, j: te.sum(x[b, i, k] * y[b, j, k], axis=k), + tag='batch_matmul') diff --git a/topi/python/topi/nn/bitserial_conv2d.py b/topi/python/topi/nn/bitserial_conv2d.py index f18a5aa..e1a7697 100644 --- a/topi/python/topi/nn/bitserial_conv2d.py +++ b/topi/python/topi/nn/bitserial_conv2d.py @@ -17,8 +17,8 @@ # pylint: disable=invalid-name, too-many-locals, too-many-arguments # pylint: disable=unused-argument, redefined-builtin """Bitserial Conv2D operators""" -from __future__ import absolute_import as _abs import tvm +from tvm import te from .pad import pad from .util import get_pad_tuple from .bitserial_util import bitpack @@ -30,10 +30,10 @@ def bitserial_conv2d_nchw(data, kernel, stride, padding, activation_bits, weight Parameters ---------- - input : tvm.Tensor + input : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - filter : tvm.Tensor + filter : tvm.te.Tensor 4-D with shape [num_filter, in_channel, filter_height, filter_width] stride : int or a list/tuple of two ints @@ -59,7 
+59,7 @@ def bitserial_conv2d_nchw(data, kernel, stride, padding, activation_bits, weight Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ assert isinstance(stride, int) or len(stride) == 2 @@ -88,32 +88,32 @@ def bitserial_conv2d_nchw(data, kernel, stride, padding, activation_bits, weight out_height = (in_height - kernel_h + TPAD + DPAD) // stride_h + 1 out_width = (in_width - kernel_w + LPAD + RPAD) // stride_w + 1 - rc = tvm.reduce_axis((0, in_channel), name='rc') - ry = tvm.reduce_axis((0, kernel_h), name='ry') - rx = tvm.reduce_axis((0, kernel_w), name='rx') - b1 = tvm.reduce_axis((0, activation_bits), name='b1') - b2 = tvm.reduce_axis((0, weight_bits), name='b2') + rc = te.reduce_axis((0, in_channel), name='rc') + ry = te.reduce_axis((0, kernel_h), name='ry') + rx = te.reduce_axis((0, kernel_w), name='rx') + b1 = te.reduce_axis((0, activation_bits), name='b1') + b2 = te.reduce_axis((0, weight_bits), name='b2') if unipolar: def _conv(nn, ff, yy, xx): b1b2 = (b1+b2).astype(out_dtype) - return tvm.sum( - ((tvm.popcount(PadInput_q[nn, rc, b1, yy * stride_h + ry, xx * stride_w + rx] & - Filter_q[ff, rc, ry, rx, b2]) - - tvm.popcount(PadInput_q[nn, rc, b1, yy * stride_h + ry, xx * stride_w + rx] & - ~Filter_q[ff, rc, ry, rx, b2])) + return te.sum( + ((tvm.tir.popcount(PadInput_q[nn, rc, b1, yy * stride_h + ry, xx * stride_w + rx] & + Filter_q[ff, rc, ry, rx, b2]) - + tvm.tir.popcount(PadInput_q[nn, rc, b1, yy * stride_h + ry, xx * stride_w + rx] & + ~Filter_q[ff, rc, ry, rx, b2])) << (b1b2)).astype(out_dtype), axis=[rc, ry, rx, b2, b1]).astype(out_dtype) else: def _conv(nn, ff, yy, xx): b1b2 = (b1+b2).astype(out_dtype) - return tvm.sum((tvm.popcount( + return te.sum((tvm.tir.popcount( PadInput_q[nn, rc, b1, yy * stride_h + ry, xx * stride_w + rx] & Filter_q[ff, rc, ry, rx, b2])<< (b1b2)).astype(out_dtype), - axis=[rc, ry, rx, b2, b1]).astype(out_dtype) + axis=[rc, ry, rx, b2, 
b1]).astype(out_dtype) - return tvm.compute((batch, out_channel, out_height, out_width), _conv, - name="Conv2dOutput", tag="bitserial_conv2d_nchw") + return te.compute((batch, out_channel, out_height, out_width), _conv, + name="Conv2dOutput", tag="bitserial_conv2d_nchw") def bitserial_conv2d_nhwc(data, kernel, stride, padding, activation_bits, weight_bits, pack_dtype='uint32', out_dtype='int16', unipolar=True): @@ -121,10 +121,10 @@ def bitserial_conv2d_nhwc(data, kernel, stride, padding, activation_bits, weight Parameters ---------- - input : tvm.Tensor + input : tvm.te.Tensor 4-D with shape [batch, in_height, in_width, in_channel] - filter : tvm.Tensor + filter : tvm.te.Tensor 4-D with shape [filter_height, filter_width, in_channel, num_filter] stride : int or a list/tuple of two ints @@ -150,7 +150,7 @@ def bitserial_conv2d_nhwc(data, kernel, stride, padding, activation_bits, weight Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [batch, out_height, out_width, out_channel] """ assert isinstance(stride, int) or len(stride) == 2 @@ -180,33 +180,33 @@ def bitserial_conv2d_nhwc(data, kernel, stride, padding, activation_bits, weight out_width = (in_width - kernel_w + LPAD + RPAD) // stride_w + 1 PadInput_q = pad(Input_q, pad_before, pad_after, name="PaddedInput") - rc = tvm.reduce_axis((0, in_channel_q), name='rc') - ry = tvm.reduce_axis((0, kernel_h), name='ry') - rx = tvm.reduce_axis((0, kernel_w), name='rx') - b1 = tvm.reduce_axis((0, activation_bits), name='b1') - b2 = tvm.reduce_axis((0, weight_bits), name='b2') + rc = te.reduce_axis((0, in_channel_q), name='rc') + ry = te.reduce_axis((0, kernel_h), name='ry') + rx = te.reduce_axis((0, kernel_w), name='rx') + b1 = te.reduce_axis((0, activation_bits), name='b1') + b2 = te.reduce_axis((0, weight_bits), name='b2') if unipolar: def _conv(nn, yy, xx, ff): b1b2 = (b1+b2).astype(out_dtype) - return tvm.sum( - ((tvm.popcount(PadInput_q[nn, yy * stride_h + ry, xx * stride_w + rx, rc, b1] & - 
Filter_q[ry, rx, rc, ff, b2]) - - tvm.popcount(PadInput_q[nn, yy * stride_h + ry, xx * stride_w + rx, rc, b1] & - ~Filter_q[ry, rx, rc, ff, b2])) + return te.sum( + ((tvm.tir.popcount(PadInput_q[nn, yy * stride_h + ry, xx * stride_w + rx, rc, b1] & + Filter_q[ry, rx, rc, ff, b2]) - + tvm.tir.popcount(PadInput_q[nn, yy * stride_h + ry, xx * stride_w + rx, rc, b1] & + ~Filter_q[ry, rx, rc, ff, b2])) << b1b2).astype(out_dtype), axis=[rc, ry, rx, b2, b1]) else: def _conv(nn, yy, xx, ff): b1b2 = (b1+b2).astype(out_dtype) - return tvm.sum((tvm.popcount( + return te.sum((tvm.tir.popcount( PadInput_q[nn, yy * stride_h + ry, xx * stride_w + rx, rc, b1] & Filter_q[ry, rx, rc, ff, b2]) << b1b2).astype(out_dtype), - axis=[rc, ry, rx, b2, b1]) + axis=[rc, ry, rx, b2, b1]) - conv = tvm.compute((batch, out_height, out_width, out_channel), _conv, - name="Conv2dOutput", tag="bitserial_conv2d_nhwc") + conv = te.compute((batch, out_height, out_width, out_channel), _conv, + name="Conv2dOutput", tag="bitserial_conv2d_nhwc") return conv diff --git a/topi/python/topi/nn/bitserial_dense.py b/topi/python/topi/nn/bitserial_dense.py index fa1b5df..10635d8 100644 --- a/topi/python/topi/nn/bitserial_dense.py +++ b/topi/python/topi/nn/bitserial_dense.py @@ -18,6 +18,7 @@ """Bitserial Dense operator.""" from __future__ import absolute_import import tvm +from tvm import te from topi.util import get_const_tuple from .bitserial_util import bitpack @@ -27,14 +28,14 @@ def bitserial_dense(data, weight, data_bits, weight_bits, pack_dtype='uint32', Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 2-D with shape [batch, in_dim] - weight : tvm.Tensor + weight : tvm.te.Tensor 2-D with shape [out_dim, in_dim] or 3-D with shape [out_dim, weight_bits, in_dim] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [batch, out_dim] """ data_packed = bitpack(data, data_bits, pack_axis=1, bit_axis=1, pack_type=pack_dtype) @@ -46,18 +47,18 @@ def bitserial_dense(data, 
weight, data_bits, weight_bits, pack_dtype='uint32', X, WB, _ = get_const_tuple(weight_packed.shape) oshape = (Y, X) - k = tvm.reduce_axis((0, K), name='k') - db = tvm.reduce_axis((0, DB), name='db') - wb = tvm.reduce_axis((0, WB), name='wb') + k = te.reduce_axis((0, K), name='k') + db = te.reduce_axis((0, DB), name='db') + wb = te.reduce_axis((0, WB), name='wb') - matmul_unipolar = tvm.compute(oshape, lambda i, j: tvm.sum( - (tvm.popcount(weight_packed[j, wb, k] & data_packed[i, db, k]) - - tvm.popcount(~weight_packed[j, wb, k] & data_packed[i, db, k])).astype(out_dtype) + matmul_unipolar = te.compute(oshape, lambda i, j: te.sum( + (tvm.tir.popcount(weight_packed[j, wb, k] & data_packed[i, db, k]) - + tvm.tir.popcount(~weight_packed[j, wb, k] & data_packed[i, db, k])).astype(out_dtype) << (db+wb).astype(out_dtype), axis=[wb, db, k]), - tag='bitserial_dense_unipolar') + tag='bitserial_dense_unipolar') - matmul = tvm.compute(oshape, lambda i, j: tvm.sum( - tvm.popcount(weight_packed[j, wb, k] & data_packed[i, db, k]).astype(out_dtype) + matmul = te.compute(oshape, lambda i, j: te.sum( + tvm.tir.popcount(weight_packed[j, wb, k] & data_packed[i, db, k]).astype(out_dtype) << (db+wb).astype(out_dtype), axis=[wb, db, k]), tag='bitserial_dense') diff --git a/topi/python/topi/nn/bitserial_util.py b/topi/python/topi/nn/bitserial_util.py index def5b5e..a25aa91 100644 --- a/topi/python/topi/nn/bitserial_util.py +++ b/topi/python/topi/nn/bitserial_util.py @@ -18,6 +18,7 @@ """Utility functions for bitserial operators""" import numpy as np import tvm +from tvm import te from topi.transform import concatenate from ..util import get_const_int @@ -52,7 +53,7 @@ def bitpack(data, bits, pack_axis, bit_axis, pack_type, name="QuantizeInput"): pack_axis += 1 def _bitpack(*indices): - packed_data = [tvm.const(0, pack_type)] * bits + packed_data = [tvm.tir.const(0, pack_type)] * bits for k in range(data_width): # Translate indices for packed data back to original idx = [0] * n @@ -68,7 
+69,8 @@ def bitpack(data, bits, pack_axis, bit_axis, pack_type, name="QuantizeInput"): element = data(*idx) for b in range(bits): - extracted_bit = ((element & tvm.const(masks[b], "int32")) >> b).astype(pack_type) + extracted_bit = ( + (element & tvm.tir.const(masks[b], "int32")) >> b).astype(pack_type) packed_data[b] = (packed_data[b] | extracted_bit) if k < data_width - 1: packed_data[b] = packed_data[b] << 1 @@ -77,7 +79,7 @@ def bitpack(data, bits, pack_axis, bit_axis, pack_type, name="QuantizeInput"): return tuple(packed_data) return tuple(packed_data) - output_tuple = tvm.compute(bitserial_oshape, _bitpack, name=name, tag='bitpack') + output_tuple = te.compute(bitserial_oshape, _bitpack, name=name, tag='bitpack') if bits > 1: return concatenate(output_tuple, axis=bit_axis) diff --git a/topi/python/topi/nn/bnn.py b/topi/python/topi/nn/bnn.py index e3b841e..d7355fb 100644 --- a/topi/python/topi/nn/bnn.py +++ b/topi/python/topi/nn/bnn.py @@ -17,6 +17,7 @@ """Binary Neural Network (BNN) Operators""" from __future__ import absolute_import as _abs import tvm +from tvm import te from .. import tag from ..util import simplify, get_const_int @@ -26,7 +27,7 @@ def binarize_pack(data, axis=None, name="PackedInput"): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor n-D input, can be any layout. axis : None or int @@ -38,7 +39,7 @@ def binarize_pack(data, axis=None, name="PackedInput"): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor n-D, the same layout as input, dtype is uint32. 
""" ishape = data.shape @@ -47,11 +48,11 @@ def binarize_pack(data, axis=None, name="PackedInput"): assert get_const_int(ishape[axis]) % 32 == 0 n = len(ishape) oshape = tuple(simplify(ishape[i] // 32) if i == axis \ - else ishape[i] for i in range(n)) + else ishape[i] for i in range(n)) def _binarize_pack(*indices): start_idx = [indices[i] * 32 if i == axis else indices[i] for i in range(n)] - packed = tvm.const(0, 'uint32') + packed = tvm.tir.const(0, 'uint32') for j in range(32): idx = [start_idx[i] + j if i == axis else start_idx[i] for i in range(n)] sign = (data(*idx) >= 0).astype("uint32") @@ -61,7 +62,7 @@ def binarize_pack(data, axis=None, name="PackedInput"): packed = packed << 1 raise RuntimeError("not resach") - return tvm.compute(oshape, _binarize_pack, name=name, tag='binarize_pack') + return te.compute(oshape, _binarize_pack, name=name, tag='binarize_pack') def binary_dense(data, weight): @@ -69,15 +70,15 @@ def binary_dense(data, weight): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 2-D with shape [batch, in_dim], dtype is uint32. - weight : tvm.Tensor + weight : tvm.te.Tensor 2-D with shape [out_dim, in_dim], dtype is uint32. Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [batch, out_dim], dtype is float32. """ assert data.dtype == 'uint32' and weight.dtype == 'uint32', \ @@ -86,11 +87,11 @@ def binary_dense(data, weight): "only support 2-dim binary dense" batch, in_dim = data.shape out_dim, _ = weight.shape - k = tvm.reduce_axis((0, in_dim), name='k') - matmul = tvm.compute((batch, out_dim), lambda i, j: \ - tvm.sum(tvm.popcount(data[i, k] ^ weight[j, k]), axis=k), \ - tag='binary_dense') + k = te.reduce_axis((0, in_dim), name='k') + matmul = te.compute((batch, out_dim), lambda i, j: \ + te.sum(tvm.tir.popcount(data[i, k] ^ weight[j, k]), axis=k), \ + tag='binary_dense') - return tvm.compute((batch, out_dim), lambda i, j: \ - 32 * in_dim - 2. 
* matmul(i, j), \ - tag=tag.ELEMWISE) + return te.compute((batch, out_dim), lambda i, j: \ + 32 * in_dim - 2. * matmul(i, j), \ + tag=tag.ELEMWISE) diff --git a/topi/python/topi/nn/conv1d.py b/topi/python/topi/nn/conv1d.py index 4565fd2..8049dff 100644 --- a/topi/python/topi/nn/conv1d.py +++ b/topi/python/topi/nn/conv1d.py @@ -16,8 +16,7 @@ # under the License. # pylint: disable=invalid-name, unused-variable, unused-argument """1D convolution operators.""" -from __future__ import absolute_import as _abs -import tvm +from tvm import te from .pad import pad from ..util import simplify from .util import get_pad_tuple1d @@ -34,11 +33,11 @@ def conv1d(data, Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 3-D input shape [batch, in_channel, in_width] for layout == 'NCW' and [batch, in_width, in_channel] for layout == 'NWC' - kernel : tvm.Tensor + kernel : tvm.te.Tensor 3-D kernel with shape [num_filter, in_channel, filter_size] for layout == 'NCW' and [filter_size, in_channel, num_filter] for layout == 'NWC' @@ -81,10 +80,10 @@ def conv1d_ncw(data, Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 3-D with shape [batch, in_channel, in_width] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 3-D with shape [num_filter, in_channel, filter_size] strides : int or tuple @@ -123,12 +122,12 @@ def conv1d_ncw(data, temp = pad(data, pad_before, pad_after, name='pad_temp') # Compute graph - rc = tvm.reduce_axis((0, in_channels), name='rc') - rw = tvm.reduce_axis((0, kernel_size), name='rw') + rc = te.reduce_axis((0, in_channels), name='rc') + rw = te.reduce_axis((0, kernel_size), name='rw') - return tvm.compute( + return te.compute( (batch, out_channels, out_width), - lambda b, c, w: tvm.sum( + lambda b, c, w: te.sum( temp[b, rc, w * strides + rw * dilation].astype(out_dtype) * kernel[c, rc, rw].astype(out_dtype), axis=[rc, rw]), @@ -145,10 +144,10 @@ def conv1d_nwc(data, Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 3-D with shape 
[batch, in_width, in_channel] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 3-D with shape [filter_size, in_channel, num_filter] strides : int or tuple @@ -187,12 +186,12 @@ def conv1d_nwc(data, temp = pad(data, pad_before, pad_after, name='pad_temp') # Compute graph - rc = tvm.reduce_axis((0, in_channels), name='rc') - rw = tvm.reduce_axis((0, kernel_size), name='rw') + rc = te.reduce_axis((0, in_channels), name='rc') + rw = te.reduce_axis((0, kernel_size), name='rw') - return tvm.compute( + return te.compute( (batch, out_width, out_channels), - lambda b, w, c: tvm.sum( + lambda b, w, c: te.sum( temp[b, w * strides + rw * dilation, rc].astype(out_dtype) * kernel[rw, rc, c].astype(out_dtype), axis=[rc, rw]), diff --git a/topi/python/topi/nn/conv1d_transpose.py b/topi/python/topi/nn/conv1d_transpose.py index 8d22424..1895b1f 100644 --- a/topi/python/topi/nn/conv1d_transpose.py +++ b/topi/python/topi/nn/conv1d_transpose.py @@ -16,8 +16,7 @@ # under the License. # pylint: disable=invalid-name, unused-variable, unused-argument """Transposed 1D convolution operators (sometimes called Deconvolution).""" -from __future__ import absolute_import as _abs -import tvm +from tvm import te from .dilate import dilate from .pad import pad from ..util import simplify @@ -29,10 +28,10 @@ def conv1d_transpose_ncw(data, kernel, stride, padding, out_dtype): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 3-D with shape [batch, in_channel, in_width] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 3-D with shape [in_channel, num_filter, filter_width] stride : ints @@ -46,7 +45,7 @@ def conv1d_transpose_ncw(data, kernel, stride, padding, out_dtype): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 3-D with shape [batch, out_channel, out_width] """ @@ -63,18 +62,18 @@ def conv1d_transpose_ncw(data, kernel, stride, padding, out_dtype): data = pad(data, [0, 0, pad_left], [0, 0, pad_right], name='data_pad') # transpose kernel, switch kernel layout to IOW - 
kernel = tvm.compute((channels_out, channels_in, kernel_width), \ - lambda o, i, w: kernel[i][o][kernel_width-1-w],\ - name='kernel') + kernel = te.compute((channels_out, channels_in, kernel_width), \ + lambda o, i, w: kernel[i][o][kernel_width-1-w],\ + name='kernel') # convolution _, _, data_width = data.shape out_w = simplify(data_width - kernel_width + 1) - dc = tvm.reduce_axis((0, channels_in), name='dc') - dw = tvm.reduce_axis((0, kernel_width), name='dw') - output = tvm.compute( + dc = te.reduce_axis((0, channels_in), name='dc') + dw = te.reduce_axis((0, kernel_width), name='dw') + output = te.compute( (batch, channels_out, out_w), - lambda b, c, w: tvm.sum( + lambda b, c, w: te.sum( data[b, dc, w+dw].astype(out_dtype) * kernel[c, dc, dw].astype(out_dtype), axis=[dc, dw]), tag="conv1d_transpose_ncw") diff --git a/topi/python/topi/nn/conv2d.py b/topi/python/topi/nn/conv2d.py index a7a75ed..4c7941b 100644 --- a/topi/python/topi/nn/conv2d.py +++ b/topi/python/topi/nn/conv2d.py @@ -20,6 +20,7 @@ from __future__ import absolute_import as _abs from collections import namedtuple import tvm +from tvm import te from .pad import pad from .util import get_pad_tuple @@ -36,10 +37,10 @@ def conv2d(input, filter, strides, padding, dilation, layout='NCHW', out_dtype=N Parameters ---------- - input : tvm.Tensor + input : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - filter : tvm.Tensor + filter : tvm.te.Tensor 4-D with shape [num_filter, in_channel, filter_height, filter_width] strides : int or a list/tuple of two ints @@ -58,7 +59,7 @@ def conv2d(input, filter, strides, padding, dilation, layout='NCHW', out_dtype=N Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ # search platform specific declaration first @@ -170,10 +171,10 @@ def conv2d_nchw(Input, Filter, stride, padding, dilation, out_dtype=None): Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 4-D 
with shape [batch, in_channel, in_height, in_width] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 4-D with shape [num_filter, in_channel, filter_height, filter_width] stride : int or a list/tuple of two ints @@ -189,7 +190,7 @@ def conv2d_nchw(Input, Filter, stride, padding, dilation, out_dtype=None): Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ if out_dtype is None: @@ -220,12 +221,12 @@ def conv2d_nchw(Input, Filter, stride, padding, dilation, out_dtype=None): pad_before = [0, 0, pad_top, pad_left] pad_after = [0, 0, pad_down, pad_right] temp = pad(Input, pad_before, pad_after, name="pad_temp") - rc = tvm.reduce_axis((0, in_channel), name='rc') - ry = tvm.reduce_axis((0, kernel_h), name='ry') - rx = tvm.reduce_axis((0, kernel_w), name='rx') - return tvm.compute( + rc = te.reduce_axis((0, in_channel), name='rc') + ry = te.reduce_axis((0, kernel_h), name='ry') + rx = te.reduce_axis((0, kernel_w), name='rx') + return te.compute( (batch, out_channel, out_height, out_width), - lambda nn, ff, yy, xx: tvm.sum( + lambda nn, ff, yy, xx: te.sum( temp[nn, rc, yy * stride_h + ry * dilation_h, xx * stride_w + rx * dilation_w].astype(out_dtype) * Filter[ff, rc, ry, rx].astype(out_dtype), @@ -237,10 +238,10 @@ def conv2d_hwcn(Input, Filter, stride, padding, dilation, out_dtype=None): Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 4-D with shape [in_height, in_width, in_channel, batch] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 4-D with shape [filter_height, filter_width, in_channel, num_filter] stride : int or a list/tuple of two ints @@ -256,7 +257,7 @@ def conv2d_hwcn(Input, Filter, stride, padding, dilation, out_dtype=None): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [out_height, out_width, out_channel, batch] """ if out_dtype is None: @@ -287,12 +288,12 @@ def conv2d_hwcn(Input, Filter, stride, padding, dilation, out_dtype=None): 
pad_before = [pad_top, pad_left, 0, 0] pad_after = [pad_down, pad_right, 0, 0] PaddedInput = pad(Input, pad_before, pad_after, name="PaddedInput") - rc = tvm.reduce_axis((0, in_channel), name='rc') - ry = tvm.reduce_axis((0, kernel_h), name='ry') - rx = tvm.reduce_axis((0, kernel_w), name='rx') - Output = tvm.compute( + rc = te.reduce_axis((0, in_channel), name='rc') + ry = te.reduce_axis((0, kernel_h), name='ry') + rx = te.reduce_axis((0, kernel_w), name='rx') + Output = te.compute( (out_height, out_width, out_channel, batch), - lambda yy, xx, ff, nn: tvm.sum( + lambda yy, xx, ff, nn: te.sum( PaddedInput[yy * stride_h + ry * dilation_h, xx * stride_w + rx * dilation_w, rc, nn].astype(out_dtype) * Filter[ry, rx, rc, ff].astype(out_dtype), axis=[ry, rx, rc]), @@ -305,10 +306,10 @@ def conv2d_nhwc(Input, Filter, stride, padding, dilation, out_dtype='float32'): Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 4-D with shape [batch, in_height, in_width, in_channel] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 4-D with shape [filter_height, filter_width, in_channel, num_filter] stride : int or a list/tuple of two ints @@ -324,7 +325,7 @@ def conv2d_nhwc(Input, Filter, stride, padding, dilation, out_dtype='float32'): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [batch, out_height, out_width, out_channel] """ assert isinstance(stride, int) or len(stride) == 2 @@ -353,12 +354,12 @@ def conv2d_nhwc(Input, Filter, stride, padding, dilation, out_dtype='float32'): pad_before = [0, pad_top, pad_left, 0] pad_after = [0, pad_down, pad_right, 0] PaddedInput = pad(Input, pad_before, pad_after, name="PaddedInput") - rc = tvm.reduce_axis((0, in_channel), name='rc') - ry = tvm.reduce_axis((0, kernel_h), name='ry') - rx = tvm.reduce_axis((0, kernel_w), name='rx') - Output = tvm.compute( + rc = te.reduce_axis((0, in_channel), name='rc') + ry = te.reduce_axis((0, kernel_h), name='ry') + rx = te.reduce_axis((0, kernel_w), name='rx') + 
Output = te.compute( (batch, out_height, out_width, out_channel), - lambda nn, yy, xx, ff: tvm.sum( + lambda nn, yy, xx, ff: te.sum( PaddedInput[nn, yy * stride_h + ry * dilation_h, xx * stride_w + rx * dilation_w, rc].astype(out_dtype) * Filter[ry, rx, rc, ff].astype(out_dtype), axis=[ry, rx, rc]), @@ -371,10 +372,10 @@ def conv2d_NCHWc(data, kernel, stride, padding, dilation, layout, out_layout, ou Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 5-D with shape [batch, in_channel_chunk, in_height, in_width, in_channel_block] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 6-D with shape [num_filter_chunk, in_channel_chunk, filter_height, filter_width, in_channel_block, num_filter_block] @@ -401,7 +402,7 @@ def conv2d_NCHWc(data, kernel, stride, padding, dilation, layout, out_layout, ou Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 5-D with shape [batch, out_channel_chunk, out_height, out_width, out_channel_block] """ @@ -441,27 +442,27 @@ def conv2d_NCHWc(data, kernel, stride, padding, dilation, layout, out_layout, ou else: data_pad = data - ic = tvm.reduce_axis((0, in_channel), name='ic') - kh = tvm.reduce_axis((0, kernel_height), name='kh') - kw = tvm.reduce_axis((0, kernel_width), name='kw') - - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod - - return tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block: - tvm.sum(data_pad[n, - idxdiv(ic, ic_bn), - oh * HSTR + kh * dilation_h, - ow * WSTR + kw * dilation_w, - idxmod(ic, ic_bn)].astype(out_dtype) - * kernel[oc_chunk, - idxdiv(ic, ic_bn), - kh, - kw, - idxmod(ic, ic_bn), - oc_block], - axis=[ic, kh, kw]), - name='conv2d_NCHWc', tag="conv2d_NCHWc") + ic = te.reduce_axis((0, in_channel), name='ic') + kh = te.reduce_axis((0, kernel_height), name='kh') + kw = te.reduce_axis((0, kernel_width), name='kw') + + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod + + return te.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block: + te.sum(data_pad[n, + idxdiv(ic, ic_bn), + oh * HSTR 
+ kh * dilation_h, + ow * WSTR + kw * dilation_w, + idxmod(ic, ic_bn)].astype(out_dtype) + * kernel[oc_chunk, + idxdiv(ic, ic_bn), + kh, + kw, + idxmod(ic, ic_bn), + oc_block], + axis=[ic, kh, kw]), + name='conv2d_NCHWc', tag="conv2d_NCHWc") def conv2d_NCHWc_int8(data, kernel, stride, padding, dilation, layout, out_layout, @@ -470,10 +471,10 @@ def conv2d_NCHWc_int8(data, kernel, stride, padding, dilation, layout, out_layou Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 5-D with shape [batch, in_channel_chunk, in_height, in_width, in_channel_block] - kernel : tvm.Tensor + kernel : tvm.te.Tensor 7-D with shape [num_filter_chunk, in_channel_chunk, filter_height, filter_width, in_channel_block/4, num_filter_block, 4] @@ -500,7 +501,7 @@ def conv2d_NCHWc_int8(data, kernel, stride, padding, dilation, layout, out_layou Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 5-D with shape [batch, out_channel_chunk, out_height, out_width, out_channel_block] """ @@ -540,53 +541,53 @@ def conv2d_NCHWc_int8(data, kernel, stride, padding, dilation, layout, out_layou else: data_pad = data - ic = tvm.reduce_axis((0, in_channel), name='ic') - kh = tvm.reduce_axis((0, kernel_height), name='kh') - kw = tvm.reduce_axis((0, kernel_width), name='kw') + ic = te.reduce_axis((0, in_channel), name='ic') + kh = te.reduce_axis((0, kernel_height), name='kh') + kw = te.reduce_axis((0, kernel_width), name='kw') if groups == 1: n_elems = 4 - ic_outer = tvm.reduce_axis((0, in_channel//ic_bn), name='ic_outer') - ic_f_inner = tvm.reduce_axis((0, ic_bn//n_elems), name='ic_f_inner') - ic_s_inner = tvm.reduce_axis((0, n_elems), name='ic_s_inner') - return tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block: - tvm.sum(data_pad[n, - ic_outer, - oh * HSTR + kh * dilation_h, - ow * WSTR + kw * dilation_w, - ic_f_inner * n_elems + ic_s_inner].astype(out_dtype) - * kernel[oc_chunk, - ic_outer, - kh, - kw, - ic_f_inner, - oc_block, - ic_s_inner].astype(out_dtype), - axis=[kh, 
kw, ic_outer, ic_f_inner, ic_s_inner]), - name='conv2d_NCHWc_int8', tag="conv2d_NCHWc_int8") + ic_outer = te.reduce_axis((0, in_channel//ic_bn), name='ic_outer') + ic_f_inner = te.reduce_axis((0, ic_bn//n_elems), name='ic_f_inner') + ic_s_inner = te.reduce_axis((0, n_elems), name='ic_s_inner') + return te.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block: + te.sum(data_pad[n, + ic_outer, + oh * HSTR + kh * dilation_h, + ow * WSTR + kw * dilation_w, + ic_f_inner * n_elems + ic_s_inner].astype(out_dtype) + * kernel[oc_chunk, + ic_outer, + kh, + kw, + ic_f_inner, + oc_block, + ic_s_inner].astype(out_dtype), + axis=[kh, kw, ic_outer, ic_f_inner, ic_s_inner]), + name='conv2d_NCHWc_int8', tag="conv2d_NCHWc_int8") # for int8 group conv support n_elems = 4 ic_chunk = in_channel//ic_bn - ic_outer = tvm.reduce_axis((0, ic_chunk//groups), name='ic_outer') - ic_f_inner = tvm.reduce_axis((0, ic_bn//n_elems), name='ic_f_inner') - ic_s_inner = tvm.reduce_axis((0, n_elems), name='ic_s_inner') + ic_outer = te.reduce_axis((0, ic_chunk//groups), name='ic_outer') + ic_f_inner = te.reduce_axis((0, ic_bn//n_elems), name='ic_f_inner') + ic_s_inner = te.reduce_axis((0, n_elems), name='ic_s_inner') oshape = (n, oc_chunk, out_height, out_width, oc_bn) - return tvm.compute(oshape, lambda n, occ, oh, ow, oc_block: - tvm.sum(data_pad[n, - (occ * oc_bn // (oc_chunk * oc_bn // groups)) - * (ic_chunk // groups) + ic_outer, - oh * HSTR + kh, - ow * WSTR + kw, - ic_f_inner * n_elems + ic_s_inner].astype(out_dtype) - * kernel[occ, - ic_outer, - kh, - kw, - ic_f_inner, - oc_block, - ic_s_inner].astype(out_dtype), - axis=[kh, kw, ic_outer, ic_f_inner, ic_s_inner]), - name='conv2d_NCHWc_int8', tag="conv2d_NCHWc_int8") + return te.compute(oshape, lambda n, occ, oh, ow, oc_block: + te.sum(data_pad[n, + (occ * oc_bn // (oc_chunk * oc_bn // groups)) + * (ic_chunk // groups) + ic_outer, + oh * HSTR + kh, + ow * WSTR + kw, + ic_f_inner * n_elems + ic_s_inner].astype(out_dtype) + * kernel[occ, + ic_outer, 
+ kh, + kw, + ic_f_inner, + oc_block, + ic_s_inner].astype(out_dtype), + axis=[kh, kw, ic_outer, ic_f_inner, ic_s_inner]), + name='conv2d_NCHWc_int8', tag="conv2d_NCHWc_int8") def conv2d_winograd_weight_transform(kernel, tile_size): @@ -601,7 +602,7 @@ def conv2d_winograd_weight_transform(kernel, tile_size): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [alpha, alpha, CO, CI] """ shape = get_const_tuple(kernel.shape) @@ -613,12 +614,12 @@ def conv2d_winograd_weight_transform(kernel, tile_size): _, _, G = winograd_transform_matrices(tile_size, K, kernel.dtype) - r_kh = tvm.reduce_axis((0, K), name='r_kh') - r_kw = tvm.reduce_axis((0, K), name='r_kw') - return tvm.compute(shape, lambda eps, nu, co, ci: - tvm.sum(kernel[co][ci][r_kh][r_kw] * - G[eps][r_kh] * G[nu][r_kw], - axis=[r_kh, r_kw]), name='transform_weight') + r_kh = te.reduce_axis((0, K), name='r_kh') + r_kw = te.reduce_axis((0, K), name='r_kw') + return te.compute(shape, lambda eps, nu, co, ci: + te.sum(kernel[co][ci][r_kh][r_kw] * + G[eps][r_kh] * G[nu][r_kw], + axis=[r_kh, r_kw]), name='transform_weight') def conv2d_winograd_nnpack_weight_transform(kernel, convolution_algorithm, out_dtype): @@ -631,7 +632,7 @@ def conv2d_winograd_nnpack_weight_transform(kernel, convolution_algorithm, out_d The convolution algorithm for Winograd NNPACK. 
Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [alpha, alpha, CO, CI] """ # pylint: disable=import-outside-toplevel @@ -645,10 +646,10 @@ def group_conv2d_nchw(Input, Filter, stride, padding, dilation, groups, out_dtyp Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 4-D with shape [num_filter, in_channel // groups, filter_height, filter_width] stride : int or a list/tuple of two ints @@ -670,7 +671,7 @@ def group_conv2d_nchw(Input, Filter, stride, padding, dilation, groups, out_dtyp Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ if out_dtype is None: @@ -705,12 +706,12 @@ def group_conv2d_nchw(Input, Filter, stride, padding, dilation, groups, out_dtyp pad_before = [0, 0, pad_top, pad_left] pad_after = [0, 0, pad_down, pad_right] temp = pad(Input, pad_before, pad_after, name="pad_temp") - rc = tvm.reduce_axis((0, in_channel // groups), name='rc') - ry = tvm.reduce_axis((0, kernel_h), name='ry') - rx = tvm.reduce_axis((0, kernel_w), name='rx') - return tvm.compute( + rc = te.reduce_axis((0, in_channel // groups), name='rc') + ry = te.reduce_axis((0, kernel_h), name='ry') + rx = te.reduce_axis((0, kernel_w), name='rx') + return te.compute( (batch, out_channel, out_height, out_width), - lambda nn, ff, yy, xx: tvm.sum( + lambda nn, ff, yy, xx: te.sum( temp[nn, ff // (num_filter//groups) * (in_channel//groups) + rc, yy * stride_h + ry * dilation_h, xx * stride_w + rx * dilation_w].astype(out_dtype) * @@ -723,7 +724,7 @@ def unpack_NCHWc_to_nchw(packed_out, out_dtype): Parameters ----------- - packed_out : tvm.Tensor + packed_out : tvm.te.Tensor The output tensor of conv2d_NCHWc. 
out_dtype : str @@ -731,20 +732,20 @@ def unpack_NCHWc_to_nchw(packed_out, out_dtype): Returns ------- - unpacked_out : tvm.Tensor + unpacked_out : tvm.te.Tensor The unpacked output tensor in NCHW layout. """ n, oc_chunk, oh, ow, oc_bn = get_const_tuple(packed_out.shape) - idxmod = tvm.indexmod - idxdiv = tvm.indexdiv + idxmod = tvm.tir.indexmod + idxdiv = tvm.tir.indexdiv oshape = (n, oc_chunk * oc_bn, oh, ow) unpacked_out = \ - tvm.compute(oshape, - lambda n, c, h, w: - packed_out[n, idxdiv(c, oc_bn), h, w, idxmod(c, oc_bn)] - .astype(out_dtype), - name='output_unpack', - tag=tag.INJECTIVE+",unpack_nchwc") + te.compute(oshape, + lambda n, c, h, w: + packed_out[n, idxdiv(c, oc_bn), h, w, idxmod(c, oc_bn)] + .astype(out_dtype), + name='output_unpack', + tag=tag.INJECTIVE+",unpack_nchwc") return unpacked_out diff --git a/topi/python/topi/nn/conv2d_transpose.py b/topi/python/topi/nn/conv2d_transpose.py index db132fc..3563112 100644 --- a/topi/python/topi/nn/conv2d_transpose.py +++ b/topi/python/topi/nn/conv2d_transpose.py @@ -16,8 +16,8 @@ # under the License. 
# pylint: disable=invalid-name, unused-variable, unused-argument """Transposed 2D convolution operators (sometimes called Deconvolution).""" -from __future__ import absolute_import as _abs import tvm +from tvm import te from tvm import relay from .dilate import dilate from .pad import pad @@ -30,10 +30,10 @@ def conv2d_transpose_nchw(Input, Filter, strides, padding, out_dtype): Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 4-D with shape [in_channel, num_filter, filter_height, filter_width] strides : tuple of two ints @@ -47,7 +47,7 @@ def conv2d_transpose_nchw(Input, Filter, strides, padding, out_dtype): Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ return declaration_conv2d_transpose_impl(Input, Filter, strides, padding, out_dtype) @@ -72,9 +72,9 @@ def conv2d_transpose_nchw_preprocess(data, kernel, strides, padding, out_dtype): [0, 0, bpad_bottom, bpad_right], \ name='data_pad') # transform kernel layout from IOHW to OIHW, and rotate kernel by 180 degrees - kernel_transform = tvm.compute((out_c, in_c, filter_h, filter_w), \ - lambda o, i, h, w: kernel[i][o][filter_h-1-h][filter_w-1-w], \ - name='kernel_transform') + kernel_transform = te.compute((out_c, in_c, filter_h, filter_w), \ + lambda o, i, h, w: kernel[i][o][filter_h-1-h][filter_w-1-w], \ + name='kernel_transform') return data_pad, kernel_transform @@ -90,13 +90,13 @@ def declaration_conv2d_transpose_impl(data, kernel, strides, padding, out_dtype) out_c = simplify(out_c) out_h = simplify(in_h - filter_h + 1) out_w = simplify(in_w - filter_w + 1) - dc = tvm.reduce_axis((0, in_c), name='dc') - dh = tvm.reduce_axis((0, filter_h), name='dh') - dw = tvm.reduce_axis((0, filter_w), name='dw') + dc = te.reduce_axis((0, in_c), name='dc') + dh = te.reduce_axis((0, filter_h), name='dh') + dw = te.reduce_axis((0, 
filter_w), name='dw') - Output = tvm.compute( + Output = te.compute( (batch, out_c, out_h, out_w), - lambda b, c, h, w: tvm.sum( + lambda b, c, h, w: te.sum( data_pad[b, dc, h+dh, w+dw].astype(out_dtype) * kernel_transform[c, dc, dh, dw].astype(out_dtype), axis=[dc, dh, dw]), tag="conv2d_transpose_nchw") diff --git a/topi/python/topi/nn/conv3d.py b/topi/python/topi/nn/conv3d.py index 88c7c6a..d6bd642 100644 --- a/topi/python/topi/nn/conv3d.py +++ b/topi/python/topi/nn/conv3d.py @@ -17,8 +17,7 @@ # pylint: disable=invalid-name, unused-variable, too-many-locals # pylint: disable=unused-argument, redefined-builtin, no-else-return """Conv3D operators""" -from __future__ import absolute_import as _abs -import tvm +from tvm import te from .pad import pad from .util import get_pad_tuple3d @@ -30,10 +29,10 @@ def conv3d_ncdhw(Input, Filter, stride, padding, dilation, out_dtype=None): Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 5-D with shape [batch, in_channel, in_depth, in_height, in_width] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 5-D with shape [num_filter, in_channel, filter_depth, filter_height, filter_width] stride : int or a list/tuple of three ints @@ -47,7 +46,7 @@ def conv3d_ncdhw(Input, Filter, stride, padding, dilation, out_dtype=None): Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 5-D with shape [batch, out_channel, out_depth, out_height, out_width] """ if out_dtype is None: @@ -80,14 +79,14 @@ def conv3d_ncdhw(Input, Filter, stride, padding, dilation, out_dtype=None): pad_before = [0, 0, pad_front, pad_top, pad_left] pad_after = [0, 0, pad_back, pad_down, pad_right] temp = pad(Input, pad_before, pad_after, name="pad_temp") - rc = tvm.reduce_axis((0, in_channel), name='rc') - rz = tvm.reduce_axis((0, kernel_d), name='rz') - ry = tvm.reduce_axis((0, kernel_h), name='ry') - rx = tvm.reduce_axis((0, kernel_w), name='rx') + rc = te.reduce_axis((0, in_channel), name='rc') + rz = te.reduce_axis((0, kernel_d), name='rz') + 
ry = te.reduce_axis((0, kernel_h), name='ry') + rx = te.reduce_axis((0, kernel_w), name='rx') - return tvm.compute( + return te.compute( (batch, out_channel, out_depth, out_height, out_width), - lambda nn, ff, zz, yy, xx: tvm.sum( + lambda nn, ff, zz, yy, xx: te.sum( temp[nn, rc, zz * stride_d + rz * dilation_d, yy * stride_h + ry * dilation_h, xx * stride_w + rx * dilation_w].astype(out_dtype) * Filter[ff, rc, rz, ry, rx].astype(out_dtype), @@ -99,10 +98,10 @@ def conv3d_ndhwc(Input, Filter, stride, padding, dilation, out_dtype='float32'): Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 5-D with shape [batch, in_depth, in_height, in_width, in_channel] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 5-D with shape [filter_depth, filter_height, filter_width, in_channel, num_filter] stride : int or a list/tuple of three ints @@ -116,7 +115,7 @@ def conv3d_ndhwc(Input, Filter, stride, padding, dilation, out_dtype='float32'): Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 5-D with shape [batch, out_depth, out_height, out_width, out_channel] """ assert isinstance(stride, int) or len(stride) == 3 @@ -148,13 +147,13 @@ def conv3d_ndhwc(Input, Filter, stride, padding, dilation, out_dtype='float32'): pad_before = [0, pad_front, pad_top, pad_left, 0] pad_after = [0, pad_back, pad_down, pad_right, 0] PaddedInput = pad(Input, pad_before, pad_after, name="PaddedInput") - rd = tvm.reduce_axis((0, kernel_d), name='rd') - rh = tvm.reduce_axis((0, kernel_h), name='rh') - rw = tvm.reduce_axis((0, kernel_w), name='rw') - rc = tvm.reduce_axis((0, in_channel), name='rc') - Output = tvm.compute( + rd = te.reduce_axis((0, kernel_d), name='rd') + rh = te.reduce_axis((0, kernel_h), name='rh') + rw = te.reduce_axis((0, kernel_w), name='rw') + rc = te.reduce_axis((0, in_channel), name='rc') + Output = te.compute( (batch, out_depth, out_height, out_width, out_channel), - lambda nn, dd, hh, ww, cc: tvm.sum( + lambda nn, dd, hh, ww, cc: te.sum( PaddedInput[nn, 
dd * stride_d + rd * dilation_d, hh * stride_h + rh * dilation_h, ww * stride_w + rw * dilation_w, rc].astype(out_dtype) * Filter[rd, rh, rw, rc, cc].astype(out_dtype), axis=[rd, rh, rw, rc]), diff --git a/topi/python/topi/nn/deformable_conv2d.py b/topi/python/topi/nn/deformable_conv2d.py index 251f68a..9f95fd1 100644 --- a/topi/python/topi/nn/deformable_conv2d.py +++ b/topi/python/topi/nn/deformable_conv2d.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name, too-many-locals, too-many-arguments """Deformable Conv2D operators""" import tvm +from tvm import te from .util import get_pad_tuple from ..util import get_const_tuple @@ -30,14 +31,14 @@ def deformable_conv2d_nchw(data, offset, kernel, strides, padding, dilation, def Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - offset : tvm.Tensor + offset : tvm.te.Tensor 4-D with shape [batch, deformable_groups * filter_height * filter_width * 2, out_height, out_width]. 
- kernel : tvm.Tensor + kernel : tvm.te.Tensor 4-D with shape [num_filter, in_channel, filter_height, filter_width] strides : int or a list/tuple of two ints @@ -57,7 +58,7 @@ def deformable_conv2d_nchw(data, offset, kernel, strides, padding, dilation, def Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ if out_dtype is None: @@ -85,30 +86,30 @@ def deformable_conv2d_nchw(data, offset, kernel, strides, padding, dilation, def dilated_kernel_w = (kernel_w - 1) * dilation_w + 1 pad_top, pad_left, _, _ = get_pad_tuple( padding, (dilated_kernel_h, dilated_kernel_w)) - rc = tvm.reduce_axis((0, in_channel), name='rc') - ry = tvm.reduce_axis((0, kernel_h), name='ry') - rx = tvm.reduce_axis((0, kernel_w), name='rx') + rc = te.reduce_axis((0, in_channel), name='rc') + ry = te.reduce_axis((0, kernel_h), name='ry') + rx = te.reduce_axis((0, kernel_w), name='rx') - zero = tvm.const(0.0, data.dtype) + zero = tvm.tir.const(0.0, data.dtype) def _bilinear(n, c, h, w): - outside = tvm.any(h < 0, w < 0, h >= in_height, w >= in_width) + outside = tvm.tir.any(h < 0, w < 0, h >= in_height, w >= in_width) val = bilinear_sample_nchw(data, (n, c, h, w), in_height - 1, in_width - 1) - return tvm.if_then_else(outside, zero, val) + return tvm.tir.if_then_else(outside, zero, val) data_deform = \ - tvm.compute((batch, in_channel, kernel_h, kernel_w, out_height, out_width), - lambda n, c, kh, kw, y, x: - _bilinear(n, c, - y * stride_h - pad_top + kh * dilation_h + - offset[n, c // ic_per_dgroup * (kernel_w*kernel_h*2) + - (kh * kernel_w + kw) * 2, y, x], - x * stride_w - pad_left + kw * dilation_w + - offset[n, c // ic_per_dgroup * (kernel_w*kernel_h*2) + - (kh * kernel_w + kw) * 2 + 1, y, x])) - return tvm.compute( + te.compute((batch, in_channel, kernel_h, kernel_w, out_height, out_width), + lambda n, c, kh, kw, y, x: + _bilinear(n, c, + y * stride_h - pad_top + kh * dilation_h + + offset[n, c // ic_per_dgroup * 
(kernel_w*kernel_h*2) + + (kh * kernel_w + kw) * 2, y, x], + x * stride_w - pad_left + kw * dilation_w + + offset[n, c // ic_per_dgroup * (kernel_w*kernel_h*2) + + (kh * kernel_w + kw) * 2 + 1, y, x])) + return te.compute( (batch, out_channel, out_height, out_width), - lambda n, f, y, x: tvm.sum( + lambda n, f, y, x: te.sum( data_deform[n, rc, ry, rx, y, x].astype(out_dtype) * kernel[f, rc, ry, rx].astype(out_dtype), axis=[rc, ry, rx]), tag="deformable_conv2d_nchw") diff --git a/topi/python/topi/nn/dense.py b/topi/python/topi/nn/dense.py index fe21e74..7d7ef6c 100644 --- a/topi/python/topi/nn/dense.py +++ b/topi/python/topi/nn/dense.py @@ -15,8 +15,7 @@ # specific language governing permissions and limitations # under the License. """TVM operator fully connected compute.""" -from __future__ import absolute_import -import tvm +from tvm import te from .. import tag def dense(data, weight, bias=None, out_dtype=None): @@ -24,13 +23,13 @@ def dense(data, weight, bias=None, out_dtype=None): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 2-D with shape [batch, in_dim] - weight : tvm.Tensor + weight : tvm.te.Tensor 2-D with shape [out_dim, in_dim] - bias : tvm.Tensor, optional + bias : tvm.te.Tensor, optional 1-D with shape [out_dim] out_dtype : str @@ -38,7 +37,7 @@ def dense(data, weight, bias=None, out_dtype=None): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [batch, out_dim] """ assert len(data.shape) == 2 and len(weight.shape) == 2, \ @@ -49,13 +48,13 @@ def dense(data, weight, bias=None, out_dtype=None): out_dtype = data.dtype batch, in_dim = data.shape out_dim, _ = weight.shape - k = tvm.reduce_axis((0, in_dim), name='k') - matmul = tvm.compute((batch, out_dim), \ - lambda i, j: tvm.sum(data[i, k].astype(out_dtype) * \ - weight[j, k].astype(out_dtype), axis=k), \ - name='T_dense', tag='dense') + k = te.reduce_axis((0, in_dim), name='k') + matmul = te.compute((batch, out_dim), \ + lambda i, j: te.sum(data[i, 
k].astype(out_dtype) * \ + weight[j, k].astype(out_dtype), axis=k), \ + name='T_dense', tag='dense') if bias is not None: - matmul = tvm.compute((batch, out_dim), \ - lambda i, j: matmul[i, j] + bias[j].astype(out_dtype), \ - tag=tag.BROADCAST) + matmul = te.compute((batch, out_dim), \ + lambda i, j: matmul[i, j] + bias[j].astype(out_dtype), \ + tag=tag.BROADCAST) return matmul diff --git a/topi/python/topi/nn/depth_to_space.py b/topi/python/topi/nn/depth_to_space.py index d847c08..a9fbfea 100644 --- a/topi/python/topi/nn/depth_to_space.py +++ b/topi/python/topi/nn/depth_to_space.py @@ -18,6 +18,7 @@ """TVM operator depth_to_space compute.""" from __future__ import absolute_import import tvm +from tvm import te from .. import tag @@ -26,7 +27,7 @@ def depth_to_space(data, block_size, layout='NCHW', mode='DCR'): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 4-D tensor in either NCHW or NHWC layout. block_size : int @@ -42,17 +43,17 @@ def depth_to_space(data, block_size, layout='NCHW', mode='DCR'): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor Output of shape [N, C / block_size**2, H * block_size, W * block_size] """ if layout == 'NCHW': in_n, in_c, in_h, in_w = data.shape - channel_factor = tvm.truncdiv(in_c, (block_size * block_size)) + channel_factor = tvm.tir.truncdiv(in_c, (block_size * block_size)) output_shape = [in_n, channel_factor, in_h * block_size, in_w * block_size] elif layout == 'NHWC': in_n, in_h, in_w, in_c = data.shape - channel_factor = tvm.truncdiv(in_c, (block_size * block_size)) + channel_factor = tvm.tir.truncdiv(in_c, (block_size * block_size)) output_shape = [in_n, in_h * block_size, in_w * block_size, channel_factor] else: @@ -66,10 +67,10 @@ def depth_to_space(data, block_size, layout='NCHW', mode='DCR'): return n, c, y, x def _get_pixel(n, c, y, x): - block_x = tvm.truncdiv(x, block_size) - block_y = tvm.truncdiv(y, block_size) - idx_x = tvm.truncmod(x, block_size) - idx_y = tvm.truncmod(y, block_size) 
+ block_x = tvm.tir.truncdiv(x, block_size) + block_y = tvm.tir.truncdiv(y, block_size) + idx_x = tvm.tir.truncmod(x, block_size) + idx_y = tvm.tir.truncmod(y, block_size) if mode == "DCR": channel_idx = channel_factor * ((block_size * idx_y) + idx_x) + c else: @@ -85,4 +86,4 @@ def depth_to_space(data, block_size, layout='NCHW', mode='DCR'): n, c, y, x = _get_indices(*indices) return _get_pixel(n, c, y, x) - return tvm.compute(output_shape, _compute, name='depth_to_space', tag=tag.INJECTIVE) + return te.compute(output_shape, _compute, name='depth_to_space', tag=tag.INJECTIVE) diff --git a/topi/python/topi/nn/depthwise_conv2d.py b/topi/python/topi/nn/depthwise_conv2d.py index 49aaace..32a9258 100644 --- a/topi/python/topi/nn/depthwise_conv2d.py +++ b/topi/python/topi/nn/depthwise_conv2d.py @@ -19,6 +19,7 @@ from __future__ import absolute_import as _abs from collections import namedtuple import tvm +from tvm import te from .dilate import dilate from .pad import pad @@ -52,10 +53,10 @@ def depthwise_conv2d_nchw(Input, Filter, stride, padding, dilation, out_dtype=No Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 4-D with shape [in_channel, channel_multiplier, filter_height, filter_width] stride : tuple of two ints @@ -72,7 +73,7 @@ def depthwise_conv2d_nchw(Input, Filter, stride, padding, dilation, out_dtype=No Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ out_dtype = Input.dtype if out_dtype is None else out_dtype @@ -104,13 +105,13 @@ def depthwise_conv2d_nchw(Input, Filter, stride, padding, dilation, out_dtype=No pad_after = [0, 0, pad_down, pad_right] PaddedInput = pad(Input, pad_before, pad_after, name="PaddedInput") # depthconv stage - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod - di = tvm.reduce_axis((0, filter_height), name='di') - dj = tvm.reduce_axis((0, 
filter_width), name='dj') - Output = tvm.compute( + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod + di = te.reduce_axis((0, filter_height), name='di') + dj = te.reduce_axis((0, filter_width), name='dj') + Output = te.compute( (batch, out_channel, out_height, out_width), - lambda b, c, i, j: tvm.sum( + lambda b, c, i, j: te.sum( (PaddedInput[b, idxdiv(c, channel_multiplier), i*stride_h+di*dilation_h, j*stride_w+dj*dilation_w].astype(out_dtype) * Filter[idxdiv(c, channel_multiplier), @@ -125,10 +126,10 @@ def depthwise_conv2d_nhwc(Input, Filter, stride, padding, dilation, out_dtype=No Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 4-D with shape [batch, in_height, in_width, in_channel] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 4-D with shape [filter_height, filter_width, in_channel, channel_multiplier] stride : tuple of two ints @@ -145,7 +146,7 @@ def depthwise_conv2d_nhwc(Input, Filter, stride, padding, dilation, out_dtype=No Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 4-D with shape [batch, out_height, out_width, out_channel] """ out_dtype = Input.dtype if out_dtype is None else out_dtype @@ -177,14 +178,14 @@ def depthwise_conv2d_nhwc(Input, Filter, stride, padding, dilation, out_dtype=No pad_after = [0, pad_down, pad_right, 0] PaddedInput = pad(Input, pad_before, pad_after, name="PaddedInput") # depthconv stage - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod - di = tvm.reduce_axis((0, filter_height), name='di') - dj = tvm.reduce_axis((0, filter_width), name='dj') - Output = tvm.compute( + di = te.reduce_axis((0, filter_height), name='di') + dj = te.reduce_axis((0, filter_width), name='dj') + Output = te.compute( (batch, out_height, out_width, out_channel), - lambda b, i, j, c: tvm.sum( + lambda b, i, j, c: te.sum( (PaddedInput[b, i*stride_h + di*dilation_h, j*stride_w + dj*dilation_w, idxdiv(c, channel_multiplier)].astype(out_dtype) * Filter[di, dj, @@ 
-199,10 +200,10 @@ def depthwise_conv2d_backward_input_nhwc(Filter, Out_grad, oshape, ishape, strid Parameters ---------- - Filter : tvm.Tensor + Filter : tvm.te.Tensor 4-D with shape [filter_height, filter_width, in_channel, channel_multiplier] - Out_grad : tvm.Tensor + Out_grad : tvm.te.Tensor 4-D with shape [batch, out_height, out_width, out_channel] stride : tuple of two ints @@ -213,7 +214,7 @@ def depthwise_conv2d_backward_input_nhwc(Filter, Out_grad, oshape, ishape, strid Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 4-D with shape [batch, in_height, in_width, in_channel] """ batch, in_h, in_w, in_c = ishape @@ -235,19 +236,19 @@ def depthwise_conv2d_backward_input_nhwc(Filter, Out_grad, oshape, ishape, strid bpad_right = (filter_w - 1 - fpad_right) + (stride_w - 1) padded_out_grad = pad(dilated_out_grad, \ - [0, bpad_top, bpad_left, 0], \ - [0, bpad_bottom, bpad_right, 0], \ - name='padded_out_grad') + [0, bpad_top, bpad_left, 0], \ + [0, bpad_bottom, bpad_right, 0], \ + name='padded_out_grad') - dh = tvm.reduce_axis((0, filter_h), name='dh') - dw = tvm.reduce_axis((0, filter_w), name='dw') - dc = tvm.reduce_axis((0, channel_multiplier), name='dc') + dh = te.reduce_axis((0, filter_h), name='dh') + dw = te.reduce_axis((0, filter_w), name='dw') + dc = te.reduce_axis((0, channel_multiplier), name='dc') - In_grad = tvm.compute( + In_grad = te.compute( (batch, in_h, in_w, in_c), - lambda b, h, w, c: tvm.sum(padded_out_grad[b, h+dh, w+dw, c*channel_multiplier + dc] * \ - Filter[filter_h-1-dh, filter_w-1-dw, c, dc], - axis=[dh, dw, dc]), tag='depthwise_conv2d_backward_input_nhwc') + lambda b, h, w, c: te.sum(padded_out_grad[b, h+dh, w+dw, c*channel_multiplier + dc] * \ + Filter[filter_h-1-dh, filter_w-1-dw, c, dc], + axis=[dh, dw, dc]), tag='depthwise_conv2d_backward_input_nhwc') return In_grad @@ -257,10 +258,10 @@ def depthwise_conv2d_backward_weight_nhwc(Input, Out_grad, oshape, fshape, strid Parameters ---------- - Input : tvm.Tensor + Input : 
tvm.te.Tensor 4-D with shape [batch, in_height, in_width, in_channel] - Out_grad : tvm.Tensor + Out_grad : tvm.te.Tensor 4-D with shape [batch, out_height, out_width, out_channel] stride : tuple of two ints @@ -271,7 +272,7 @@ def depthwise_conv2d_backward_weight_nhwc(Input, Out_grad, oshape, fshape, strid Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 4-D with shape [filter_height, filter_width, in_channel, channel_multiplier] """ batch, out_h, out_w, out_c = oshape @@ -285,19 +286,19 @@ def depthwise_conv2d_backward_weight_nhwc(Input, Out_grad, oshape, fshape, strid pad_top, pad_left, pad_bottom, pad_right = get_pad_tuple(padding, (filter_h, filter_w)) padded_in = pad(Input, \ - [0, pad_top, pad_left, 0], \ - [0, pad_bottom, pad_right, 0], \ - name='padded_in') + [0, pad_top, pad_left, 0], \ + [0, pad_bottom, pad_right, 0], \ + name='padded_in') - dh = tvm.reduce_axis((0, Out_grad.shape[1].value), name='dh') - dw = tvm.reduce_axis((0, Out_grad.shape[2].value), name='dw') - db = tvm.reduce_axis((0, batch), name='db') - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod + dh = te.reduce_axis((0, Out_grad.shape[1].value), name='dh') + dw = te.reduce_axis((0, Out_grad.shape[2].value), name='dw') + db = te.reduce_axis((0, batch), name='db') + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod - Weight_grad = tvm.compute( + Weight_grad = te.compute( (filter_h, filter_w, in_c, channel_multiplier), - lambda fh, fw, c, m: tvm.sum( + lambda fh, fw, c, m: te.sum( Out_grad[db, dh, dw, c*channel_multiplier+idxmod(m, channel_multiplier)] * padded_in[db, fh+dh*stride_h, fw+dw*stride_w, c], axis=[db, dh, dw]), tag='depthwise_conv2d_backward_weight_nhwc') @@ -311,10 +312,10 @@ def depthwise_conv2d_NCHWc(Input, Filter, stride, padding, dilation, Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 5-D with shape [batch, in_channel_chunk, in_height, in_width, in_channel_block] - Filter : tvm.Tensor + Filter : tvm.te.Tensor 6-D with shape 
[out_channel_chunk, 1, filter_height, filter_width, 1, out_channel_block] In NCHWc depthwise convolution, we group kernel's in_channel and channel_multiplier together then do the tiling. @@ -339,7 +340,7 @@ def depthwise_conv2d_NCHWc(Input, Filter, stride, padding, dilation, Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 5-D with shape [batch, out_channel_chunk, out_height, out_width, out_channel_block] """ raise ValueError("missing register for topi.nn.depthwise_conv2d_NCHWc") diff --git a/topi/python/topi/nn/dilate.py b/topi/python/topi/nn/dilate.py index d952453..f628fad 100644 --- a/topi/python/topi/nn/dilate.py +++ b/topi/python/topi/nn/dilate.py @@ -16,18 +16,18 @@ # under the License. # pylint: disable=invalid-name """Dilation operators""" -from __future__ import absolute_import as _abs import tvm +from tvm import te from .. import util from .. import tag -@tvm.tag_scope(tag=tag.INJECTIVE+",dilate") +@te.tag_scope(tag=tag.INJECTIVE+",dilate") def dilate(data, strides, name="DilatedInput"): """Dilate data with zeros. Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor n-D, can be any layout. strides : list / tuple of n ints @@ -38,7 +38,7 @@ def dilate(data, strides, name="DilatedInput"): Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor n-D, the same layout as data. 
""" n = len(data.shape) @@ -47,13 +47,13 @@ def dilate(data, strides, name="DilatedInput"): n, len(strides))) out_shape = tuple( - tvm.ir_pass.Simplify((data.shape[i] - 1) * strides[i] + 1) for i in range(n)) + tvm.tir.ir_pass.Simplify((data.shape[i] - 1) * strides[i] + 1) for i in range(n)) def _dilate(*indices): not_zero = [] index_tuple = [] - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod for i in range(n): if not util.equal_const_int(strides[i], 1): index_tuple.append(idxdiv(indices[i], strides[i])) @@ -61,8 +61,9 @@ def dilate(data, strides, name="DilatedInput"): else: index_tuple.append(indices[i]) if not_zero: - not_zero = tvm.all(*not_zero) - return tvm.if_then_else(not_zero, data(*index_tuple), tvm.const(0.0, data.dtype)) + not_zero = tvm.tir.all(*not_zero) + return tvm.tir.if_then_else( + not_zero, data(*index_tuple), tvm.tir.const(0.0, data.dtype)) return data(*index_tuple) - return tvm.compute(out_shape, _dilate, name=name) + return te.compute(out_shape, _dilate, name=name) diff --git a/topi/python/topi/nn/elemwise.py b/topi/python/topi/nn/elemwise.py index e9f3019..1315a48 100644 --- a/topi/python/topi/nn/elemwise.py +++ b/topi/python/topi/nn/elemwise.py @@ -17,33 +17,34 @@ """Elementwise operators""" from __future__ import absolute_import as _abs import tvm +from tvm import te from .. import tag from ..util import get_const_int -@tvm.tag_scope(tag=tag.ELEMWISE) +@tvm.te.tag_scope(tag=tag.ELEMWISE) def relu(x): """Take relu of input x. Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ - return tvm.compute(x.shape, lambda *i: tvm.max(x(*i), tvm.const(0, x.dtype))) + return te.compute(x.shape, lambda *i: tvm.te.max(x(*i), tvm.tir.const(0, x.dtype))) -@tvm.tag_scope(tag=tag.ELEMWISE) +@tvm.te.tag_scope(tag=tag.ELEMWISE) def leaky_relu(x, alpha): """Take leaky relu of input x. 
Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. alpha : float @@ -51,16 +52,16 @@ def leaky_relu(x, alpha): Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ def _compute(*indices): value = x(*indices) - calpha = tvm.const(alpha, value.dtype) - return tvm.expr.Select(value > 0, value, value * calpha) - return tvm.compute(x.shape, _compute) + calpha = tvm.tir.const(alpha, value.dtype) + return tvm.tir.Select(value > 0, value, value * calpha) + return te.compute(x.shape, _compute) -@tvm.tag_scope(tag=tag.BROADCAST) +@tvm.te.tag_scope(tag=tag.BROADCAST) def prelu(x, slope, axis=1): """ PReLU. It accepts two arguments: an input ``x`` and a weight array ``W`` @@ -68,17 +69,17 @@ def prelu(x, slope, axis=1): where :math:`*` is an elementwise multiplication for each sample in the batch. Arguments: - x : tvm.Tensor + x : tvm.te.Tensor Input argument. - slope : tvm.Tensor + slope : tvm.te.Tensor Channelised slope tensor for prelu axis : int The axis where the channel data needs to be applied Returns: - y : tvm.Tensor + y : tvm.te.Tensor The result. Links: @@ -91,5 +92,5 @@ def prelu(x, slope, axis=1): def _compute_channelwise(*indices): xval = x(*indices) - return tvm.expr.Select(xval > 0, xval, xval * slope(indices[axis])) - return tvm.compute(x.shape, _compute_channelwise) + return tvm.tir.Select(xval > 0, xval, xval * slope(indices[axis])) + return te.compute(x.shape, _compute_channelwise) diff --git a/topi/python/topi/nn/fifo_buffer.py b/topi/python/topi/nn/fifo_buffer.py index 946b8d1..de283e0 100644 --- a/topi/python/topi/nn/fifo_buffer.py +++ b/topi/python/topi/nn/fifo_buffer.py @@ -18,10 +18,11 @@ """FIFO buffer op""" from __future__ import absolute_import as _abs import tvm +from tvm import te from .. 
import tag from ..transform import concatenate, strided_slice -@tvm.tag_scope(tag=tag.INJECTIVE+",fifo_buffer") +@tvm.te.tag_scope(tag=tag.INJECTIVE+",fifo_buffer") def fifo_buffer(data, buffer, axis): """ FIFO buffer to enable computation reuse in CNNs with sliding indow input @@ -42,16 +43,16 @@ def fifo_buffer(data, buffer, axis): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor The input data - buffer : tvm.Tensor + buffer : tvm.te.Tensor Previous value of the FIFO buffer axis : int Specify which axis should be used for buffering Returns ------- - result : tvm.Tensor + result : tvm.te.Tensor Updated value for the buffer """ assert len(data.shape) == len(buffer.shape), \ @@ -70,80 +71,80 @@ def fifo_buffer(data, buffer, axis): # Explicitly write out formula up to 4D, and then use concat+slice combo for 5D and higher if len(buffer.shape) == 1: - return tvm.compute(buffer.shape, - lambda i: - tvm.if_then_else(i < buflen - data_size, - buffer[i + data_size], - data[i - buflen + data_size]), - name='new_buffer') + return te.compute(buffer.shape, + lambda i: + tvm.tir.if_then_else(i < buflen - data_size, + buffer[i + data_size], + data[i - buflen + data_size]), + name='new_buffer') if len(buffer.shape) == 2: if axis == 0: - return tvm.compute(buffer.shape, - lambda i, j: - tvm.if_then_else(i < buflen - data_size, - buffer[i + data_size, j], - data[i - buflen + data_size, j]), - name='new_buffer') + return te.compute(buffer.shape, + lambda i, j: + tvm.tir.if_then_else(i < buflen - data_size, + buffer[i + data_size, j], + data[i - buflen + data_size, j]), + name='new_buffer') if axis == 1: - return tvm.compute(buffer.shape, - lambda i, j: - tvm.if_then_else(j < buflen - data_size, - buffer[i, j + data_size], - data[i, j - buflen + data_size]), - name='new_buffer') + return te.compute(buffer.shape, + lambda i, j: + tvm.tir.if_then_else(j < buflen - data_size, + buffer[i, j + data_size], + data[i, j - buflen + data_size]), + name='new_buffer') assert 
False, 'Invalid value for axis; it should be at most {}'.format(len(buffer.shape)) elif len(buffer.shape) == 3: if axis == 0: - return tvm.compute(buffer.shape, - lambda i, j, k: - tvm.if_then_else(i < buflen - data_size, - buffer[i + data_size, j, k], - data[i - buflen + data_size, j, k]), - name='new_buffer') + return te.compute(buffer.shape, + lambda i, j, k: + tvm.tir.if_then_else(i < buflen - data_size, + buffer[i + data_size, j, k], + data[i - buflen + data_size, j, k]), + name='new_buffer') if axis == 1: - return tvm.compute(buffer.shape, - lambda i, j, k: - tvm.if_then_else(j < buflen - data_size, - buffer[i, j + data_size, k], - data[i, j - buflen + data_size, k]), - name='new_buffer') + return te.compute(buffer.shape, + lambda i, j, k: + tvm.tir.if_then_else(j < buflen - data_size, + buffer[i, j + data_size, k], + data[i, j - buflen + data_size, k]), + name='new_buffer') if axis == 2: - return tvm.compute(buffer.shape, - lambda i, j, k: - tvm.if_then_else(k < buflen - data_size, - buffer[i, j, k + data_size], - data[i, j, k - buflen + data_size]), - name='new_buffer') + return te.compute(buffer.shape, + lambda i, j, k: + tvm.tir.if_then_else(k < buflen - data_size, + buffer[i, j, k + data_size], + data[i, j, k - buflen + data_size]), + name='new_buffer') assert False, 'Invalid value for axis; it should be at most {}'.format(len(buffer.shape)) elif len(buffer.shape) == 4: if axis == 0: - return tvm.compute(buffer.shape, - lambda i, j, k, l: - tvm.if_then_else(i < buflen - data_size, - buffer[i + data_size, j, k, l], - data[i - buflen + data_size, j, k, l]), - name='new_buffer') + return te.compute(buffer.shape, + lambda i, j, k, l: + tvm.tir.if_then_else(i < buflen - data_size, + buffer[i + data_size, j, k, l], + data[i - buflen + data_size, j, k, l]), + name='new_buffer') if axis == 1: - return tvm.compute(buffer.shape, - lambda i, j, k, l: - tvm.if_then_else(j < buflen - data_size, - buffer[i, j + data_size, k, l], - data[i, j - buflen + data_size, k, 
l]), - name='new_buffer') + return te.compute(buffer.shape, + lambda i, j, k, l: + tvm.tir.if_then_else(j < buflen - data_size, + buffer[i, j + data_size, k, l], + data[i, j - buflen + data_size, k, l]), + name='new_buffer') if axis == 2: - return tvm.compute(buffer.shape, - lambda i, j, k, l: - tvm.if_then_else(k < buflen - data_size, - buffer[i, j, k + data_size, l], - data[i, j, k - buflen + data_size, l]), - name='new_buffer') + return te.compute(buffer.shape, + lambda i, j, k, l: + tvm.tir.if_then_else(k < buflen - data_size, + buffer[i, j, k + data_size, l], + data[i, j, k - buflen + data_size, l]), + name='new_buffer') if axis == 3: - return tvm.compute(buffer.shape, - lambda i, j, k, l: - tvm.if_then_else(l < buflen - data_size, - buffer[i, j, k, l + data_size], - data[i, j, k, l - buflen + data_size]), - name='new_buffer') + return te.compute(buffer.shape, + lambda i, j, k, l: + tvm.tir.if_then_else(l < buflen - data_size, + buffer[i, j, k, l + data_size], + data[i, j, k, l - buflen + data_size]), + name='new_buffer') assert False, 'Invalid value for axis; it should be at most {}'.format(len(buffer.shape)) else: # Implement FIFO buffer as combination of concat and slice diff --git a/topi/python/topi/nn/flatten.py b/topi/python/topi/nn/flatten.py index dba9b7c..11fe0d8 100644 --- a/topi/python/topi/nn/flatten.py +++ b/topi/python/topi/nn/flatten.py @@ -17,20 +17,21 @@ """TVM operator flatten compute.""" from __future__ import absolute_import import tvm +from tvm import te from .. import tag -@tvm.tag_scope(tag=tag.INJECTIVE) +@tvm.te.tag_scope(tag=tag.INJECTIVE) def flatten(data): """Flattens the input array into a 2-D array by collapsing the higher dimensions. Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor Input array. Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D array with collapsed higher dimensions. 
""" ishape = data.shape @@ -38,8 +39,8 @@ def flatten(data): for i in range(1, len(ishape)): dim = dim * ishape[i] oshape = [ishape[0], dim] - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod def unwrap(idx, shape): index = [] @@ -48,4 +49,4 @@ def flatten(data): idx = idxdiv(idx, s) return list(reversed(index)) - return tvm.compute(oshape, lambda i, j: data(i, *unwrap(j, ishape[1:]))) + return te.compute(oshape, lambda i, j: data(i, *unwrap(j, ishape[1:]))) diff --git a/topi/python/topi/nn/local_response_norm.py b/topi/python/topi/nn/local_response_norm.py index 1b41c7d..35c76d2 100644 --- a/topi/python/topi/nn/local_response_norm.py +++ b/topi/python/topi/nn/local_response_norm.py @@ -31,7 +31,7 @@ def lrn(data, size, axis=1, alpha=0.0001, beta=0.75, bias=2): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, channel, height, width] size : int @@ -52,7 +52,7 @@ def lrn(data, size, axis=1, alpha=0.0001, beta=0.75, bias=2): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D output with same shape """ return cpp.nn.lrn(data, size, axis, alpha, beta, bias) diff --git a/topi/python/topi/nn/mapping.py b/topi/python/topi/nn/mapping.py index b2222bd..12558a8 100644 --- a/topi/python/topi/nn/mapping.py +++ b/topi/python/topi/nn/mapping.py @@ -18,49 +18,50 @@ """Operators of one-to-one-mapping on the first input""" from __future__ import absolute_import as _abs import tvm +from tvm import te from .. import tag -@tvm.tag_scope(tag=tag.BROADCAST) +@tvm.te.tag_scope(tag=tag.BROADCAST) def scale_shift_nchw(Input, Scale, Shift): """Batch normalization operator in inference. 
Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor Input tensor, layout is NCHW - Scale : tvm.Tensor + Scale : tvm.te.Tensor Scale tensor, 1-D of size channel number - Shift : tvm.Tensor + Shift : tvm.te.Tensor Shift tensor, 1-D of size channel number Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor Output tensor, layout is NCHW """ - return tvm.compute(Input.shape, lambda b, c, i, j: Input[b, c, i, j] * Scale[c] + Shift[c], name='ScaleShift') + return te.compute(Input.shape, lambda b, c, i, j: Input[b, c, i, j] * Scale[c] + Shift[c], name='ScaleShift') -@tvm.tag_scope(tag=tag.BROADCAST) +@tvm.te.tag_scope(tag=tag.BROADCAST) def scale_shift_nhwc(Input, Scale, Shift): """Batch normalization operator in inference. Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor Input tensor, layout is NHWC - Scale : tvm.Tensor + Scale : tvm.te.Tensor Scale tensor, 1-D of size channel number - Shift : tvm.Tensor + Shift : tvm.te.Tensor Shift tensor, 1-D of size channel number Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor Output tensor, layout is NHWC """ - return tvm.compute(Input.shape, lambda b, i, j, c: Input[b, i, j, c] * Scale[c] + Shift[c], name='ScaleShift') + return te.compute(Input.shape, lambda b, i, j, c: Input[b, i, j, c] * Scale[c] + Shift[c], name='ScaleShift') diff --git a/topi/python/topi/nn/pad.py b/topi/python/topi/nn/pad.py index 13f8e72..8fe5337 100644 --- a/topi/python/topi/nn/pad.py +++ b/topi/python/topi/nn/pad.py @@ -17,16 +17,17 @@ """Pad the data by constant value """ from __future__ import absolute_import as _abs import tvm +from tvm import te from ..util import equal_const_int from .. import tag -@tvm.tag_scope(tag=tag.INJECTIVE+",pad") +@tvm.te.tag_scope(tag=tag.INJECTIVE+",pad") def pad(data, pad_before, pad_after=None, pad_value=0.0, name="PadInput"): """Pad Input with zeros. Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor n-D input, can be any layout. 
pad_before : list / tuple of n ints @@ -43,7 +44,7 @@ def pad(data, pad_before, pad_after=None, pad_value=0.0, name="PadInput"): Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor n-D, the same layout as Input. """ n = len(data.shape) @@ -55,10 +56,10 @@ def pad(data, pad_before, pad_after=None, pad_value=0.0, name="PadInput"): raise ValueError("Input dimension and pad_after dismatch : %d vs %d" % ( n, len(pad_before))) out_shape = tuple( - tvm.ir_pass.Simplify( + tvm.tir.ir_pass.Simplify( (data.shape[i] + pad_before[i] + pad_after[i])) for i in range(n)) - pad_value = (pad_value if isinstance(pad_value, tvm.expr.PrimExpr) - else tvm.const(pad_value, data.dtype)) + pad_value = (pad_value if isinstance(pad_value, tvm.tir.PrimExpr) + else tvm.tir.const(pad_value, data.dtype)) def _pad(*indices): not_zero = [] index_tuple = [] @@ -70,13 +71,13 @@ def pad(data, pad_before, pad_after=None, pad_value=0.0, name="PadInput"): not_zero.append(indices[i] >= pad_before[i]) not_zero.append(indices[i] < data.shape[i] + pad_before[i]) if not_zero: - not_zero = tvm.all(*not_zero) - return tvm.if_then_else(not_zero, data(*index_tuple), pad_value) + not_zero = tvm.tir.all(*not_zero) + return tvm.tir.if_then_else(not_zero, data(*index_tuple), pad_value) return data(*index_tuple) - return tvm.compute(out_shape, _pad, name=name) + return te.compute(out_shape, _pad, name=name) -@tvm.tag_scope(tag=tag.INJECTIVE + ",pad") +@tvm.te.tag_scope(tag=tag.INJECTIVE + ",pad") def mirror_pad(data, pad_before, pad_after=None, @@ -86,7 +87,7 @@ def mirror_pad(data, Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor n-D input, can be any layout. pad_before : list / tuple of n ints @@ -103,7 +104,7 @@ def mirror_pad(data, Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor n-D, the same layout as Input. 
""" n = len(data.shape) @@ -115,7 +116,7 @@ def mirror_pad(data, raise ValueError("Input dimension and pad_after dismatch : %d vs %d" % (n, len(pad_before))) out_shape = tuple( - tvm.ir_pass.Simplify((data.shape[i] + pad_before[i] + pad_after[i])) + tvm.tir.ir_pass.Simplify((data.shape[i] + pad_before[i] + pad_after[i])) for i in range(n)) assert mode in ('SYMMETRIC', 'REFLECT') mode = int(mode == 'SYMMETRIC') @@ -136,10 +137,10 @@ def mirror_pad(data, below.append(indices[i] < pad_before[i]) mapped_tuple = [] for i, axis in enumerate(index_tuple): - mapped_axis = tvm.if_then_else(below[i], -axis - mode, axis) - mapped_axis = tvm.if_then_else( + mapped_axis = tvm.tir.if_then_else(below[i], -axis - mode, axis) + mapped_axis = tvm.tir.if_then_else( above[i], (2 * (data.shape[i] - 1)) - axis + mode, mapped_axis) mapped_tuple.append(mapped_axis) return data(*mapped_tuple) - return tvm.compute(out_shape, _pad, name=name) + return te.compute(out_shape, _pad, name=name) diff --git a/topi/python/topi/nn/pooling.py b/topi/python/topi/nn/pooling.py index 5fd2ded..e3d57ce 100644 --- a/topi/python/topi/nn/pooling.py +++ b/topi/python/topi/nn/pooling.py @@ -34,7 +34,7 @@ def global_pool(data, pool_type, layout="NCHW"): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor n-D with shape of layout pool_type : str @@ -51,7 +51,7 @@ def global_pool(data, pool_type, layout="NCHW"): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor n-D in same layout with height and width dimension size of 1. 
e.g., for NCHW, the output shape will be [batch, channel, 1, 1] """ @@ -76,7 +76,7 @@ def pool(data, Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor n-D with shape of layout kernel : list/tuple of two ints @@ -108,7 +108,7 @@ def pool(data, Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor n-D in the same layout """ return cpp.nn.pool(data, kernel, stride, padding, @@ -133,10 +133,10 @@ def pool_grad(grads, Parameters ---------- - grads : tvm.Tensor + grads : tvm.te.Tensor n-D with shape of layout - data : tvm.Tensor + data : tvm.te.Tensor n-D with shape of layout kernel : list/tuple of two ints @@ -168,7 +168,7 @@ def pool_grad(grads, Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor n-D in the same layout """ return cpp.nn.pool_grad(grads, data, kernel, @@ -192,7 +192,7 @@ def adaptive_pool(data, Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor n-D with shape of layout output_size : tuple of int @@ -212,7 +212,7 @@ def adaptive_pool(data, Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor n-D in the same layout """ return cpp.nn.adaptive_pool(data, output_size, POOL_TYPE_CODE[pool_type], layout) @@ -236,7 +236,7 @@ def pool1d(data, Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor n-D with shape of layout kernel : list/tuple of one int or int @@ -268,7 +268,7 @@ def pool1d(data, Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor n-D in the same layout """ if isinstance(kernel, int): @@ -297,7 +297,7 @@ def pool3d(data, Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor n-D with shape of layout kernel : list/tuple of three ints @@ -329,7 +329,7 @@ def pool3d(data, Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor n-D in the same layout """ return cpp.nn.pool3d(data, kernel, stride, padding, diff --git a/topi/python/topi/nn/softmax.py b/topi/python/topi/nn/softmax.py index 16ffd79..c414372 100644 --- a/topi/python/topi/nn/softmax.py +++ 
b/topi/python/topi/nn/softmax.py @@ -18,14 +18,15 @@ """TVM operator for softmax and log_softmax compute.""" from __future__ import absolute_import import tvm +from tvm import te -@tvm.tag_scope(tag='softmax_output') +@tvm.te.tag_scope(tag='softmax_output') def softmax(x, axis=-1): """Perform softmax activation on the data Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor can be any dimension axis : int @@ -33,7 +34,7 @@ def softmax(x, axis=-1): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor output shape is the same as input """ shape = x.shape @@ -42,8 +43,8 @@ def softmax(x, axis=-1): if axis >= len(shape): ValueError("axis parameter should be less than input dim") - k1 = tvm.reduce_axis((0, shape[axis]), name='k') - k2 = tvm.reduce_axis((0, shape[axis]), name='k') + k1 = te.reduce_axis((0, shape[axis]), name='k') + k2 = te.reduce_axis((0, shape[axis]), name='k') def insert_reduce_index(indices, reduce_index): return indices[:axis] + (reduce_index,) + indices[axis:] @@ -53,51 +54,51 @@ def softmax(x, axis=-1): def _compute_max(*indices): eval_range = insert_reduce_index(indices, k1) - return tvm.max(x[eval_range], axis=k1) + return tvm.te.max(x[eval_range], axis=k1) def _compute_exp(max_elem, *indices): non_reduce_indices = get_non_reduce_indices(indices) - return tvm.exp(x[indices] - max_elem[non_reduce_indices]) + return te.exp(x[indices] - max_elem[non_reduce_indices]) def _compute_expsum(exp, *indices): eval_range = insert_reduce_index(indices, k2) - return tvm.sum(exp[eval_range], axis=k2) + return te.sum(exp[eval_range], axis=k2) def _normalize(exp, expsum, *indices): non_reduce_indices = get_non_reduce_indices(indices) return exp[indices] / expsum[non_reduce_indices] reduced_shape = tuple([dim for (i, dim) in enumerate(shape) if i != axis]) - max_elem = tvm.compute(reduced_shape, _compute_max, name='T_softmax_maxelem') - exp = tvm.compute(shape, lambda *indices: _compute_exp(max_elem, *indices), - name='T_softmax_exp') - 
expsum = tvm.compute(reduced_shape, lambda *indices: _compute_expsum(exp, *indices), - name='T_softmax_expsum') - return tvm.compute(shape, lambda *indices: _normalize(exp, expsum, *indices), - name='T_softmax_norm', attrs={"axis" : axis}) + max_elem = te.compute(reduced_shape, _compute_max, name='T_softmax_maxelem') + exp = te.compute(shape, lambda *indices: _compute_exp(max_elem, *indices), + name='T_softmax_exp') + expsum = te.compute(reduced_shape, lambda *indices: _compute_expsum(exp, *indices), + name='T_softmax_expsum') + return te.compute(shape, lambda *indices: _normalize(exp, expsum, *indices), + name='T_softmax_norm', attrs={"axis" : axis}) -@tvm.tag_scope(tag='log_softmax_output') +@tvm.te.tag_scope(tag='log_softmax_output') def log_softmax(x): """Perform log softmax activation on the data Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 2-D input data Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D output with same shape """ assert len(x.shape) == 2, "only support 2-dim log softmax" m, n = x.shape - k = tvm.reduce_axis((0, n), name='k') - max_elem = tvm.compute((m, ), lambda i: tvm.max(x[i, k], axis=k)) - k = tvm.reduce_axis((0, n), name='k') - expsum = tvm.compute( - (m, ), lambda i: tvm.sum(tvm.exp(x[i, k] - max_elem[i]), axis=k)) - return tvm.compute( - x.shape, lambda i, j: x[i, j] - max_elem[i] - tvm.log(expsum[i])) + k = te.reduce_axis((0, n), name='k') + max_elem = te.compute((m, ), lambda i: tvm.te.max(x[i, k], axis=k)) + k = te.reduce_axis((0, n), name='k') + expsum = te.compute( + (m, ), lambda i: te.sum(te.exp(x[i, k] - max_elem[i]), axis=k)) + return te.compute( + x.shape, lambda i, j: x[i, j] - max_elem[i] - te.log(expsum[i])) diff --git a/topi/python/topi/nn/space_to_depth.py b/topi/python/topi/nn/space_to_depth.py index 6ed7cd6..b90bd11 100644 --- a/topi/python/topi/nn/space_to_depth.py +++ b/topi/python/topi/nn/space_to_depth.py @@ -18,6 +18,7 @@ """TVM operator space_to_depth compute.""" from 
__future__ import absolute_import import tvm +from tvm import te from .. import tag @@ -26,7 +27,7 @@ def space_to_depth(data, block_size, layout='NCHW'): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 4-D tensor in either NCHW or NHWC layout. block_size : int @@ -37,17 +38,17 @@ def space_to_depth(data, block_size, layout='NCHW'): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor Output of shape [N, C * block_size**2, H / block_size, W / block_size] """ if layout == 'NCHW': in_n, in_c, in_h, in_w = data.shape output_shape = [in_n, in_c * block_size * block_size, - tvm.truncdiv(in_h, block_size), tvm.truncdiv(in_w, block_size)] + tvm.tir.truncdiv(in_h, block_size), tvm.tir.truncdiv(in_w, block_size)] elif layout == 'NHWC': in_n, in_h, in_w, in_c = data.shape - output_shape = [in_n, tvm.truncdiv(in_h, block_size), tvm.truncdiv( + output_shape = [in_n, tvm.tir.truncdiv(in_h, block_size), tvm.tir.truncdiv( in_w, block_size), in_c * block_size * block_size] else: raise ValueError("Only NCHW and NHWC layouts are currently supported.") @@ -60,10 +61,10 @@ def space_to_depth(data, block_size, layout='NCHW'): return n, c, y, x def _get_pixel(n, c, y, x): - block_offset = tvm.truncdiv(c, in_c) - channel_idx = tvm.truncmod(c, in_c) - x_idx = tvm.truncmod(block_offset, block_size) - y_idx = tvm.truncdiv(block_offset, block_size) + block_offset = tvm.tir.truncdiv(c, in_c) + channel_idx = tvm.tir.truncmod(c, in_c) + x_idx = tvm.tir.truncmod(block_offset, block_size) + y_idx = tvm.tir.truncdiv(block_offset, block_size) if layout == 'NCHW': output = data(n, channel_idx, y_idx + @@ -77,4 +78,4 @@ def space_to_depth(data, block_size, layout='NCHW'): n, c, y, x = _get_indices(*indices) return _get_pixel(n, c, y, x) - return tvm.compute(output_shape, _compute, name='space_to_depth', tag=tag.INJECTIVE) + return te.compute(output_shape, _compute, name='space_to_depth', tag=tag.INJECTIVE) diff --git a/topi/python/topi/nn/sparse.py 
b/topi/python/topi/nn/sparse.py index 6974ff4..b37bac2 100644 --- a/topi/python/topi/nn/sparse.py +++ b/topi/python/topi/nn/sparse.py @@ -18,6 +18,7 @@ """Sparse operators""" from __future__ import absolute_import import tvm +from tvm import te from ..util import get_const_tuple @@ -29,24 +30,24 @@ def sparse_dense(data, weight_data, weight_indices, weight_indptr): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor 2-D with shape [M, K], float32 - weight_data : tvm.Tensor + weight_data : tvm.te.Tensor 1-D with shape [nnz] (CSR) or 3-D with shape [num_blocks, bs_r, bs_c] (BSR) - weight_indices : tvm.Tensor + weight_indices : tvm.te.Tensor 1-D with shape [nnz] (CSR) or 1-D with shape [num_blocks] (BSR) - weight_indptr : tvm.Tensor + weight_indptr : tvm.te.Tensor 1-D with shape [N + 1] (CSR) or 1-D with shape [(N + 1) // bs_r] (BSR) Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [M, N] """ assert len(weight_data.shape) in (1, 3) @@ -66,12 +67,12 @@ def _sparse_dense_csrmm(data, weight_data, weight_indices, weight_indptr): row_start = weight_indptr[row] row_end = weight_indptr[row + 1] row_elems = row_end - row_start - elem_idx = tvm.reduce_axis((0, row_elems), name="elem_idx") + elem_idx = te.reduce_axis((0, row_elems), name="elem_idx") elem = row_start + elem_idx a_val = weight_data[elem] weight_val = data[i, weight_indices[elem]] - return tvm.sum(a_val * weight_val, axis=elem_idx) - return tvm.compute(oshape, f, tag="sparse_dense_csrmm") + return te.sum(a_val * weight_val, axis=elem_idx) + return te.compute(oshape, f, tag="sparse_dense_csrmm") def _sparse_dense_bsrmm(data, weight_data, weight_indices, weight_indptr): @@ -84,22 +85,22 @@ def _sparse_dense_bsrmm(data, weight_data, weight_indices, weight_indptr): row_start = weight_indptr[nb_j] row_end = weight_indptr[nb_j + 1] row_elems = row_end - row_start - elem_idx = tvm.reduce_axis( + elem_idx = te.reduce_axis( (0, row_elems), name="elem_idx") block_offset = row_start + 
elem_idx - c = tvm.reduce_axis((0, bs_c), name="c") + c = te.reduce_axis((0, bs_c), name="c") block_j = weight_indices[block_offset] block_ij_val = weight_data[block_offset][j][c] x_val = data[i, bs_c * block_j + c] - return tvm.sum(block_ij_val * x_val, axis=[elem_idx, c]) + return te.sum(block_ij_val * x_val, axis=[elem_idx, c]) - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod - bsrmm_block = tvm.compute( + bsrmm_block = te.compute( (m, num_blocks, bs_r), _compute_block, tag="sparse_dense_bsrmm_block") - return tvm.compute( + return te.compute( (m, num_blocks * bs_r), lambda m, n: bsrmm_block[m, idxd(n, bs_r), idxm(n, bs_r)], tag="sparse_dense_bsrmm") @@ -113,24 +114,24 @@ def sparse_transpose(sparse_data, sparse_indices, sparse_indptr): Parameters ---------- - sparse_data : tvm.Tensor + sparse_data : tvm.te.Tensor 1-D with shape [nonzeros], dtype of 'float32' - sparse_indices : tvm.Tensor + sparse_indices : tvm.te.Tensor 1-D with shape [nonzeros], dtype of 'int32' - sparse_indptr : tvm.Tensor + sparse_indptr : tvm.te.Tensor 1-D with shape [n+1], dtype of 'int32' Returns ------- - out_data : tvm.Tensor + out_data : tvm.te.Tensor 1-D with shape [nonzeros], dtype of 'float32' - out_indices : tvm.Tensor + out_indices : tvm.te.Tensor 1-D with shape [nonzeros], dtype of 'int32' - out_indptr : tvm.Tensor + out_indptr : tvm.te.Tensor 1-D with shape [n+1], dtype of 'int32' """ assert len(sparse_data.shape) == 1, "error in data dimension" @@ -143,7 +144,7 @@ def sparse_transpose(sparse_data, sparse_indices, sparse_indptr): # TODO: Add BSR transpose support - output_data, output_indices, output_indptr = tvm.extern( + output_data, output_indices, output_indptr = te.extern( shape=output_shape, inputs=[sparse_data, sparse_indices, sparse_indptr], fcompute=lambda ins, outs: @@ -157,7 +158,7 @@ def sparse_transpose(sparse_data, sparse_indices, sparse_indptr): def _csr_transpose_ir(data, indices, indptr, out_data, out_indices, 
out_indptr): """define ir for csr_transpose""" - irb = tvm.ir_builder.create() + irb = tvm.tir.ir_builder.create() data_ptr = irb.buffer_ptr(data) indices_ptr = irb.buffer_ptr(indices) diff --git a/topi/python/topi/nn/upsampling.py b/topi/python/topi/nn/upsampling.py index c816bbb..008e52e 100644 --- a/topi/python/topi/nn/upsampling.py +++ b/topi/python/topi/nn/upsampling.py @@ -15,9 +15,8 @@ # specific language governing permissions and limitations # under the License. """TVM operator upsampling compute.""" -from __future__ import absolute_import import topi -import tvm +from tvm import te from ..util import simplify @@ -28,7 +27,7 @@ def upsampling(data, scale_h, scale_w, layout="NCHW", method='nearest_neighbor', Parameters ---------- - inputs : tvm.Tensor + inputs : tvm.te.Tensor inputs is a 4-D tensor with shape [batch, channel, in_height, in_width] or [batch, in_height, in_width, channel] @@ -47,17 +46,17 @@ def upsampling(data, scale_h, scale_w, layout="NCHW", method='nearest_neighbor', Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [batch, channel, in_height*scale_h, in_width*scale_w] or [batch, in_height*scale, in_width*scale, channel] """ base_layout = layout[0:4] if base_layout == "NCHW": - out_shape = (simplify(topi.cast(tvm.round(data.shape[2] * scale_h), data.shape[2].dtype)), - simplify(topi.cast(tvm.round(data.shape[3] * scale_w), data.shape[3].dtype))) + out_shape = (simplify(topi.cast(te.round(data.shape[2] * scale_h), data.shape[2].dtype)), + simplify(topi.cast(te.round(data.shape[3] * scale_w), data.shape[3].dtype))) elif layout == "NHWC": - out_shape = (simplify(topi.cast(tvm.round(data.shape[1] * scale_h), data.shape[1].dtype)), - simplify(topi.cast(tvm.round(data.shape[2] * scale_w), data.shape[2].dtype))) + out_shape = (simplify(topi.cast(te.round(data.shape[1] * scale_h), data.shape[1].dtype)), + simplify(topi.cast(te.round(data.shape[2] * scale_w), data.shape[2].dtype))) else: raise ValueError("not support 
this layout {} yet".format(layout)) @@ -73,7 +72,7 @@ def upsampling3d(data, scale_d, scale_h, scale_w, layout="NCDHW", method='neares Parameters ---------- - inputs : tvm.Tensor + inputs : tvm.te.Tensor inputs is a 5-D tensor with shape [batch, channel, in_depth, in_height, in_width] or [batch, in_depth, in_height, in_width, channel] @@ -101,19 +100,19 @@ def upsampling3d(data, scale_d, scale_h, scale_w, layout="NCDHW", method='neares Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 5-D with shape [batch, channel, in_depth*scale, in_height*scale, in_width*scale] or [batch, in_depth*scale, in_height*scale, in_width*scale, channel] """ base_layout = layout[0:5] if base_layout == "NCDHW": - out_shape = (simplify(topi.cast(tvm.round(data.shape[2] * scale_d), data.shape[2].dtype)), - simplify(topi.cast(tvm.round(data.shape[3] * scale_h), data.shape[3].dtype)), - simplify(topi.cast(tvm.round(data.shape[4] * scale_w), data.shape[4].dtype))) + out_shape = (simplify(topi.cast(te.round(data.shape[2] * scale_d), data.shape[2].dtype)), + simplify(topi.cast(te.round(data.shape[3] * scale_h), data.shape[3].dtype)), + simplify(topi.cast(te.round(data.shape[4] * scale_w), data.shape[4].dtype))) elif layout == "NDHWC": - out_shape = (simplify(topi.cast(tvm.round(data.shape[1] * scale_d), data.shape[1].dtype)), - simplify(topi.cast(tvm.round(data.shape[2] * scale_h), data.shape[2].dtype)), - simplify(topi.cast(tvm.round(data.shape[3] * scale_w), data.shape[3].dtype))) + out_shape = (simplify(topi.cast(te.round(data.shape[1] * scale_d), data.shape[1].dtype)), + simplify(topi.cast(te.round(data.shape[2] * scale_h), data.shape[2].dtype)), + simplify(topi.cast(te.round(data.shape[3] * scale_w), data.shape[3].dtype))) else: raise ValueError("not support this layout {} yet".format(layout)) diff --git a/topi/python/topi/nn/util.py b/topi/python/topi/nn/util.py index f0cdd9a..5a9b49e 100644 --- a/topi/python/topi/nn/util.py +++ b/topi/python/topi/nn/util.py @@ -107,8 +107,8 
@@ def infer_stride(data, kernel, out): _, _, IH, IW = data.shape _, _, KH, KW = kernel.shape _, _, OH, OW = out.shape - hstride = (IH - KH) // tvm.make.Max(OH - 1, 1) + tvm.expr.Select(OH == 1, 1, 0) - wstride = (IW - KW) // tvm.make.Max(OW - 1, 1) + tvm.expr.Select(OW == 1, 1, 0) + hstride = (IH - KH) // tvm.te.max(OH - 1, 1) + tvm.tir.Select(OH == 1, 1, 0) + wstride = (IW - KW) // tvm.te.max(OW - 1, 1) + tvm.tir.Select(OW == 1, 1, 0) return get_const_int(hstride), get_const_int(wstride) diff --git a/topi/python/topi/nn/winograd_util.py b/topi/python/topi/nn/winograd_util.py index 464b633..d967431 100644 --- a/topi/python/topi/nn/winograd_util.py +++ b/topi/python/topi/nn/winograd_util.py @@ -55,7 +55,7 @@ def _cook_toom_convolution(a, n, r): f = lambda j, i: reduce(mul, ((a[i]-a[k] if k != i else 1) for k in range(0, n-1)), 1) Ff = np.fromfunction(np.vectorize(f), (1, n-1), dtype=int) f = lambda i, nth: (reduce(mul, [(np.poly1d([1, -a[k]]) if k != i else 1) \ - for k in range(0, n-1)], 1)).coef[n-1-nth-1]/Ff[0, i] + for k in range(0, n-1)], 1)).coef[n-1-nth-1]/Ff[0, i] F = np.fromfunction(np.vectorize(f), (n-1, n-1), dtype=int) f = lambda i, j: -a[i]**(n-1) t = np.fromfunction(np.vectorize(f), (n-1, 1), dtype=int) diff --git a/topi/python/topi/opengl/conv2d_nchw.py b/topi/python/topi/opengl/conv2d_nchw.py index 52ed119..c93bcc2 100644 --- a/topi/python/topi/opengl/conv2d_nchw.py +++ b/topi/python/topi/opengl/conv2d_nchw.py @@ -17,6 +17,7 @@ #pylint: disable=invalid-name, no-member, too-many-locals, too-many-statements, too-many-arguments, too-many-branches, line-too-long """Schedule for conv2d_nchw with auto fusion""" import tvm +from tvm import te from .. import tag def schedule_conv2d_nchw(outs): @@ -33,8 +34,8 @@ def schedule_conv2d_nchw(outs): s: Schedule The computation schedule for conv2d_nchw. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def _schedule(conv2d, data): @@ -53,14 +54,14 @@ def schedule_conv2d_nchw(outs): if OP not in s.outputs: s[OP].opengl() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, tvm.te.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule conv2d_nchw elif OP.tag.startswith('conv2d_nchw'): conv2d = OP.output(0) data = OP.input_tensors[0] kernel = OP.input_tensors[1] - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() _schedule(conv2d, data) else: diff --git a/topi/python/topi/opengl/dense.py b/topi/python/topi/opengl/dense.py index db2c4a6..715f713 100644 --- a/topi/python/topi/opengl/dense.py +++ b/topi/python/topi/opengl/dense.py @@ -16,8 +16,7 @@ # under the License. # pylint: disable=invalid-name, unused-variable """Schedule for dense operator""" -from __future__ import absolute_import as _abs -import tvm +from tvm import te from .. import tag def schedule_dense(outs): @@ -34,8 +33,8 @@ def schedule_dense(outs): s: Schedule The computation schedule for dense. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def _schedule(Dense): @@ -53,7 +52,7 @@ def schedule_dense(outs): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule dense elif OP.tag == 'dense': diff --git a/topi/python/topi/opengl/injective.py b/topi/python/topi/opengl/injective.py index 28dc87d..3d45247 100644 --- a/topi/python/topi/opengl/injective.py +++ b/topi/python/topi/opengl/injective.py @@ -16,7 +16,7 @@ # under the License. # pylint: disable=invalid-name, unused-variable, """Schedule for composition of injective operator""" -import tvm +from tvm import te def schedule_injective_from_existing(sch, out): """Schedule for injective op from existing schedule. @@ -50,10 +50,10 @@ def schedule_injective(outs): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) - tvm.schedule.AutoInlineInjective(s) + te.schedule.AutoInlineInjective(s) for out in outs: schedule_injective_from_existing(s, out) return s diff --git a/topi/python/topi/opengl/pooling.py b/topi/python/topi/opengl/pooling.py index 3226422..c30389c 100644 --- a/topi/python/topi/opengl/pooling.py +++ b/topi/python/topi/opengl/pooling.py @@ -16,7 +16,7 @@ # under the License. # pylint: disable=invalid-name, unused-variable, unused-argument """Schedule for pooling operators""" -import tvm +from tvm import te from .. 
import tag def schedule_adaptive_pool(outs): @@ -33,8 +33,8 @@ def schedule_adaptive_pool(outs): s: Schedule The computation schedule for adaptive pool. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def _schedule(Pool): @@ -52,7 +52,7 @@ def schedule_adaptive_pool(outs): if OP not in s.outputs: s[OP].opengl() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule global_pool elif OP.tag.startswith('adaptive_pool'): @@ -84,12 +84,12 @@ def schedule_pool(outs, layout): s: Schedule The computation schedule for pool. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def _schedule(PaddedInput, Pool): - if isinstance(PaddedInput.op, tvm.tensor.ComputeOp): + if isinstance(PaddedInput.op, te.tensor.ComputeOp): s[PaddedInput].opengl() if Pool.op in s.outputs: Out = Pool @@ -105,7 +105,7 @@ def schedule_pool(outs, layout): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if tensor.op not in scheduled_ops and isinstance(tensor.op, tvm.tensor.ComputeOp): + if tensor.op not in scheduled_ops and isinstance(tensor.op, te.tensor.ComputeOp): traverse(tensor.op) # schedule pool elif OP.tag.startswith('pool'): diff --git a/topi/python/topi/opengl/softmax.py b/topi/python/topi/opengl/softmax.py index ff218d1..e725134 100644 --- a/topi/python/topi/opengl/softmax.py +++ b/topi/python/topi/opengl/softmax.py @@ -16,7 +16,7 @@ # under the License. 
# pylint: disable=invalid-name, unused-variable, trailing-whitespace """Schedule for softmax operator""" -import tvm +from tvm import te def schedule_softmax(outs): """Schedule for softmax op. @@ -32,8 +32,8 @@ def schedule_softmax(outs): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) softmax = outs[0] op_tag = softmax.op.tag diff --git a/topi/python/topi/reduction.py b/topi/python/topi/reduction.py index 7c4e059..74ba688 100644 --- a/topi/python/topi/reduction.py +++ b/topi/python/topi/reduction.py @@ -45,7 +45,7 @@ def sum(data, axis=None, keepdims=False): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor The input tvm tensor axis : None or int or tuple of int @@ -60,7 +60,7 @@ def sum(data, axis=None, keepdims=False): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.sum(data, axis, keepdims) @@ -70,7 +70,7 @@ def all(data, axis=None, keepdims=False): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor The input tvm boolean tensor axis : None or int or tuple of int @@ -85,7 +85,7 @@ def all(data, axis=None, keepdims=False): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.all(data, axis, keepdims) @@ -95,7 +95,7 @@ def any(data, axis=None, keepdims=False): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor The input tvm boolean tensor axis : None or int or tuple of int @@ -110,7 +110,7 @@ def any(data, axis=None, keepdims=False): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.any(data, axis, keepdims) @@ -120,7 +120,7 @@ def max(data, axis=None, keepdims=False): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor The input tvm tensor axis : None or int or tuple of int @@ -135,7 +135,7 @@ def max(data, axis=None, 
keepdims=False): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.max(data, axis, keepdims) @@ -145,7 +145,7 @@ def min(data, axis=None, keepdims=False): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor The input tvm tensor axis : None or int or tuple of int @@ -160,7 +160,7 @@ def min(data, axis=None, keepdims=False): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.min(data, axis, keepdims) @@ -170,7 +170,7 @@ def argmax(data, axis=None, keepdims=False): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor The input tvm tensor axis : None or int or tuple of int @@ -185,7 +185,7 @@ def argmax(data, axis=None, keepdims=False): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.argmax(data, axis, keepdims) @@ -195,7 +195,7 @@ def argmin(data, axis=None, keepdims=False): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor The input tvm tensor axis : None or int or tuple of int @@ -210,7 +210,7 @@ def argmin(data, axis=None, keepdims=False): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.argmin(data, axis, keepdims) @@ -220,7 +220,7 @@ def prod(data, axis=None, keepdims=False): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor The input tvm tensor axis : None or int or tuple of int @@ -235,6 +235,6 @@ def prod(data, axis=None, keepdims=False): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.prod(data, axis, keepdims) diff --git a/topi/python/topi/rocm/conv2d.py b/topi/python/topi/rocm/conv2d.py index ce56dc4..713647e 100644 --- a/topi/python/topi/rocm/conv2d.py +++ b/topi/python/topi/rocm/conv2d.py @@ -32,10 +32,10 @@ def conv2d_nchw_miopen(cfg, data, kernel, strides, padding, dilation, out_dtype= cfg: ConfigEntity The config for this template - input : tvm.Tensor + input : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - filter : tvm.Tensor + filter : tvm.te.Tensor 4-D with 
shape [num_filter, in_channel, filter_height, filter_width] strides : int or a list/tuple of two ints @@ -51,7 +51,7 @@ def conv2d_nchw_miopen(cfg, data, kernel, strides, padding, dilation, out_dtype= Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ @@ -67,7 +67,7 @@ def conv2d_nchw_miopen(cfg, data, kernel, strides, padding, dilation, out_dtype= OH = (H + 2 * pad_h - KH) // stride_h + 1 OW = (W + 2 * pad_w - KW) // stride_w + 1 cfg.add_flop(2 * N * OH * OW * CO * CI * ((KH - 1) * dilation_h + 1) *\ - ((KW - 1) * dilation_w + 1)) + ((KW - 1) * dilation_w + 1)) return miopen.conv2d_forward(data, kernel, diff --git a/topi/python/topi/rocm/dense.py b/topi/python/topi/rocm/dense.py index 8729a62..097120d 100644 --- a/topi/python/topi/rocm/dense.py +++ b/topi/python/topi/rocm/dense.py @@ -16,8 +16,7 @@ # under the License. # pylint: disable=invalid-name, unused-variable, unused-argument """Schedule for dense operator""" -from __future__ import absolute_import as _abs -import tvm +from tvm import te from tvm import autotvm from tvm.contrib import rocblas from .. import generic, nn @@ -30,13 +29,13 @@ def dense(cfg, data, weight, bias=None, out_dtype=None): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 2-D with shape [batch, in_dim] - weight : tvm.Tensor + weight : tvm.te.Tensor 2-D with shape [out_dim, in_dim] - bias : tvm.Tensor, optional + bias : tvm.te.Tensor, optional 1-D with shape [out_dim] out_dtype : str @@ -44,7 +43,7 @@ def dense(cfg, data, weight, bias=None, out_dtype=None): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [batch, out_dim] """ assert len(data.shape) == 2 and len(weight.shape) == 2, \ @@ -71,8 +70,8 @@ def schedule_dense(cfg, outs): s: Schedule The computation schedule for dense. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if op.tag == 'dense': @@ -87,11 +86,11 @@ def schedule_dense(cfg, outs): else: Out = outs[0].op.output(0) s[Dense].compute_at(s[Out], s[Out].op.axis[1]) - s[Out].bind(s[Out].op.axis[0], tvm.thread_axis("blockIdx.y")) - s[Out].bind(s[Out].op.axis[1], tvm.thread_axis("blockIdx.x")) + s[Out].bind(s[Out].op.axis[0], te.thread_axis("blockIdx.y")) + s[Out].bind(s[Out].op.axis[1], te.thread_axis("blockIdx.x")) tx = s[Dense].op.reduce_axis[0] - thread_x = tvm.thread_axis("threadIdx.x") + thread_x = te.thread_axis("threadIdx.x") s[Dense].bind(tx, thread_x) s[DenseF].compute_at(s[Dense], tx) s[Dense].set_store_predicate(thread_x.var.equal(0)) @@ -107,13 +106,13 @@ def dense_rocblas(cfg, data, weight, bias=None, out_dtype=None): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 2-D with shape [batch, in_dim] - weight : tvm.Tensor + weight : tvm.te.Tensor 2-D with shape [out_dim, in_dim] - bias : tvm.Tensor, optional + bias : tvm.te.Tensor, optional 1-D with shape [out_dim] out_dtype : str @@ -121,7 +120,7 @@ def dense_rocblas(cfg, data, weight, bias=None, out_dtype=None): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [batch, out_dim] """ assert out_dtype == data.dtype, "Mixed precision not supported." 
@@ -130,9 +129,9 @@ def dense_rocblas(cfg, data, weight, bias=None, out_dtype=None): out_dim, _ = weight.shape cfg.add_flop(batch * in_dim * out_dim * 2) if bias is not None: - matmul = tvm.compute((batch, out_dim), - lambda i, j: matmul[i, j] + bias[j], - tag=tag.BROADCAST) + matmul = te.compute((batch, out_dim), + lambda i, j: matmul[i, j] + bias[j], + tag=tag.BROADCAST) return matmul diff --git a/topi/python/topi/sort.py b/topi/python/topi/sort.py index 96a0889..744da62 100644 --- a/topi/python/topi/sort.py +++ b/topi/python/topi/sort.py @@ -17,7 +17,7 @@ # pylint: disable=too-many-arguments """Argsort operator""" import tvm -from tvm import api +from tvm import te from .util import get_const_tuple def argsort(data, valid_count=None, axis=-1, is_ascend=1, dtype="float32"): @@ -27,14 +27,14 @@ def argsort(data, valid_count=None, axis=-1, is_ascend=1, dtype="float32"): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor The input tensor. - valid_count : tvm.Tensor, optional + valid_count : tvm.te.Tensor, optional 1-D tensor for valid number of boxes only for ssd. axis : int, optional - Axis along which to sort the input tensor. + Axis along which to sort the input tensor. By default the flattened array is used. is_ascend : boolean, optional @@ -45,7 +45,7 @@ def argsort(data, valid_count=None, axis=-1, is_ascend=1, dtype="float32"): Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor Sorted index tensor. 
Example @@ -54,7 +54,7 @@ def argsort(data, valid_count=None, axis=-1, is_ascend=1, dtype="float32"): # An example to use argsort dshape = (1, 5, 6) - data = tvm.placeholder(dshape, name="data") + data = te.placeholder(dshape, name="data") axis = 0 is_ascend = False out = argsort(data, axis=axis, is_ascend=is_ascend) @@ -66,35 +66,36 @@ def argsort(data, valid_count=None, axis=-1, is_ascend=1, dtype="float32"): tvm_out = tvm.nd.array(np.zeros(dshape, dtype=data.dtype), ctx) f(tvm_data, tvm_out) """ - data_buf = api.decl_buffer(data.shape, data.dtype, "data_buf", data_alignment=8) + data_buf = tvm.tir.decl_buffer(data.shape, data.dtype, "data_buf", data_alignment=8) if valid_count is not None: - valid_count_buf = api.decl_buffer(valid_count.shape, valid_count.dtype, - "valid_count_buf", data_alignment=4) - out_buf = api.decl_buffer(data.shape, "int32", "out_buf", data_alignment=8) + valid_count_buf = tvm.tir.decl_buffer( + valid_count.shape, valid_count.dtype, + "valid_count_buf", data_alignment=4) + out_buf = tvm.tir.decl_buffer(data.shape, "int32", "out_buf", data_alignment=8) out = \ - tvm.extern(data.shape, - [data, valid_count], - lambda ins, outs: tvm.call_packed( - "tvm.contrib.sort.argsort_nms", ins[0], ins[1], - outs[0], axis, is_ascend), - dtype="int32", - in_buffers=[data_buf, valid_count_buf], - out_buffers=out_buf, - name="argsort_nms_cpu", - tag="argsort_nms_cpu") + te.extern(data.shape, + [data, valid_count], + lambda ins, outs: tvm.tir.call_packed( + "tvm.contrib.sort.argsort_nms", ins[0], ins[1], + outs[0], axis, is_ascend), + dtype="int32", + in_buffers=[data_buf, valid_count_buf], + out_buffers=out_buf, + name="argsort_nms_cpu", + tag="argsort_nms_cpu") else: - out_buf = api.decl_buffer(data.shape, dtype, "out_buf", data_alignment=8) + out_buf = tvm.tir.decl_buffer(data.shape, dtype, "out_buf", data_alignment=8) out = \ - tvm.extern(data.shape, - [data], - lambda ins, outs: tvm.call_packed( - "tvm.contrib.sort.argsort", ins[0], - outs[0], axis, 
is_ascend), - dtype=dtype, - in_buffers=[data_buf], - out_buffers=out_buf, - name="argsort_cpu", - tag="argsort_cpu") + te.extern(data.shape, + [data], + lambda ins, outs: tvm.tir.call_packed( + "tvm.contrib.sort.argsort", ins[0], + outs[0], axis, is_ascend), + dtype=dtype, + in_buffers=[data_buf], + out_buffers=out_buf, + name="argsort_cpu", + tag="argsort_cpu") return out @@ -103,7 +104,7 @@ def topk(data, k=1, axis=-1, ret_type="both", is_ascend=False, dtype="int64"): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor The input tensor. k : int, optional @@ -126,27 +127,27 @@ def topk(data, k=1, axis=-1, ret_type="both", is_ascend=False, dtype="int64"): Returns ------- - out : tvm.Tensor or List[tvm.Tensor] + out : tvm.te.Tensor or List[tvm.te.Tensor] The computed result. """ assert ret_type in ["both", "values", "indices"] - data_buf = api.decl_buffer(data.shape, data.dtype, "data_buf", data_alignment=8) + data_buf = tvm.tir.decl_buffer(data.shape, data.dtype, "data_buf", data_alignment=8) out_shape = list(get_const_tuple(data.shape)) if k >= 1: out_shape[axis] = k out_bufs = [] if ret_type in ["both", "values"]: - out_bufs.append(api.decl_buffer(out_shape, data.dtype, "value_buf", data_alignment=8)) + out_bufs.append(tvm.tir.decl_buffer(out_shape, data.dtype, "value_buf", data_alignment=8)) if ret_type in ["both", "indices"]: - out_bufs.append(api.decl_buffer(out_shape, dtype, "indices_buf", data_alignment=8)) + out_bufs.append(tvm.tir.decl_buffer(out_shape, dtype, "indices_buf", data_alignment=8)) out_shapes = [out_shape] * len(out_bufs) - out = tvm.extern(out_shapes, - [data], - lambda ins, outs: tvm.call_packed( - "tvm.contrib.sort.topk", ins[0], *outs, k, axis, ret_type, is_ascend), - in_buffers=[data_buf], - out_buffers=out_bufs, - name="topk_cpu", - tag="topk_cpu") + out = te.extern(out_shapes, + [data], + lambda ins, outs: tvm.tir.call_packed( + "tvm.contrib.sort.topk", ins[0], *outs, k, axis, ret_type, is_ascend), + in_buffers=[data_buf], 
+ out_buffers=out_bufs, + name="topk_cpu", + tag="topk_cpu") return out diff --git a/topi/python/topi/sparse/csrmm.py b/topi/python/topi/sparse/csrmm.py index 29f9cb4..8dc0894 100644 --- a/topi/python/topi/sparse/csrmm.py +++ b/topi/python/topi/sparse/csrmm.py @@ -17,6 +17,7 @@ """TVM operator compute SpMM in CSR format.""" from __future__ import absolute_import import tvm +from tvm import te from .. import tag from ..util import simplify @@ -26,37 +27,37 @@ def csrmm_default(data, indices, indptr, weight, bias=None): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 1-D with shape [nonzeros] - indices : tvm.Tensor + indices : tvm.te.Tensor 1-D with shape [nonzeros] - indptr : tvm.Tensor + indptr : tvm.te.Tensor 1-D with shape [m+1] - weight : tvm.Tensor + weight : tvm.te.Tensor 2-D with shape [k, n] - bias : tvm.Tensor, optional + bias : tvm.te.Tensor, optional 1-D with shape [m] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [m, n] """ assert len(data.shape) == 1 and len(indices.shape) == 1 and len(indptr.shape) == 1 \ and len(weight.shape) == 2, "only support 2-dim csrmm" - assert isinstance(weight, tvm.tensor.Tensor), \ - "weight matrix is assumed to be tvm.Tensor, but weight is `%s`" % (type(weight)) + assert isinstance(weight, te.tensor.Tensor), \ + "weight matrix is assumed to be tvm.te.Tensor, but weight is `%s`" % (type(weight)) if bias is not None: assert len(bias.shape) == 1 M = simplify(indptr.shape[0]-1) _, N = weight.shape def csrmm_default_ir(data, indices, indptr, weight, out): """define ir for csrmm""" - irb = tvm.ir_builder.create() + irb = tvm.tir.ir_builder.create() data_ptr = irb.buffer_ptr(data) indices_ptr = irb.buffer_ptr(indices) indptr_ptr = irb.buffer_ptr(indptr) @@ -78,12 +79,12 @@ def csrmm_default(data, indices, indptr, weight, bias=None): out_ptr[row*N+n] += dot[0] return irb.get() oshape = (M, N) - matmul = tvm.extern(oshape, [data, indices, indptr, weight], - lambda ins, outs: 
csrmm_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]), - tag="csrmm", dtype='float32', name='out') + matmul = te.extern(oshape, [data, indices, indptr, weight], + lambda ins, outs: csrmm_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]), + tag="csrmm", dtype='float32', name='out') if bias is not None: - matmul = tvm.compute(oshape, lambda i, j: matmul[i, j] + bias[i], \ - tag=tag.BROADCAST) + matmul = te.compute(oshape, lambda i, j: matmul[i, j] + bias[i], \ + tag=tag.BROADCAST) return matmul @@ -96,15 +97,15 @@ def csrmm(a, b, c=None): a : tvm.contrib.sparse.CSRNDArray 2-D sparse matrix with shape [m, k] - b : tvm.Tensor + b : tvm.te.Tensor 2-D dense matrix with shape [k, n] - c : tvm.Tensor, optional + c : tvm.te.Tensor, optional 1-D dense vector with shape [n] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [m, n] """ return csrmm_default(a.data, a.indices, a.indptr, b, c) diff --git a/topi/python/topi/sparse/csrmv.py b/topi/python/topi/sparse/csrmv.py index 8a21f0d..c0aa1b4 100644 --- a/topi/python/topi/sparse/csrmv.py +++ b/topi/python/topi/sparse/csrmv.py @@ -17,6 +17,7 @@ """TVM operator compute SpMV in CSR format.""" from __future__ import absolute_import import tvm +from tvm import te from .. 
import tag def csrmv_default(data, indices, indptr, weight, bias=None): @@ -24,36 +25,36 @@ def csrmv_default(data, indices, indptr, weight, bias=None): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 1-D with shape [nonzeros] - indices : tvm.Tensor + indices : tvm.te.Tensor 1-D with shape [nonzeros] - indptr : tvm.Tensor + indptr : tvm.te.Tensor 1-D with shape [m+1] - weight : tvm.Tensor + weight : tvm.te.Tensor 2-D with shape [k, 1] - bias : tvm.Tensor, optional + bias : tvm.te.Tensor, optional 1-D with shape [1] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [m, 1] """ assert len(data.shape) == 1 and len(weight.shape) == 2, \ "only support 2-dim csrmv" - assert isinstance(weight, tvm.tensor.Tensor), \ - "weight matrix is assumed to be tvm.Tensor, but weight is `%s`" % (type(weight)) + assert isinstance(weight, te.tensor.Tensor), \ + "weight matrix is assumed to be tvm.te.Tensor, but weight is `%s`" % (type(weight)) if bias is not None: assert len(bias.shape) == 1 batch = indptr.shape[0]-1 def csrmv_default_ir(data, indices, indptr, weight, out): """define ir for csrmv""" - irb = tvm.ir_builder.create() + irb = tvm.tir.ir_builder.create() data_ptr = irb.buffer_ptr(data) indices_ptr = irb.buffer_ptr(indices) indptr_ptr = irb.buffer_ptr(indptr) @@ -73,12 +74,12 @@ def csrmv_default(data, indices, indptr, weight, bias=None): out_ptr[row] += dot[0] return irb.get() oshape = (batch, 1) - matmul = tvm.extern(oshape, [data, indices, indptr, weight], - lambda ins, outs: csrmv_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]), - tag="csrmv", dtype='float32', name='csrmv') + matmul = te.extern(oshape, [data, indices, indptr, weight], + lambda ins, outs: csrmv_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]), + tag="csrmv", dtype='float32', name='csrmv') if bias is not None: - matmul = tvm.compute((batch, 1), lambda i, j: matmul[i, 0] + bias[i], \ - tag=tag.BROADCAST) + matmul = te.compute((batch, 1), lambda i, j: 
matmul[i, 0] + bias[i], \ + tag=tag.BROADCAST) return matmul @@ -91,15 +92,15 @@ def csrmv(a, x, y=None): a : tvm.contrib.sparse.CSRNDArray 2-D sparse matrix with shape [m, k] - x : tvm.Tensor + x : tvm.te.Tensor 2-D dense matrix with shape [k, 1] - y : tvm.Tensor, optional + y : tvm.te.Tensor, optional 1-D dense vector with shape [1] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D dense matrix with shape [m, 1] """ return csrmv_default(a.data, a.indices, a.indptr, x, y) diff --git a/topi/python/topi/sparse/dense.py b/topi/python/topi/sparse/dense.py index fe21e2f..9f01405 100644 --- a/topi/python/topi/sparse/dense.py +++ b/topi/python/topi/sparse/dense.py @@ -17,6 +17,7 @@ """TVM operator compute Dense in CSR format.""" from __future__ import absolute_import import tvm +from tvm import te from .. import tag from ..util import simplify @@ -26,30 +27,30 @@ def dense_si(data, indices, indptr, weight, bias=None): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 1-D with shape [num_nonzeros] - indices : tvm.Tensor + indices : tvm.te.Tensor 1-D with shape [num_nonzeros] - indptr : tvm.Tensor + indptr : tvm.te.Tensor 1-D with shape [m+1] - weight : tvm.Tensor + weight : tvm.te.Tensor 2-D with shape [k, n] - bias : tvm.Tensor, optional + bias : tvm.te.Tensor, optional 1-D with shape [m] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [m, n] """ assert len(data.shape) == 1 and len(indices.shape) == 1 and len(indptr.shape) == 1 \ and len(weight.shape) == 2, "only support 2-dim dense" - assert isinstance(weight, tvm.tensor.Tensor), \ - "weight matrix is assumed to be tvm.Tensor, but weight is `%s`" % (type(weight)) + assert isinstance(weight, te.tensor.Tensor), \ + "weight matrix is assumed to be tvm.te.Tensor, but weight is `%s`" % (type(weight)) if bias is not None: assert len(bias.shape) == 1 dtype = data.dtype @@ -58,7 +59,7 @@ def dense_si(data, indices, indptr, weight, bias=None): def 
dense_default_ir(data, indices, indptr, weight, out): """Define IR for Dense""" dtype = data.dtype - irb = tvm.ir_builder.create() + irb = tvm.tir.ir_builder.create() data_ptr = irb.buffer_ptr(data) indices_ptr = irb.buffer_ptr(indices) indptr_ptr = irb.buffer_ptr(indptr) @@ -69,8 +70,8 @@ def dense_si(data, indices, indptr, weight, bias=None): with irb.for_range(0, N, for_type="vectorize", name='n') as n: with irb.for_range(0, M, for_type="parallel", name='m') as m: dot = irb.allocate(dtype, (1,), name='dot', scope='local') - out_ptr[m*N+n] = tvm.const(0, dtype) - dot[0] = tvm.const(0, dtype) + out_ptr[m*N+n] = tvm.tir.const(0, dtype) + dot[0] = tvm.tir.const(0, dtype) row_start = indptr_ptr[m] row_elems = indptr_ptr[m+1]-row_start with irb.for_range(0, row_elems, name='k') as k: @@ -79,12 +80,12 @@ def dense_si(data, indices, indptr, weight, bias=None): out_ptr[m*N+n] += dot[0] return irb.get() oshape = (M, N) - matmul = tvm.extern(oshape, [data, indices, indptr, weight], - lambda ins, outs: dense_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]), - tag="dense", dtype=dtype, name='out') + matmul = te.extern(oshape, [data, indices, indptr, weight], + lambda ins, outs: dense_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]), + tag="dense", dtype=dtype, name='out') if bias is not None: - matmul = tvm.compute(oshape, lambda i, j: matmul[i, j] + bias[j], \ - tag=tag.BROADCAST) + matmul = te.compute(oshape, lambda i, j: matmul[i, j] + bias[j], \ + tag=tag.BROADCAST) return matmul @@ -94,30 +95,30 @@ def dense_sw(data, w_data, w_indices, w_indptr, bias=None): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 2-D with shape [m, k] - w_data : tvm.Tensor + w_data : tvm.te.Tensor 1-D with shape [nonzeros] - w_indices : tvm.Tensor + w_indices : tvm.te.Tensor 1-D with shape [nonzeros] - w_indptr : tvm.Tensor + w_indptr : tvm.te.Tensor 1-D with shape [n+1] - bias : tvm.Tensor, optional + bias : tvm.te.Tensor, optional 1-D with shape [n] Returns ------- - 
output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [m, n] """ assert len(w_data.shape) == 1 and len(w_indices.shape) == 1 and len(w_indptr.shape) == 1 \ and len(data.shape) == 2, "only support 2-dim dense" - assert isinstance(data, tvm.tensor.Tensor), \ - "data matrix is assumed to be tvm.Tensor, but weight is `%s`" % (type(data)) + assert isinstance(data, te.tensor.Tensor), \ + "data matrix is assumed to be tvm.te.Tensor, but weight is `%s`" % (type(data)) if bias is not None: assert len(bias.shape) == 1 dtype = data.dtype @@ -126,7 +127,7 @@ def dense_sw(data, w_data, w_indices, w_indptr, bias=None): def dense_default_ir(data, w_data, w_indices, w_indptr, out): """Define IR for Dense""" dtype = data.dtype - irb = tvm.ir_builder.create() + irb = tvm.tir.ir_builder.create() data_ptr = irb.buffer_ptr(data) w_data_ptr = irb.buffer_ptr(w_data) w_indices_ptr = irb.buffer_ptr(w_indices) @@ -137,8 +138,8 @@ def dense_sw(data, w_data, w_indices, w_indptr, bias=None): with irb.for_range(0, M, for_type="vectorize", name='m') as m: with irb.for_range(0, N, for_type="parallel", name='n') as n: dot = irb.allocate(dtype, (1,), name='dot', scope='local') - out_ptr[m*N+n] = tvm.const(0, dtype) - dot[0] = tvm.const(0, dtype) + out_ptr[m*N+n] = tvm.tir.const(0, dtype) + dot[0] = tvm.tir.const(0, dtype) row_start = w_indptr_ptr[n] row_elems = w_indptr_ptr[n+1]-row_start with irb.for_range(0, row_elems, name='k') as k: @@ -147,12 +148,12 @@ def dense_sw(data, w_data, w_indices, w_indptr, bias=None): out_ptr[m*N+n] += dot[0] return irb.get() oshape = (M, N) - matmul = tvm.extern(oshape, [data, w_data, w_indices, w_indptr], - lambda ins, outs: dense_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]), - tag="dense", dtype=dtype, name='out') + matmul = te.extern(oshape, [data, w_data, w_indices, w_indptr], + lambda ins, outs: dense_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]), + tag="dense", dtype=dtype, name='out') if bias is not None: - matmul = tvm.compute(oshape, 
lambda i, j: matmul[i, j] + bias[j], \ - tag=tag.BROADCAST) + matmul = te.compute(oshape, lambda i, j: matmul[i, j] + bias[j], \ + tag=tag.BROADCAST) return matmul @@ -162,26 +163,26 @@ def dense(data, weight, bias=None): Parameters ---------- - data : tvm.contrib.sparse.CSRNDArray or tvm.tensor.Tensor + data : tvm.contrib.sparse.CSRNDArray or te.tensor.Tensor 2-D with shape [batch, in_dim] - weight : tvm.tensor.Tensor or tvm.contrib.sparse.CSRNDArray + weight : te.tensor.Tensor or tvm.contrib.sparse.CSRNDArray 2-D with shape [out_dim, in_dim] - bias : tvm.tensor.Tensor, optional + bias : te.tensor.Tensor, optional 1-D with shape [out_dim] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [batch, out_dim] """ ret = None if isinstance(data, tvm.contrib.sparse.CSRPlaceholderOp) and \ - isinstance(weight, tvm.tensor.Tensor): + isinstance(weight, te.tensor.Tensor): ret = dense_si(data.data, data.indices, data.indptr, weight, bias) - elif isinstance(data, tvm.tensor.Tensor) and \ - isinstance(weight, tvm.contrib.sparse.CSRPlaceholderOp): + elif isinstance(data, te.tensor.Tensor) and \ + isinstance(weight, tvm.contrib.sparse.CSRPlaceholderOp): ret = dense_sw(data, weight.data, weight.indices, weight.indptr, bias) else: raise NotImplementedError("implementation for %s as data and %s as weights, " diff --git a/topi/python/topi/tensor.py b/topi/python/topi/tensor.py index 0231efc..0071242 100644 --- a/topi/python/topi/tensor.py +++ b/topi/python/topi/tensor.py @@ -24,12 +24,12 @@ def elemwise_sum(xs): Parameters ---------- - xs : list of tvm.Tensor + xs : list of tvm.te.Tensor Input arguments. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ return cpp.elemwise_sum(xs) @@ -49,7 +49,7 @@ def full(shape, dtype, fill_value): Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. 
""" return cpp.full(shape, dtype, fill_value) @@ -61,14 +61,14 @@ def full_like(x, fill_value): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. fill_value : float Value to be filled Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ return cpp.full_like(x, fill_value) diff --git a/topi/python/topi/testing/conv2d_transpose_python.py b/topi/python/topi/testing/conv2d_transpose_python.py index 50c43eb..c789fec 100644 --- a/topi/python/topi/testing/conv2d_transpose_python.py +++ b/topi/python/topi/testing/conv2d_transpose_python.py @@ -59,9 +59,9 @@ def conv2d_transpose_nchw_python(a_np, w_np, stride, padding): bpad_left = filter_w - 1 - fpad_left bpad_right = filter_w - 1 - fpad_right padded_a_np = np.zeros((batch, in_c, dilated_a_np.shape[2]+bpad_top+bpad_bottom, \ - dilated_a_np.shape[3]+bpad_left+bpad_right)) + dilated_a_np.shape[3]+bpad_left+bpad_right)) padded_a_np[:, :, bpad_top:dilated_a_np.shape[2]+bpad_top, \ - bpad_left:dilated_a_np.shape[3]+bpad_left] = dilated_a_np + bpad_left:dilated_a_np.shape[3]+bpad_left] = dilated_a_np # convolution stage out_h = (in_h - 1) * stride_h - fpad_top - fpad_bottom + filter_h out_w = (in_w - 1) * stride_w - fpad_left - fpad_right + filter_w diff --git a/topi/python/topi/testing/conv3d_ncdhw_python.py b/topi/python/topi/testing/conv3d_ncdhw_python.py index 825ec62..063c07d 100644 --- a/topi/python/topi/testing/conv3d_ncdhw_python.py +++ b/topi/python/topi/testing/conv3d_ncdhw_python.py @@ -48,7 +48,7 @@ def _conv3d_ncdhw_python(a_np, w_np, stride, padding): if pad_d > 0 or pad_h > 0 or pad_w > 0: apad = np.zeros((in_depth + pad_d, in_height + pad_h, in_width + pad_w)) apad[pad_front:pad_front + in_depth, pad_top:pad_top + in_height,\ - pad_left:pad_left + in_width] = a_np[n, c] + pad_left:pad_left + in_width] = a_np[n, c] else: apad = a_np[n, c] out = scipy.signal.convolve( diff --git a/topi/python/topi/testing/conv3d_ndhwc_python.py 
b/topi/python/topi/testing/conv3d_ndhwc_python.py index 2810f72..85b991f 100644 --- a/topi/python/topi/testing/conv3d_ndhwc_python.py +++ b/topi/python/topi/testing/conv3d_ndhwc_python.py @@ -73,7 +73,7 @@ def conv3d_ndhwc_python(a_np, w_np, stride, padding): if pad_d > 0 or pad_h > 0 or pad_w > 0: apad = np.zeros((in_depth + pad_d, in_height + pad_h, in_width + pad_w)) apad[pad_front:pad_front + in_depth, pad_top:pad_top + in_height,\ - pad_left:pad_left + in_width] = at[n, c] + pad_left:pad_left + in_width] = at[n, c] else: apad = at[n, c] out = scipy.signal.convolve( diff --git a/topi/python/topi/testing/depthwise_conv2d_python.py b/topi/python/topi/testing/depthwise_conv2d_python.py index 566bb93..5addc75 100644 --- a/topi/python/topi/testing/depthwise_conv2d_python.py +++ b/topi/python/topi/testing/depthwise_conv2d_python.py @@ -57,8 +57,8 @@ def depthwise_conv2d_python_nchw(input_np, filter_np, stride, padding): for i in range(batch): for j in range(out_channel): output_np[i, j, :, :] = signal.convolve2d(input_np[i, j//channel_multiplier, :, :], \ - np.rot90(filter_np[j//channel_multiplier, j%channel_multiplier, :, :], 2), \ - mode='valid')[0:(in_height - filter_height + 1):stride_h, 0:(in_width - filter_height + 1):stride_w] + np.rot90(filter_np[j//channel_multiplier, j%channel_multiplier, :, :], 2), \ + mode='valid')[0:(in_height - filter_height + 1):stride_h, 0:(in_width - filter_height + 1):stride_w] if padding == 'SAME': out_channel = in_channel * channel_multiplier out_height = np.int(np.ceil(float(in_height) / float(stride_h))) @@ -75,8 +75,8 @@ def depthwise_conv2d_python_nchw(input_np, filter_np, stride, padding): for i in range(batch): for j in range(out_channel): output_np[i, j, :, :] = signal.convolve2d(input_np[i, j//channel_multiplier, :, :], \ - np.rot90(filter_np[j//channel_multiplier, j%channel_multiplier, :, :], 2), \ - mode='same')[index_h:in_height:stride_h, index_w:in_width:stride_w] + np.rot90(filter_np[j//channel_multiplier, 
j%channel_multiplier, :, :], 2), \ + mode='same')[index_h:in_height:stride_h, index_w:in_width:stride_w] return output_np @@ -118,8 +118,8 @@ def depthwise_conv2d_python_nhwc(input_np, filter_np, stride, padding): for i in range(batch): for j in range(out_channel): output_np[i, :, :, j] = signal.convolve2d(input_np[i, :, :, j//channel_multiplier], \ - np.rot90(filter_np[:, :, j//channel_multiplier, j%channel_multiplier], 2), \ - mode='valid')[0:(in_height - filter_height + 1):stride_h, 0:(in_width - filter_height + 1):stride_w] + np.rot90(filter_np[:, :, j//channel_multiplier, j%channel_multiplier], 2), \ + mode='valid')[0:(in_height - filter_height + 1):stride_h, 0:(in_width - filter_height + 1):stride_w] if padding == 'SAME': out_channel = in_channel * channel_multiplier out_height = np.int(np.ceil(float(in_height) / float(stride_h))) @@ -136,7 +136,7 @@ def depthwise_conv2d_python_nhwc(input_np, filter_np, stride, padding): for i in range(batch): for j in range(out_channel): output_np[i, :, :, j] = signal.convolve2d(input_np[i, :, :, j//channel_multiplier], \ - np.rot90(filter_np[:, :, j//channel_multiplier, j%channel_multiplier], 2), \ - mode='same')[index_h:in_height:stride_h, index_w:in_width:stride_w] + np.rot90(filter_np[:, :, j//channel_multiplier, j%channel_multiplier], 2), \ + mode='same')[index_h:in_height:stride_h, index_w:in_width:stride_w] return output_np diff --git a/topi/python/topi/testing/pool3d_python.py b/topi/python/topi/testing/pool3d_python.py index 631a995..2606650 100644 --- a/topi/python/topi/testing/pool3d_python.py +++ b/topi/python/topi/testing/pool3d_python.py @@ -40,9 +40,9 @@ def pool3d_ncdhw_python(np_data, kernel, assert out_shape[3] == int(math.floor(float(in_shape[3] - k_h + pt + pb) / s_h) + 1) assert out_shape[4] == int(math.floor(float(in_shape[4] - k_w + pl + pr) / s_w) + 1) - fill_value = tvm.const(0.0, dtype).value + fill_value = tvm.tir.const(0.0, dtype).value if not(count_include_pad) and pool_type == 'max': - 
fill_value = tvm.min_value(dtype).value + fill_value = tvm.te.min_value(dtype).value pad_np = np.full(shape=(in_n, in_c, in_d + pf + pk, diff --git a/topi/python/topi/testing/pool_grad_python.py b/topi/python/topi/testing/pool_grad_python.py index f1e51f0..ee671c2 100644 --- a/topi/python/topi/testing/pool_grad_python.py +++ b/topi/python/topi/testing/pool_grad_python.py @@ -53,7 +53,7 @@ def pool_grad_nchw(a_np, out_grad_np, # take the first element, as they are the same across batch and channel pad_count = pad_count.ravel()[0] pad_pool_grad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw] += \ - out_grad_np[:, :, i, j].reshape(n, ic, 1, 1) / np.maximum(pad_count, 1) + out_grad_np[:, :, i, j].reshape(n, ic, 1, 1) / np.maximum(pad_count, 1) elif pool_type == 'max': for i in range(oh): for j in range(ow): diff --git a/topi/python/topi/testing/roi_align_python.py b/topi/python/topi/testing/roi_align_python.py index 6ba2061..d328549 100644 --- a/topi/python/topi/testing/roi_align_python.py +++ b/topi/python/topi/testing/roi_align_python.py @@ -45,8 +45,8 @@ def roi_align_nchw_python(a_np, rois_np, pooled_size, spatial_scale, sample_rati lx = x - x_low return (1 - ly) * (1 - lx) * a_np[b, c, y_low, x_low] + \ (1 - ly) * lx * a_np[b, c, y_low, x_high] + \ - ly * (1 - lx) * a_np[b, c, y_high, x_low] + \ - ly * lx * a_np[b, c, y_high, x_high] + ly * (1 - lx) * a_np[b, c, y_high, x_low] + \ + ly * lx * a_np[b, c, y_high, x_high] for i in range(num_roi): roi = rois_np[i] diff --git a/topi/python/topi/transform.py b/topi/python/topi/transform.py index bdeb223..036191b 100644 --- a/topi/python/topi/transform.py +++ b/topi/python/topi/transform.py @@ -18,6 +18,7 @@ """Injective transformation operators""" from __future__ import absolute_import as _abs import tvm +from tvm import te import topi from . import cpp from . import tag @@ -29,7 +30,7 @@ def expand_dims(a, axis, num_newaxis=1): Parameters ---------- - a : tvm.Tensor + a : tvm.te.Tensor The tensor to be expanded. 
num_newaxis: int, optional @@ -37,7 +38,7 @@ def expand_dims(a, axis, num_newaxis=1): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.expand_dims(a, axis, num_newaxis) @@ -63,21 +64,21 @@ def expand_like(a, shape_like, axis): Parameters ---------- - a : tvm.Tensor + a : tvm.te.Tensor The tensor to be expanded. - shape_like : tvm.Tensor + shape_like : tvm.te.Tensor The tensor to with target shape. axis: list of int axis to be expanded on Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ odim = len(axis) + len(a.shape) if odim != len(shape_like.shape): if len(a.shape) == 1 and len(axis) == len(shape_like.shape): # A special case: `a` is a scalar represented as a 1-dim tensor - return tvm.compute(shape_like.shape, lambda *idxs: a(0)) + return te.compute(shape_like.shape, lambda *idxs: a(0)) raise ValueError("shape inconsistent when expand_like ({}, {}, {})".format( len(axis), len(a.shape), len(shape_like.shape))) @@ -92,7 +93,7 @@ def expand_like(a, shape_like, axis): indices.append(idxs[i]) axis_index += 1 return a(*indices) - return tvm.compute(shape_like.shape, _compute) + return te.compute(shape_like.shape, _compute) def transpose(a, axes=None): @@ -100,7 +101,7 @@ def transpose(a, axes=None): Parameters ---------- - a : tvm.Tensor + a : tvm.te.Tensor The tensor to be expanded. axes: tuple of ints, optional @@ -108,7 +109,7 @@ def transpose(a, axes=None): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.transpose(a, axes) @@ -118,7 +119,7 @@ def flip(a, axis=0): Parameters ---------- - a : tvm.Tensor + a : tvm.te.Tensor The tensor to be expanded. axis : int, optional @@ -126,7 +127,7 @@ def flip(a, axis=0): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.flip(a, axis) @@ -135,7 +136,7 @@ def strided_slice(a, begin, end, strides=None): Parameters ---------- - a : tvm.Tensor + a : tvm.te.Tensor The tensor to be sliced. 
begin: list of int @@ -151,38 +152,38 @@ def strided_slice(a, begin, end, strides=None): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ if strides is None: strides = [] return cpp.strided_slice(a, begin, end, strides) -@tvm.tag_scope(tag=tag.INJECTIVE+",strided_set") +@tvm.te.tag_scope(tag=tag.INJECTIVE+",strided_set") def strided_set(a, v, begin, end, strides=None): """Set slice of an array. Parameters ---------- - a : tvm.Tensor + a : tvm.te.Tensor The tensor to be sliced. - v : tvm.Tensor + v : tvm.te.Tensor The values to set - begin: tvm.Tensor + begin: tvm.te.Tensor The indices to begin with in the slicing. - end: tvm.Tensor + end: tvm.te.Tensor Indicies indicating end of the slice. - strides: tvm.Tensor, optional + strides: tvm.te.Tensor, optional Specifies the stride values, it can be negative in that case, the input tensor will be reversed in that particular axis. Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ n = len(a.shape) @@ -201,38 +202,38 @@ def strided_set(a, v, begin, end, strides=None): raise TypeError("strides should be int32") def _max(a, b): - return tvm.expr.Select(a > b, a, b) + return tvm.tir.Select(a > b, a, b) if strides is None: - strides = [tvm.const(1, 'int32')] * n + strides = [tvm.tir.const(1, 'int32')] * n else: - strides = [tvm.if_then_else(strides.shape[0] > i, - strides[i], - tvm.const(1, 'int32')) + strides = [tvm.tir.if_then_else(strides.shape[0] > i, + strides[i], + tvm.tir.const(1, 'int32')) for i in range(n)] - begin = [tvm.if_then_else(begin.shape[0] > i, - begin[i], - tvm.expr.Select(strides[i] > 0, - tvm.const(0, 'int32'), - a.shape[i])) + begin = [tvm.tir.if_then_else(begin.shape[0] > i, + begin[i], + tvm.tir.Select(strides[i] > 0, + tvm.tir.const(0, 'int32'), + a.shape[i])) for i in range(n)] - end = [tvm.if_then_else(end.shape[0] > i, - end[i], - tvm.expr.Select(strides[i] > 0, - a.shape[i] + 1, - -(a.shape[i] + 1))) + end = [tvm.tir.if_then_else(end.shape[0] > i, + end[i], + 
tvm.tir.Select(strides[i] > 0, + a.shape[i] + 1, + -(a.shape[i] + 1))) for i in range(n)] # Convert negative indexes for i in range(n): - begin[i] = tvm.if_then_else(begin[i] < 0, - begin[i] + a.shape[i], - begin[i]) - end[i] = tvm.if_then_else(end[i] < 0, - end[i] + a.shape[i], - end[i]) + begin[i] = tvm.tir.if_then_else(begin[i] < 0, + begin[i] + a.shape[i], + begin[i]) + end[i] = tvm.tir.if_then_else(end[i] < 0, + end[i] + a.shape[i], + end[i]) def _select(*indices): from_val = [] @@ -241,9 +242,9 @@ def strided_set(a, v, begin, end, strides=None): from_val.append(within_index(begin[i], end[i], strides[i], indices[i])) index_tuple.append( make_idx(begin[i], end[i], strides[i], a.shape[i], indices[i])) - return tvm.if_then_else(tvm.all(*from_val), v(*index_tuple), a(*indices)) + return tvm.tir.if_then_else(tvm.tir.all(*from_val), v(*index_tuple), a(*indices)) - return tvm.compute(a.shape, _select, name="strided_set") + return te.compute(a.shape, _select, name="strided_set") def reshape(a, newshape): @@ -251,14 +252,14 @@ def reshape(a, newshape): Parameters ---------- - a : tvm.Tensor + a : tvm.te.Tensor The tensor to be reshaped newshape : tuple of ints The new shape Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.reshape(a, newshape) @@ -268,7 +269,7 @@ def squeeze(a, axis=None): Parameters ---------- - a : tvm.Tensor + a : tvm.te.Tensor axis : None or int or tuple of ints, optional Selects a subset of the single-dimensional entries in the shape. 
@@ -276,7 +277,7 @@ def squeeze(a, axis=None): Returns ------- - squeezed : tvm.Tensor + squeezed : tvm.te.Tensor """ return cpp.squeeze(a, axis) @@ -286,7 +287,7 @@ def concatenate(a_tuple, axis=0): Parameters ---------- - a_tuple : tuple of tvm.Tensor + a_tuple : tuple of tvm.te.Tensor The arrays to concatenate axis : int, optional @@ -294,7 +295,7 @@ def concatenate(a_tuple, axis=0): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.concatenate(a_tuple, axis) @@ -304,7 +305,7 @@ def stack(a, axis): Parameters ---------- - a : tvm.Tensor + a : tvm.te.Tensor The tensor to be stacked. axis : int, optional @@ -313,7 +314,7 @@ def stack(a, axis): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.stack(a, axis) @@ -323,7 +324,7 @@ def split(ary, indices_or_sections, axis=0): Parameters ---------- - ary : tvm.Tensor + ary : tvm.te.Tensor indices_or_sections : int or 1-D array @@ -331,7 +332,7 @@ def split(ary, indices_or_sections, axis=0): Returns ------- - ret : tuple of tvm.Tensor + ret : tuple of tvm.te.Tensor """ return cpp.split(ary, indices_or_sections, axis) @@ -341,10 +342,10 @@ def take(a, indices, axis=None, mode="clip"): Parameters ---------- - a : tvm.Tensor + a : tvm.te.Tensor The source array. - indices : tvm.Tensor + indices : tvm.te.Tensor The indices of the values to extract. axis : int, optional @@ -359,7 +360,7 @@ def take(a, indices, axis=None, mode="clip"): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ if axis is None: return cpp.take(a, indices, mode) @@ -371,15 +372,15 @@ def gather_nd(a, indices): Parameters ---------- - a : tvm.Tensor + a : tvm.te.Tensor The source array. - indices : tvm.Tensor + indices : tvm.te.Tensor The indices of the values to extract. 
Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.gather_nd(a, indices) @@ -444,7 +445,7 @@ def arange(start, stop=None, step=1, dtype="float32"): Returns ------- - result : tvm.Tensor + result : tvm.te.Tensor The resulting tensor. """ if stop is None: @@ -458,7 +459,7 @@ def repeat(a, repeats, axis): Parameters ---------- - a : tvm.Tensor + a : tvm.te.Tensor The tensor to be repeated. repeats: int, required @@ -469,7 +470,7 @@ def repeat(a, repeats, axis): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.repeat(a, repeats, axis) @@ -479,7 +480,7 @@ def tile(a, reps): Parameters ---------- - a : tvm.Tensor + a : tvm.te.Tensor The tensor to be tiled. reps: tuple of ints, required @@ -487,7 +488,7 @@ def tile(a, reps): Returns ------- - ret : tvm.Tensor + ret : tvm.te.Tensor """ return cpp.tile(a, reps) @@ -497,7 +498,7 @@ def layout_transform(array, src_layout, dst_layout): Parameters ---------- - array : tvm.Tensor + array : tvm.te.Tensor The source array. src_layout : str @@ -514,7 +515,7 @@ def shape(array, dtype="int32"): Parameters ---------- - array : tvm.Tensor + array : tvm.te.Tensor The source tensor. dtype : str, optional @@ -522,7 +523,7 @@ def shape(array, dtype="int32"): Returns ------- - result : tvm.Tensor + result : tvm.te.Tensor The resulting tensor. """ return cpp.shape(array, dtype) @@ -543,11 +544,11 @@ def sequence_mask(data, valid_length, mask_value=0, axis=0): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor N-D with shape [MAX_LENGTH, batch_size, ...] or [batch_size, MAX_LENGTH, ...] depending on the value of `axis`. - valid_length : tvm.Tensor + valid_length : tvm.te.Tensor 1-D with shape [batch_size,] mask_value : float, optional @@ -558,7 +559,7 @@ def sequence_mask(data, valid_length, mask_value=0, axis=0): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor N-D with shape [MAX_LENGTH, batch_size, ...] or [batch_size, MAX_LENGTH, ...] depending on the value of `axis`. 
""" @@ -574,7 +575,7 @@ def ndarray_size(array, dtype="int32"): Parameters ---------- - array : tvm.Tensor + array : tvm.te.Tensor The source tensor. dtype : str, optional @@ -582,7 +583,7 @@ def ndarray_size(array, dtype="int32"): Returns ------- - result : tvm.Tensor + result : tvm.te.Tensor The resulting tensor. """ return cpp.ndarray_size(array, dtype) @@ -593,18 +594,18 @@ def where(condition, x, y): Parameters ---------- - condition : tvm.Tensor + condition : tvm.te.Tensor The condition array. - x : tvm.Tensor + x : tvm.te.Tensor First array to be selected. - y : tvm.Tensor + y : tvm.te.Tensor Second array to be selected. Returns ------- - result : tvm.Tensor + result : tvm.te.Tensor A Tensor selected from x or y depending on condition. """ return cpp.where(condition, x, y) @@ -617,13 +618,13 @@ def one_hot(indices, on_value, off_value, depth, axis, dtype): Parameters ---------- - indices : tvm.Tensor + indices : tvm.te.Tensor Locations to set to on_value. - on_value : tvm.Tensor + on_value : tvm.te.Tensor Value to fill at indices. - off_value : tvm.Tensor + off_value : tvm.te.Tensor Value to fill at all other positions besides indices. depth : int diff --git a/topi/python/topi/util.py b/topi/python/topi/util.py index c4c3ee6..6815357 100644 --- a/topi/python/topi/util.py +++ b/topi/python/topi/util.py @@ -20,7 +20,8 @@ from __future__ import absolute_import as _abs from numbers import Integral import tvm -from tvm.api import layout, bijective_layout +from tvm import te +from tvm.tir import layout, bijective_layout from . 
import tag, cpp class InvalidShapeError(ValueError): @@ -56,7 +57,7 @@ def traverse_inline(s, final_op, callback): if op not in s.outputs: s[op].compute_inline() for tensor in op.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp): + if isinstance(tensor.op, tvm.te.ComputeOp): _traverse(tensor.op) callback(op) @@ -77,7 +78,7 @@ def prod(x): The result value """ if not x: - return tvm.const(1, "int32") + return tvm.tir.const(1, "int32") res = x[0] for i in range(1, len(x)): res = res * x[i] @@ -99,9 +100,9 @@ def get_const_int(expr): """ if isinstance(expr, Integral): return expr - if not isinstance(expr, tvm.expr.IntImm): - expr = tvm.ir_pass.Simplify(expr) - if not isinstance(expr, tvm.expr.IntImm): + if not isinstance(expr, tvm.tir.IntImm): + expr = tvm.tir.ir_pass.Simplify(expr) + if not isinstance(expr, tvm.tir.IntImm): raise ValueError("Expect value to be constant int") return int(expr.value) @@ -121,9 +122,9 @@ def get_const_float(expr): """ if isinstance(expr, float): return float(expr) - if not isinstance(expr, tvm.expr.FloatImm): - expr = tvm.ir_pass.Simplify(expr) - if not isinstance(expr, tvm.expr.FloatImm): + if not isinstance(expr, tvm.tir.FloatImm): + expr = tvm.tir.ir_pass.Simplify(expr) + if not isinstance(expr, tvm.tir.FloatImm): raise ValueError("Expect value to be constant float") return float(expr.value) @@ -143,9 +144,9 @@ def equal_const_int(expr, value): """ if isinstance(expr, Integral): return expr == value - if not isinstance(expr, tvm.expr.IntImm): - expr = tvm.ir_pass.Simplify(expr) - if not isinstance(expr, tvm.expr.IntImm): + if not isinstance(expr, tvm.tir.IntImm): + expr = tvm.tir.ir_pass.Simplify(expr) + if not isinstance(expr, tvm.tir.IntImm): return False return expr.value == value @@ -165,11 +166,11 @@ def get_const_tuple(in_tuple): """ ret = [] for elem in in_tuple: - if isinstance(elem, tvm.expr.Var): + if isinstance(elem, tvm.tir.Var): ret.append(elem) - elif not isinstance(elem, (tvm.expr.IntImm, int)): - elem = 
tvm.ir_pass.Simplify(elem) - if not isinstance(elem, tvm.expr.IntImm): + elif not isinstance(elem, (tvm.tir.IntImm, int)): + elem = tvm.tir.ir_pass.Simplify(elem) + if not isinstance(elem, tvm.tir.IntImm): ret.append(elem) else: ret.append(get_const_int(elem)) @@ -205,7 +206,7 @@ def simplify(expr): out : Expr or int The simplified output """ - return tvm.ir_pass.Simplify(expr) if isinstance(expr, tvm.expr.PrimExpr) else expr + return tvm.tir.ir_pass.Simplify(expr) if isinstance(expr, tvm.tir.PrimExpr) else expr def ravel_index(indices, shape): @@ -213,7 +214,7 @@ def ravel_index(indices, shape): Parameters ---------- - indices : tuple of int or tvm.expr.IntImm + indices : tuple of int or tvm.tir.IntImm The input coordinates shape : tuple of int @@ -238,7 +239,7 @@ def unravel_index(idx, shape): Parameters ---------- - idx : int or tvm.expr.IntImm + idx : int or tvm.tir.IntImm The 1D index shape : tuple of int @@ -246,11 +247,11 @@ def unravel_index(idx, shape): Returns ------- - indices : tuple of int or tvm.expr.IntImm + indices : tuple of int or tvm.tir.IntImm Corresponding coordinate of the 1D index """ - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod indices = [] for i in range(len(shape) - 1, -1, -1): indices.append(idxm(idx, shape[i])) @@ -276,18 +277,18 @@ def const_matrix(matrix, name="const_matrix"): """ row, col = matrix.shape dtype = str(matrix.dtype) - idxm = tvm.indexmod + idxm = tvm.tir.indexmod def select_array(i, j): - now = tvm.const(0.0, dtype) + now = tvm.tir.const(0.0, dtype) for ii in range(row): for jj in range(col): - now = tvm.expr.Select(tvm.all(idxm(i, row) == ii, idxm(j, col) == jj), - tvm.const(matrix[ii][jj], dtype), - now) + now = tvm.tir.Select(tvm.tir.all(idxm(i, row) == ii, idxm(j, col) == jj), + tvm.tir.const(matrix[ii][jj], dtype), + now) return now - return tvm.compute(matrix.shape, select_array, name=name) + return te.compute(matrix.shape, select_array, name=name) def 
get_max_power2_factor(n, max_value=None): @@ -349,7 +350,7 @@ def get_shape(src_shape, src_layout, dst_layout): layout_mapping = bijective_layout(src_layout, dst_layout) dst_indices = layout_mapping.forward_index( - tvm.convert(list(range(len(src_layout))))) + tvm.runtime.convert(list(range(len(src_layout))))) return get_const_tuple(tuple([src_shape[i.value] for i in dst_indices])) @@ -377,12 +378,12 @@ def within_index(b, e, s, i): bool expression that is True is the array position would be selected by the index and False otherwise """ - bc = tvm.expr.Select(s < 0, i <= e, i < b) - ec = tvm.expr.Select(s < 0, i > b, i >= e) - ss = tvm.if_then_else(s < 0, - ((i - e) + (e % tvm.abs(s)) + 1) % tvm.abs(s), - (i - b) % s) - return tvm.expr.Select(tvm.expr.Or(bc, ec), tvm.const(False), ss.equal(0)) + bc = tvm.tir.Select(s < 0, i <= e, i < b) + ec = tvm.tir.Select(s < 0, i > b, i >= e) + ss = te.if_then_else(s < 0, + ((i - e) + (e % te.abs(s)) + 1) % te.abs(s), + (i - b) % s) + return tvm.tir.Select(tvm.tir.Or(bc, ec), tvm.tir.const(False), ss.equal(0)) def make_idx(b, e, s, z, i): @@ -414,16 +415,16 @@ def make_idx(b, e, s, z, i): postion: Expr int expression that corresponds to an array position in the selection. 
""" - bc = tvm.expr.Select(s < 0, i <= e, i < b) - ec = tvm.expr.Select(s < 0, i > b, i >= e) + bc = tvm.tir.Select(s < 0, i <= e, i < b) + ec = tvm.tir.Select(s < 0, i > b, i >= e) # Clamp to array size - b = tvm.expr.Select(z < b, z - 1, b) + b = tvm.tir.Select(z < b, z - 1, b) - ss = tvm.if_then_else(s < 0, - (b - i) // tvm.abs(s), - (i - b) // s) - return tvm.if_then_else(tvm.expr.Or(bc, ec), 88, ss) + ss = tvm.tir.if_then_else(s < 0, + (b - i) // te.abs(s), + (i - b) // s) + return tvm.tir.if_then_else(tvm.tir.Or(bc, ec), 88, ss) def is_empty_shape(shape): diff --git a/topi/python/topi/vision/nms.py b/topi/python/topi/vision/nms.py index c171f8c..d95ca75 100644 --- a/topi/python/topi/vision/nms.py +++ b/topi/python/topi/vision/nms.py @@ -17,6 +17,7 @@ # pylint: disable=import-error, invalid-name, no-member, too-many-locals, too-many-arguments, undefined-variable, too-many-nested-blocks, too-many-branches, too-many-statements, too-many-function-args """Non-maximum suppression operator""" import tvm +from tvm import te from tvm import hybrid from ..sort import argsort @@ -28,16 +29,16 @@ def hybrid_rearrange_out(data, one): Parameters ---------- - data : tvm.Tensor or numpy NDArray + data : tvm.te.Tensor or numpy NDArray NMS output. 3-D tensor with shape [batch_size, num_anchors, 6]. - one: tvm.const + one: tvm.tir.const Constant one with the same dtype as data. Returns ------- - output : tvm.Tensor or numpy NDArray + output : tvm.te.Tensor or numpy NDArray Transformed NMS output. 3-D tensor with shape [batch_size, num_anchors, 6]. """ @@ -70,28 +71,28 @@ def hybrid_get_valid_counts(data, score_threshold, id_index, score_index, one): Parameters ---------- - data : tvm.Tensor or numpy NDArray + data : tvm.te.Tensor or numpy NDArray Input data. 3-D tensor with shape [batch_size, num_anchors, 6] or [batch_size, num_anchors, 5]. - score_threshold : tvm.const + score_threshold : tvm.tir.const Lower limit of score for valid bounding boxes. 
- id_index : tvm.const + id_index : tvm.tir.const index of the class categories, -1 to disable. - score_index: tvm.const + score_index: tvm.tir.const Index of the scores/confidence of boxes. - one: tvm.const + one: tvm.tir.const Constant one with the same dtype as data. Returns ------- - out_tensor : tvm.Tensor or numpy NDArray + out_tensor : tvm.te.Tensor or numpy NDArray Rearranged data tensor. - valid_count : tvm.Tensor or numpy NDArray + valid_count : tvm.te.Tensor or numpy NDArray 1-D tensor for valid number of boxes. """ batch_size = data.shape[0] @@ -123,7 +124,7 @@ def get_valid_counts(data, score_threshold=0, id_index=0, score_index=1): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor Input data. 3-D tensor with shape [batch_size, num_anchors, 6] or [batch_size, num_anchors, 5]. @@ -138,18 +139,18 @@ def get_valid_counts(data, score_threshold=0, id_index=0, score_index=1): Returns ------- - out_tensor : tvm.Tensor + out_tensor : tvm.te.Tensor Rearranged data tensor. - valid_count : tvm.Tensor + valid_count : tvm.te.Tensor 1-D tensor for valid number of boxes. """ - score_threshold_const = tvm.const(score_threshold, data.dtype) - id_index_const = tvm.const(id_index, "int32") - score_index_const = tvm.const(score_index, "int32") + score_threshold_const = tvm.tir.const(score_threshold, data.dtype) + id_index_const = tvm.tir.const(id_index, "int32") + score_index_const = tvm.tir.const(score_index, "int32") return hybrid_get_valid_counts(data, score_threshold_const, id_index_const, score_index_const, - tvm.const(1, data.dtype)) + tvm.tir.const(1, data.dtype)) @hybrid.script @@ -160,51 +161,51 @@ def hybrid_nms(data, sorted_index, valid_count, Parameters ---------- - data: tvm.Tensor or numpy NDArray + data: tvm.te.Tensor or numpy NDArray Bounding boxes with class and score. 3-D tensor with shape [batch_size, num_anchors, 6]. 
- sorted_index : tvm.Tensor or numpy NDArray + sorted_index : tvm.te.Tensor or numpy NDArray Bounding box indexes sorted by score, with shape [batch_size, num_anchors]. - valid_count : tvm.Tensor or numpy NDArray + valid_count : tvm.te.Tensor or numpy NDArray 1-D tensor for valid number of boxes. - max_output_size : tvm.const + max_output_size : tvm.tir.const Max number of output valid boxes for each instance. By default all valid boxes are returned. - iou_threshold : tvm.const + iou_threshold : tvm.tir.const Overlapping(IoU) threshold to suppress object with smaller score. - force_suppress : tvm.const + force_suppress : tvm.tir.const Whether to suppress all detections regardless of class_id. - top_k : tvm.const + top_k : tvm.tir.const Keep maximum top k detections before nms, -1 for no limit. - coord_start : tvm.const + coord_start : tvm.tir.const Start index of the consecutive 4 coordinates. - id_index : tvm.const + id_index : tvm.tir.const index of the class categories, -1 to disable. - score_index: tvm.const + score_index: tvm.tir.const Index of the scores/confidence of boxes. - zero: tvm.const + zero: tvm.tir.const Constant zero with the same dtype as data. - one: tvm.const + one: tvm.tir.const Constant one with the same dtype as data. Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 3-D tensor with shape [batch_size, num_anchors, 6]. - box_indices: tvm.Tensor + box_indices: tvm.te.Tensor 2-D tensor with shape [batch_size, num_anchors]. """ batch_size = data.shape[0] @@ -297,10 +298,10 @@ def non_max_suppression(data, valid_count, max_output_size=-1, Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 3-D tensor with shape [batch_size, num_anchors, 6] or [batch_size, num_anchors, 5]. - valid_count : tvm.Tensor + valid_count : tvm.te.Tensor 1-D tensor for valid number of boxes. 
max_output_size : optional, int @@ -333,7 +334,7 @@ def non_max_suppression(data, valid_count, max_output_size=-1, Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor 3-D tensor with shape [batch_size, num_anchors, 6]. Example @@ -342,8 +343,8 @@ def non_max_suppression(data, valid_count, max_output_size=-1, # An example to use non_max_suppression dshape = (1, 5, 6) - data = tvm.placeholder(dshape, name="data") - valid_count = tvm.placeholder((dshape[0],), dtype="int32", name="valid_count") + data = te.placeholder(dshape, name="data") + valid_count = te.placeholder((dshape[0],), dtype="int32", name="valid_count") iou_threshold = 0.7 force_suppress = True top_k = -1 @@ -363,19 +364,19 @@ def non_max_suppression(data, valid_count, max_output_size=-1, num_anchors = data.shape[1] score_axis = score_index score_shape = (batch_size, num_anchors) - score_tensor = tvm.compute(score_shape, lambda i, j: data[i, j, score_axis]) + score_tensor = te.compute(score_shape, lambda i, j: data[i, j, score_axis]) sort_tensor = argsort(score_tensor, valid_count=valid_count, axis=1, is_ascend=False) out, box_indices = hybrid_nms(data, sort_tensor, valid_count, - tvm.const(max_output_size, dtype="int32"), - tvm.const(iou_threshold, dtype=data.dtype), - tvm.const(force_suppress, dtype="bool"), - tvm.const(top_k, dtype="int32"), - tvm.const(coord_start, dtype="int32"), - tvm.const(id_index, dtype="int32"), - tvm.const(score_index, dtype="int32"), - zero=tvm.const(0, dtype=data.dtype), - one=tvm.const(1, dtype=data.dtype)) + tvm.tir.const(max_output_size, dtype="int32"), + tvm.tir.const(iou_threshold, dtype=data.dtype), + tvm.tir.const(force_suppress, dtype="bool"), + tvm.tir.const(top_k, dtype="int32"), + tvm.tir.const(coord_start, dtype="int32"), + tvm.tir.const(id_index, dtype="int32"), + tvm.tir.const(score_index, dtype="int32"), + zero=tvm.tir.const(0, dtype=data.dtype), + one=tvm.tir.const(1, dtype=data.dtype)) if not return_indices and invalid_to_bottom: - out = 
hybrid_rearrange_out(out, one=tvm.const(1, dtype=data.dtype)) + out = hybrid_rearrange_out(out, one=tvm.tir.const(1, dtype=data.dtype)) return box_indices if return_indices else out diff --git a/topi/python/topi/vision/rcnn/proposal.py b/topi/python/topi/vision/rcnn/proposal.py index 5de4998..23bd24d 100644 --- a/topi/python/topi/vision/rcnn/proposal.py +++ b/topi/python/topi/vision/rcnn/proposal.py @@ -18,6 +18,7 @@ """Proposal operator""" import math import tvm +from tvm import te from ...util import get_const_tuple, get_const_int from ...sort import argsort @@ -43,8 +44,8 @@ def reg_bbox(x1, y1, x2, y2, dx, dy, dw, dh): pred_ctr_x = dx * bbox_w + ctr_x pred_ctr_y = dy * bbox_h + ctr_y - pred_w = tvm.exp(dw) * bbox_w - pred_h = tvm.exp(dh) * bbox_h + pred_w = te.exp(dw) * bbox_w + pred_h = te.exp(dh) * bbox_h pred_x1 = pred_ctr_x - 0.5 * (pred_w - 1.0) pred_y1 = pred_ctr_y - 0.5 * (pred_h - 1.0) @@ -67,16 +68,16 @@ def predict_bbox_ir(cls_prob_buf, bbox_pred_buf, im_info_buf, out_buf, scales, r Parameters ---------- - cls_prob_buf : tvm.schedule.Buffer + cls_prob_buf : tvm.te.schedule.Buffer 4-D with shape [batch, 2 * num_anchors, height, width] - bbox_pred_buf : tvm.schedule.Buffer + bbox_pred_buf : tvm.te.schedule.Buffer 4-D with shape [batch, 4 * num_anchors, height, width] - im_info_buf : tvm.schedule.Buffer + im_info_buf : tvm.te.schedule.Buffer 2-D with shape [batch, 3] - out_buf : tvm.schedule.Buffer + out_buf : tvm.te.schedule.Buffer 3-D with shape [batch, num_bbox, 5] The last dimension is in format of [w_start, h_start, w_end, h_end, score] @@ -103,15 +104,15 @@ def predict_bbox_ir(cls_prob_buf, bbox_pred_buf, im_info_buf, out_buf, scales, r """ batch, num_anchors, height, width = get_const_tuple(cls_prob_buf.shape) num_anchors //= 2 - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() p_score = ib.buffer_ptr(cls_prob_buf) p_delta = ib.buffer_ptr(bbox_pred_buf) p_im_info = ib.buffer_ptr(im_info_buf) p_out = ib.buffer_ptr(out_buf) - idxm = 
tvm.indexmod - idxd = tvm.indexdiv + idxm = tvm.tir.indexmod + idxd = tvm.tir.indexdiv with ib.for_range(0, batch * height * width) as tid: w = idxm(tid, width) @@ -135,10 +136,10 @@ def predict_bbox_ir(cls_prob_buf, bbox_pred_buf, im_info_buf, out_buf, scales, r regression_func = reg_iou if iou_loss else reg_bbox pred_x1, pred_y1, pred_x2, pred_y2 = regression_func(x1, y1, x2, y2, *delta) - pred_x1 = tvm.max(tvm.min(pred_x1, im_width - 1.0), 0.0) - pred_y1 = tvm.max(tvm.min(pred_y1, im_height - 1.0), 0.0) - pred_x2 = tvm.max(tvm.min(pred_x2, im_width - 1.0), 0.0) - pred_y2 = tvm.max(tvm.min(pred_y2, im_height - 1.0), 0.0) + pred_x1 = tvm.te.max(tvm.te.min(pred_x1, im_width - 1.0), 0.0) + pred_y1 = tvm.te.max(tvm.te.min(pred_y1, im_height - 1.0), 0.0) + pred_x2 = tvm.te.max(tvm.te.min(pred_x2, im_width - 1.0), 0.0) + pred_y2 = tvm.te.max(tvm.te.min(pred_y2, im_height - 1.0), 0.0) real_height = (im_height / feature_stride).astype('int32') real_width = (im_width / feature_stride).astype('int32') @@ -148,15 +149,15 @@ def predict_bbox_ir(cls_prob_buf, bbox_pred_buf, im_info_buf, out_buf, scales, r min_size = p_im_info[b * 3 + 2] * rpn_min_size pred_score = p_score[((b * num_anchors * 2 + num_anchors + k) * height + h) * width + w] - pred_score = tvm.expr.Select(tvm.any(h >= real_height, w >= real_width), - -1.0, pred_score) + pred_score = tvm.tir.Select(tvm.tir.any(h >= real_height, w >= real_width), + -1.0, pred_score) p_out[out_index * 5 + 0] = pred_x1 p_out[out_index * 5 + 1] = pred_y1 p_out[out_index * 5 + 2] = pred_x2 p_out[out_index * 5 + 3] = pred_y2 p_out[out_index * 5 + 4] = pred_score - with ib.if_scope(tvm.any(bbox_w < min_size, bbox_h < min_size)): + with ib.if_scope(tvm.tir.any(bbox_w < min_size, bbox_h < min_size)): p_out[out_index * 5 + 0] -= min_size / 2.0 p_out[out_index * 5 + 1] -= min_size / 2.0 p_out[out_index * 5 + 2] += min_size / 2.0 @@ -171,10 +172,10 @@ def argsort_ir(data_buf, out_index_buf): Parameters ---------- - data_buf : 
tvm.schedule.Buffer + data_buf : tvm.te.schedule.Buffer 2-D with shape [batch, num_bbox] - out_index_buf : tvm.schedule.Buffer + out_index_buf : tvm.te.schedule.Buffer 2-D with shape [batch, num_bbox]. Indices of data in sorted order. Returns @@ -183,12 +184,12 @@ def argsort_ir(data_buf, out_index_buf): The result IR statement. """ batch, num_bbox = get_const_tuple(data_buf.shape) - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() p_data = ib.buffer_ptr(data_buf) index_out = ib.buffer_ptr(out_index_buf) temp_data = ib.allocate("float32", (1,), name="temp_data", scope="local") temp_index = ib.allocate("int32", (1,), name="temp_index", scope="local") - idxm = tvm.indexmod + idxm = tvm.tir.indexmod with ib.for_range(0, batch, for_type="unroll") as b: start = b * num_bbox for i in range(2): @@ -199,8 +200,8 @@ def argsort_ir(data_buf, out_index_buf): with ib.for_range(0, num_bbox) as k: with ib.for_range(0, (num_bbox + 1) // 2) as tid: offset = start + 2 * tid + idxm(k, 2) - with ib.if_scope(tvm.all(offset + 1 < num_bbox, - p_data[offset] < p_data[offset + 1])): + with ib.if_scope(tvm.tir.all(offset + 1 < num_bbox, + p_data[offset] < p_data[offset + 1])): temp_data[0] = p_data[offset] p_data[offset] = p_data[offset + 1] p_data[offset + 1] = temp_data[0] @@ -215,11 +216,11 @@ def nms_ir(sorted_bbox_buf, out_buf, nms_threshold): Parameters ---------- - sorted_bbox_buf : tvm.schedule.Buffer + sorted_bbox_buf : tvm.te.schedule.Buffer 3-D with shape [batch, num_bbox, 5]. The last dimension is in format of [w_start, h_start, w_end, h_end, score]. - out_buf : tvm.schedule.Buffer + out_buf : tvm.te.schedule.Buffer 2-D with shape [batch, num_bbox]. Boolean mask of whether a bounding box should be removed. nms_threshold : float @@ -233,10 +234,10 @@ def nms_ir(sorted_bbox_buf, out_buf, nms_threshold): def calculate_overlap(out_tensor, box_a_idx, box_b_idx): """Calculate overlap of two boxes. 
""" - w = tvm.max(0.0, tvm.min(out_tensor[box_a_idx + 2], out_tensor[box_b_idx + 2]) - - tvm.max(out_tensor[box_a_idx], out_tensor[box_b_idx]) + 1.0) - h = tvm.max(0.0, tvm.min(out_tensor[box_a_idx + 3], out_tensor[box_b_idx + 3]) - - tvm.max(out_tensor[box_a_idx + 1], out_tensor[box_b_idx + 1]) + 1.0) + w = tvm.te.max(0.0, tvm.te.min(out_tensor[box_a_idx + 2], out_tensor[box_b_idx + 2]) + - tvm.te.max(out_tensor[box_a_idx], out_tensor[box_b_idx]) + 1.0) + h = tvm.te.max(0.0, tvm.te.min(out_tensor[box_a_idx + 3], out_tensor[box_b_idx + 3]) + - tvm.te.max(out_tensor[box_a_idx + 1], out_tensor[box_b_idx + 1]) + 1.0) i = w * h u = (out_tensor[box_a_idx + 2] - out_tensor[box_a_idx] + 1.0) * \ (out_tensor[box_a_idx + 3] - out_tensor[box_a_idx + 1] + 1.0) + \ @@ -245,7 +246,7 @@ def nms_ir(sorted_bbox_buf, out_buf, nms_threshold): return i / u batch, num_bbox = get_const_tuple(out_buf.shape) - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() p_data = ib.buffer_ptr(sorted_bbox_buf) p_out = ib.buffer_ptr(out_buf) with ib.for_range(0, batch, for_type="unroll", name="n") as b: @@ -254,7 +255,7 @@ def nms_ir(sorted_bbox_buf, out_buf, nms_threshold): p_out[base_idx + i] = False with ib.for_range(0, num_bbox - 1) as l: with ib.for_range(0, num_bbox) as i: - with ib.if_scope(tvm.all(i < num_bbox, i > l, p_out[base_idx + l] == False)): + with ib.if_scope(tvm.tir.all(i < num_bbox, i > l, p_out[base_idx + l] == False)): iou = calculate_overlap(p_data, (base_idx + l) * 5, (base_idx + i) * 5) with ib.if_scope(iou > nms_threshold): p_out[base_idx + i] = True @@ -266,14 +267,14 @@ def prepare_output_ir(sorted_bbox_buf, remove_mask_buf, out_buf): Parameters ---------- - sorted_bbox_buf : tvm.schedule.Buffer + sorted_bbox_buf : tvm.te.schedule.Buffer 3-D with shape [batch, num_bbox, 5]. The last dimension is in format of [w_start, h_start, w_end, h_end, score]. 
- remove_mask_buf : tvm.schedule.Buffer + remove_mask_buf : tvm.te.schedule.Buffer 2-D with shape [batch, num_bbox]. Boolean mask of whether a bounding box should be removed. - out_buf : tvm.schedule.Buffer + out_buf : tvm.te.schedule.Buffer 2-D with shape [batch * rpn_post_nms_top_n, 5]. The last dimension is in format of [batch_index, w_start, h_start, w_end, h_end]. @@ -284,7 +285,7 @@ def prepare_output_ir(sorted_bbox_buf, remove_mask_buf, out_buf): """ batch, num_bbox, _ = get_const_tuple(sorted_bbox_buf.shape) rpn_post_nms_top_n = get_const_int(out_buf.shape[0]) // batch - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() i = ib.allocate('int32', (batch,), 'i', scope='local') p_sorted_bbox = ib.buffer_ptr(sorted_bbox_buf) p_remove = ib.buffer_ptr(remove_mask_buf) @@ -302,14 +303,14 @@ def prepare_output_ir(sorted_bbox_buf, remove_mask_buf, out_buf): nkeep[b] += 1 with ib.for_range(0, batch) as b: with ib.if_scope(nkeep[b] > 0): - with ib.for_range(0, tvm.ceil( - tvm.const(rpn_post_nms_top_n, 'float32') / nkeep[b]).astype('int32')): + with ib.for_range(0, te.ceil( + tvm.tir.const(rpn_post_nms_top_n, 'float32') / nkeep[b]).astype('int32')): with ib.for_range(0, num_bbox) as j: offset_j = (b * num_bbox + j) * 5 offset_i = (b * rpn_post_nms_top_n + i[b]) * 5 - with ib.if_scope(tvm.all(i[b] < rpn_post_nms_top_n, - p_remove[(b*num_bbox+j)] == False)): - p_out[offset_i] = tvm.expr.Cast('float32', b) + with ib.if_scope(tvm.tir.all(i[b] < rpn_post_nms_top_n, + p_remove[(b*num_bbox+j)] == False)): + p_out[offset_i] = tvm.tir.Cast('float32', b) with ib.for_range(0, 4, for_type='unroll') as k: p_out[offset_i + k + 1] = p_sorted_bbox[offset_j + k] i[b] = i[b] + 1 @@ -324,13 +325,13 @@ def proposal(cls_prob, bbox_pred, im_info, scales, ratios, feature_stride, thres Parameters ---------- - cls_prob : tvm.Tensor + cls_prob : tvm.te.Tensor 4-D with shape [batch, 2 * num_anchors, height, width] - bbox_pred : tvm.Tensor + bbox_pred : tvm.te.Tensor 4-D with shape 
[batch, 4 * num_anchors, height, width] - im_info : tvm.Tensor + im_info : tvm.te.Tensor 2-D with shape [batch, 3] scales : list/tuple of float @@ -360,7 +361,7 @@ def proposal(cls_prob, bbox_pred, im_info, scales, ratios, feature_stride, thres Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor 2-D tensor with shape [batch * rpn_post_nms_top_n, 5]. The last dimension is in format of [batch_index, w_start, h_start, w_end, h_end]. """ @@ -370,20 +371,20 @@ def proposal(cls_prob, bbox_pred, im_info, scales, ratios, feature_stride, thres num_bbox = height * width * num_anchors rpn_pre_nms_top_n = min(rpn_pre_nms_top_n, num_bbox) if rpn_pre_nms_top_n > 0 else num_bbox - bbox = tvm.extern((batch, num_bbox, 5), [cls_prob, bbox_pred, im_info], lambda ins, outs: - predict_bbox_ir(ins[0], ins[1], ins[2], outs[0], scales, ratios, - feature_stride, rpn_min_size, iou_loss), - dtype=bbox_pred.dtype) - score = tvm.compute((batch, num_bbox), lambda b, i: bbox[b, i, 4], tag='bbox_score') + bbox = te.extern((batch, num_bbox, 5), [cls_prob, bbox_pred, im_info], lambda ins, outs: + predict_bbox_ir(ins[0], ins[1], ins[2], outs[0], scales, ratios, + feature_stride, rpn_min_size, iou_loss), + dtype=bbox_pred.dtype) + score = te.compute((batch, num_bbox), lambda b, i: bbox[b, i, 4], tag='bbox_score') valid_count_shape = (1,) - valid_count = tvm.compute(valid_count_shape, lambda i: num_bbox) + valid_count = te.compute(valid_count_shape, lambda i: num_bbox) sorted_index = argsort(score, valid_count=valid_count, axis=1, is_ascend=False) - sorted_bbox = tvm.compute((batch, rpn_pre_nms_top_n, 5), - lambda b, i, j: bbox[b, sorted_index[b, i], j], tag='sorted_bbox') - nms_remove_mask = tvm.extern((batch, rpn_pre_nms_top_n), [sorted_bbox], - lambda ins, outs: nms_ir(ins[0], outs[0], threshold), - dtype='bool') - nms_out = tvm.extern((batch * rpn_post_nms_top_n, 5), [sorted_bbox, nms_remove_mask], - lambda ins, outs: prepare_output_ir(ins[0], ins[1], outs[0]), - dtype=sorted_bbox.dtype) + 
sorted_bbox = te.compute((batch, rpn_pre_nms_top_n, 5), + lambda b, i, j: bbox[b, sorted_index[b, i], j], tag='sorted_bbox') + nms_remove_mask = te.extern((batch, rpn_pre_nms_top_n), [sorted_bbox], + lambda ins, outs: nms_ir(ins[0], outs[0], threshold), + dtype='bool') + nms_out = te.extern((batch * rpn_post_nms_top_n, 5), [sorted_bbox, nms_remove_mask], + lambda ins, outs: prepare_output_ir(ins[0], ins[1], outs[0]), + dtype=sorted_bbox.dtype) return nms_out diff --git a/topi/python/topi/vision/rcnn/roi_align.py b/topi/python/topi/vision/rcnn/roi_align.py index a0bc5e2..9aa1ef9 100644 --- a/topi/python/topi/vision/rcnn/roi_align.py +++ b/topi/python/topi/vision/rcnn/roi_align.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name """Roi align operator""" import tvm +from tvm import te from ...util import get_const_tuple from ...cpp.util import bilinear_sample_nchw @@ -26,10 +27,10 @@ def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, channel, height, width] - rois : tvm.Tensor + rois : tvm.te.Tensor 2-D with shape [num_roi, 5]. 
The last dimension should be in format of [batch_index, w_start, h_start, w_end, h_end] @@ -45,7 +46,7 @@ def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [num_roi, channel, pooled_size, pooled_size] """ dtype = rois.dtype @@ -58,11 +59,11 @@ def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1): pooled_size_h, pooled_size_w = pooled_size def _bilinear(i, c, y, x): - outside = tvm.any(y < -1.0, x < -1.0, y > height, x > width) - y = tvm.max(y, 0.0) - x = tvm.max(x, 0.0) + outside = tvm.tir.any(y < -1.0, x < -1.0, y > height, x > width) + y = tvm.te.max(y, 0.0) + x = tvm.te.max(x, 0.0) val = bilinear_sample_nchw(data, (i, c, y, x), height - 1, width - 1) - return tvm.if_then_else(outside, 0.0, val) + return tvm.tir.if_then_else(outside, 0.0, val) def _sample(i, c, ph, pw): roi = rois[i] @@ -74,27 +75,27 @@ def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1): roi_end_w *= spatial_scale # force malformed ROIs to be 1x1 - roi_h = tvm.max(roi_end_h - roi_start_h, tvm.const(1.0, dtype)) - roi_w = tvm.max(roi_end_w - roi_start_w, tvm.const(1.0, dtype)) + roi_h = tvm.te.max(roi_end_h - roi_start_h, tvm.tir.const(1.0, dtype)) + roi_w = tvm.te.max(roi_end_w - roi_start_w, tvm.tir.const(1.0, dtype)) bin_h = roi_h / pooled_size_h bin_w = roi_w / pooled_size_w if sample_ratio > 0: - roi_bin_grid_h = roi_bin_grid_w = tvm.const(sample_ratio, 'int32') + roi_bin_grid_h = roi_bin_grid_w = tvm.tir.const(sample_ratio, 'int32') else: - roi_bin_grid_h = tvm.ceil(roi_h / pooled_size_h).astype('int32') - roi_bin_grid_w = tvm.ceil(roi_w / pooled_size_w).astype('int32') + roi_bin_grid_h = te.ceil(roi_h / pooled_size_h).astype('int32') + roi_bin_grid_w = te.ceil(roi_w / pooled_size_w).astype('int32') count = roi_bin_grid_h * roi_bin_grid_w - rh = tvm.reduce_axis((0, roi_bin_grid_h)) - rw = tvm.reduce_axis((0, roi_bin_grid_w)) + rh = 
te.reduce_axis((0, roi_bin_grid_h)) + rw = te.reduce_axis((0, roi_bin_grid_w)) roi_start_h += ph * bin_h roi_start_w += pw * bin_w - return tvm.sum(_bilinear(batch_index, c, - roi_start_h + (rh + 0.5) * bin_h / roi_bin_grid_h, - roi_start_w + (rw + 0.5) * bin_w / roi_bin_grid_w) / count, - axis=[rh, rw]) + return te.sum(_bilinear(batch_index, c, + roi_start_h + (rh + 0.5) * bin_h / roi_bin_grid_h, + roi_start_w + (rw + 0.5) * bin_w / roi_bin_grid_w) / count, + axis=[rh, rw]) - return tvm.compute((num_roi, channel, pooled_size_h, pooled_size_w), _sample, - tag='pool,roi_align_nchw') + return te.compute((num_roi, channel, pooled_size_h, pooled_size_w), _sample, + tag='pool,roi_align_nchw') diff --git a/topi/python/topi/vision/rcnn/roi_pool.py b/topi/python/topi/vision/rcnn/roi_pool.py index f346f58..a206f34 100644 --- a/topi/python/topi/vision/rcnn/roi_pool.py +++ b/topi/python/topi/vision/rcnn/roi_pool.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name """ROI pool operator""" import tvm +from tvm import te from ...util import get_const_tuple def roi_pool_nchw(data, rois, pooled_size, spatial_scale): @@ -24,10 +25,10 @@ def roi_pool_nchw(data, rois, pooled_size, spatial_scale): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, channel, height, width] - rois : tvm.Tensor + rois : tvm.te.Tensor 2-D with shape [num_roi, 5]. 
The last dimension should be in format of [batch_index, w_start, h_start, w_end, h_end] @@ -40,7 +41,7 @@ def roi_pool_nchw(data, rois, pooled_size, spatial_scale): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [num_roi, channel, pooled_size, pooled_size] """ dtype = rois.dtype @@ -57,36 +58,36 @@ def roi_pool_nchw(data, rois, pooled_size, spatial_scale): batch_index = roi[0].astype('int32') roi_start_w, roi_start_h, roi_end_w, roi_end_h = roi[1], roi[2], roi[3], roi[4] - roi_start_h = tvm.round(roi_start_h * spatial_scale).astype('int32') - roi_start_w = tvm.round(roi_start_w * spatial_scale).astype('int32') - roi_end_h = tvm.round(roi_end_h * spatial_scale).astype('int32') - roi_end_w = tvm.round(roi_end_w * spatial_scale).astype('int32') + roi_start_h = te.round(roi_start_h * spatial_scale).astype('int32') + roi_start_w = te.round(roi_start_w * spatial_scale).astype('int32') + roi_end_h = te.round(roi_end_h * spatial_scale).astype('int32') + roi_end_w = te.round(roi_end_w * spatial_scale).astype('int32') # force malformed ROIs to be 1x1 - roi_h = tvm.max(roi_end_h - roi_start_h + 1, tvm.const(1, 'int32')) - roi_w = tvm.max(roi_end_w - roi_start_w + 1, tvm.const(1, 'int32')) + roi_h = tvm.te.max(roi_end_h - roi_start_h + 1, tvm.tir.const(1, 'int32')) + roi_w = tvm.te.max(roi_end_w - roi_start_w + 1, tvm.tir.const(1, 'int32')) bin_h = roi_h.astype(dtype) / pooled_size_h bin_w = roi_w.astype(dtype) / pooled_size_w # use epsilon to prevent floating point precision loss in floor/ceil - epsilon = tvm.const(0.00001, dtype) - hstart = tvm.floor(ph * bin_h + epsilon).astype('int32') - wstart = tvm.floor(pw * bin_w + epsilon).astype('int32') - hend = tvm.ceil((ph + 1) * bin_h - epsilon).astype('int32') - wend = tvm.ceil((pw + 1) * bin_w - epsilon).astype('int32') - hstart = tvm.min(tvm.max(hstart + roi_start_h, 0), height) - wstart = tvm.min(tvm.max(wstart + roi_start_w, 0), width) - hend = tvm.min(tvm.max(hend + roi_start_h, 0), height) - 
wend = tvm.min(tvm.max(wend + roi_start_w, 0), width) + epsilon = tvm.tir.const(0.00001, dtype) + hstart = te.floor(ph * bin_h + epsilon).astype('int32') + wstart = te.floor(pw * bin_w + epsilon).astype('int32') + hend = te.ceil((ph + 1) * bin_h - epsilon).astype('int32') + wend = te.ceil((pw + 1) * bin_w - epsilon).astype('int32') + hstart = tvm.te.min(tvm.te.max(hstart + roi_start_h, 0), height) + wstart = tvm.te.min(tvm.te.max(wstart + roi_start_w, 0), width) + hend = tvm.te.min(tvm.te.max(hend + roi_start_h, 0), height) + wend = tvm.te.min(tvm.te.max(wend + roi_start_w, 0), width) - non_empty = tvm.all(hstart < hend, wstart < wend) - min_value = lambda dtype: tvm.if_then_else(non_empty, tvm.min_value(dtype), - tvm.const(0.0, dtype)) + non_empty = tvm.tir.all(hstart < hend, wstart < wend) + min_value = lambda dtype: tvm.tir.if_then_else( + non_empty, tvm.te.min_value(dtype), tvm.tir.const(0.0, dtype)) # pylint: disable=unnecessary-lambda - _max = tvm.comm_reducer(lambda x, y: tvm.max(x, y), min_value, name='max') - rh = tvm.reduce_axis((0, hend - hstart), 'rh') - rw = tvm.reduce_axis((0, wend - wstart), 'rw') + _max = te.comm_reducer(lambda x, y: tvm.te.max(x, y), min_value, name='max') + rh = te.reduce_axis((0, hend - hstart), 'rh') + rw = te.reduce_axis((0, wend - wstart), 'rw') return _max(data[batch_index, c, hstart+rh, wstart+rw], axis=[rh, rw]) - return tvm.compute((num_roi, channel, pooled_size_h, pooled_size_w), _pool, tag="pool,roi_pool") + return te.compute((num_roi, channel, pooled_size_h, pooled_size_w), _pool, tag="pool,roi_pool") diff --git a/topi/python/topi/vision/reorg.py b/topi/python/topi/vision/reorg.py index 3ba5e84..ec790fa 100644 --- a/topi/python/topi/vision/reorg.py +++ b/topi/python/topi/vision/reorg.py @@ -27,7 +27,7 @@ def reorg(data, stride): Parameters ---------- - Input : tvm.Tensor + Input : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] stride : int @@ -35,7 +35,7 @@ def reorg(data, stride): Returns ------- 
- Output : tvm.Tensor + Output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ return cpp.vision.reorg(data, stride) diff --git a/topi/python/topi/vision/ssd/multibox.py b/topi/python/topi/vision/ssd/multibox.py index 4309af4..8f287b9 100644 --- a/topi/python/topi/vision/ssd/multibox.py +++ b/topi/python/topi/vision/ssd/multibox.py @@ -16,11 +16,10 @@ # under the License. # pylint: disable=invalid-name, no-member, too-many-locals, too-many-arguments, undefined-variable """SSD multibox operators""" -from __future__ import absolute_import as _abs import tvm from tvm import hybrid -from tvm.intrin import exp, sqrt +from tvm.tir import exp, sqrt import topi @@ -32,7 +31,7 @@ def hybrid_multibox_prior(data, sizes, ratios, steps, offsets): Parameters ---------- - data : tvm.Tensor or numpy NDArray + data : tvm.te.Tensor or numpy NDArray 4-D tensor with shape [batch, channel, height, width]] sizes : tvm ConsExpr @@ -49,7 +48,7 @@ def hybrid_multibox_prior(data, sizes, ratios, steps, offsets): Returns ------- - output : tvm.Tensor or numpy NDArray + output : tvm.te.Tensor or numpy NDArray 3-D tensor with shape [1, h_in * w_in * (num_sizes + num_ratios - 1), 4] """ in_height = data.shape[2] @@ -80,7 +79,7 @@ def hybrid_multibox_prior(data, sizes, ratios, steps, offsets): * sqrt(ratios[k - num_sizes + 1] * 1.0) / 2.0 h = sizes[0] / sqrt(ratios[k - num_sizes + 1] * 1.0) / 2.0 count = i * in_width * (num_sizes + num_ratios - 1) \ - + j * (num_sizes + num_ratios - 1) + k + + j * (num_sizes + num_ratios - 1) + k output[0, count, 0] = center_w - w output[0, count, 1] = center_h - h output[0, count, 2] = center_w + w @@ -94,7 +93,7 @@ def multibox_prior(data, sizes=(1,), ratios=(1,), steps=(-1, -1), offsets=(0.5, Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, c_in, h_in, w_in]] sizes : tuple of float @@ -114,11 +113,11 @@ def multibox_prior(data, sizes=(1,), ratios=(1,), steps=(-1, -1), offsets=(0.5, Returns 
------- - out : tvm.Tensor + out : tvm.te.Tensor 3-D tensor with shape [1, h_in * w_in * (num_sizes + num_ratios - 1), 4] """ - out = hybrid_multibox_prior(data, tvm.convert(sizes), tvm.convert(ratios), - tvm.convert(steps), tvm.convert(offsets)) + out = hybrid_multibox_prior(data, tvm.runtime.convert(sizes), tvm.runtime.convert(ratios), + tvm.runtime.convert(steps), tvm.runtime.convert(offsets)) if clip: out = topi.clip(out, 0, 1) return out @@ -166,19 +165,19 @@ def hybrid_multibox_transform_loc(cls_prob, loc_pred, anchor, Parameters ---------- - cls_prob : tvm.Tensor or numpy NDArray + cls_prob : tvm.te.Tensor or numpy NDArray 3-D tensor of class probabilities. - loc_pred : tvm.Tensor or numpy NDArray + loc_pred : tvm.te.Tensor or numpy NDArray 2-D tensor of location regression predictions. - anchor : tvm.Tensor or numpy NDArray + anchor : tvm.te.Tensor or numpy NDArray 3-D tensor of prior anchor boxes. - clip : tvm.const + clip : tvm.tir.const Whether to clip out-of-boundary boxes. - threshold : tvm.const + threshold : tvm.tir.const Threshold to be a positive prediction. variances : tvm.nd.NDArray @@ -186,10 +185,10 @@ def hybrid_multibox_transform_loc(cls_prob, loc_pred, anchor, Returns ------- - out_loc : tvm.Tensor or numpy NDArray + out_loc : tvm.te.Tensor or numpy NDArray 3-D tensor of transformed location. - valid_count : tvm.Tensor or numpy NDArray + valid_count : tvm.te.Tensor or numpy NDArray 1_d tensor of valid counts for boxes. """ batch_size = cls_prob.shape[0] @@ -238,13 +237,13 @@ def multibox_transform_loc(cls_prob, loc_pred, anchor, clip=True, threshold=0.01 Parameters ---------- - cls_prob : tvm.Tensor + cls_prob : tvm.te.Tensor Class probabilities. - loc_pred : tvm.Tensor + loc_pred : tvm.te.Tensor Location regression predictions. - anchor : tvm.Tensor + anchor : tvm.te.Tensor Prior anchor boxes. 
clip : boolean @@ -258,12 +257,12 @@ def multibox_transform_loc(cls_prob, loc_pred, anchor, clip=True, threshold=0.01 Returns ------- - ret : tuple of tvm.Tensor + ret : tuple of tvm.te.Tensor """ return hybrid_multibox_transform_loc(cls_prob, loc_pred, anchor, - tvm.const(clip, "bool"), - tvm.const(threshold, "float32"), - tvm.convert(variances)) + tvm.tir.const(clip, "bool"), + tvm.tir.const(threshold, "float32"), + tvm.runtime.convert(variances)) def multibox_detection(cls_prob, loc_pred, anchor, clip=True, threshold=0.01, nms_threshold=0.5, force_suppress=False, variances=(0.1, 0.1, 0.2, 0.2), nms_topk=-1): @@ -271,13 +270,13 @@ def multibox_detection(cls_prob, loc_pred, anchor, clip=True, threshold=0.01, nm Parameters ---------- - cls_prob : tvm.Tensor + cls_prob : tvm.te.Tensor Class probabilities. - loc_pred : tvm.Tensor + loc_pred : tvm.te.Tensor Location regression predictions. - anchor : tvm.Tensor + anchor : tvm.te.Tensor Prior anchor boxes. clip : boolean @@ -300,7 +299,7 @@ def multibox_detection(cls_prob, loc_pred, anchor, clip=True, threshold=0.01, nm Returns ------- - out : tvm.Tensor + out : tvm.te.Tensor 3-D tensor with shape (batch_size, num_anchors, 6) """ inter_out = multibox_transform_loc(cls_prob, loc_pred, anchor, diff --git a/topi/python/topi/x86/batch_matmul.py b/topi/python/topi/x86/batch_matmul.py index a7cb9e9..539a918 100644 --- a/topi/python/topi/x86/batch_matmul.py +++ b/topi/python/topi/x86/batch_matmul.py @@ -16,8 +16,7 @@ # under the License. 
# pylint: disable=invalid-name,too-many-locals,unused-variable """x86 batch_matmul operators""" -from __future__ import absolute_import as _abs -import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import SplitEntity from tvm.contrib import cblas @@ -34,13 +33,13 @@ def batch_matmul(cfg, x, y): ---------- cfg : ConfigSpace Autotvm tuning space config file - x : tvm.Tensor + x : tvm.te.Tensor 3-D with shape [batch, M, K] - y : tvm.Tensor + y : tvm.te.Tensor 3-D with shape [batch, N, K] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 3-D with shape [batch, M, N] """ assert len(x.shape) == 3 and len( @@ -54,10 +53,10 @@ def batch_matmul(cfg, x, y): if cfg.is_fallback: _default_batch_matmul_config(cfg, M, N, K) - k = tvm.reduce_axis((0, K), name='k') - C = tvm.compute( + k = te.reduce_axis((0, K), name='k') + C = te.compute( (B, M, N), - lambda b, i, j: tvm.sum(x[b, i, k] * y[b, j, k], axis=k), + lambda b, i, j: te.sum(x[b, i, k] * y[b, j, k], axis=k), tag='batch_matmul') return C @@ -79,7 +78,7 @@ def schedule_batch_matmul(cfg, outs): sch: Schedule The computation schedule for the op. """ - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): if "batch_matmul" in op.tag: @@ -140,13 +139,13 @@ def batch_matmul_cblas(cfg, x, y): ---------- cfg : ConfigSpace Autotvm tuning space config file - x : tvm.Tensor + x : tvm.te.Tensor 3-D with shape [batch, M, K] - y : tvm.Tensor + y : tvm.te.Tensor 3-D with shape [batch, N, K] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 3-D with shape [batch, M, N] """ assert len(x.shape) == 3 and len( diff --git a/topi/python/topi/x86/binarize_pack.py b/topi/python/topi/x86/binarize_pack.py index bab91a9..b4a01a5 100644 --- a/topi/python/topi/x86/binarize_pack.py +++ b/topi/python/topi/x86/binarize_pack.py @@ -16,8 +16,7 @@ # under the License. 
# pylint: disable=invalid-name """Schedule for binarization and bit-packing.""" -from __future__ import absolute_import as _abs -import tvm +from tvm import te def schedule_binarize_pack(outs): @@ -34,8 +33,8 @@ def schedule_binarize_pack(outs): s: Schedule The computation schedule for binarize_pack. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(Out): s[Out].parallel(Out.op.axis[0]) diff --git a/topi/python/topi/x86/binary_dense.py b/topi/python/topi/x86/binary_dense.py index ccf74e7..d90694e 100644 --- a/topi/python/topi/x86/binary_dense.py +++ b/topi/python/topi/x86/binary_dense.py @@ -16,8 +16,7 @@ # under the License. # pylint: disable=invalid-name, unused-variable, unused-argument """Schedule for binary dense operator.""" -from __future__ import absolute_import as _abs -import tvm +from tvm import te from .. import tag @@ -35,8 +34,8 @@ def schedule_binary_dense(outs): s: Schedule The computation schedule for binary_dense. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def _schedule(A, B, C): @@ -56,7 +55,7 @@ def schedule_binary_dense(outs): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule binary_dense elif OP.tag == 'binary_dense': diff --git a/topi/python/topi/x86/bitserial_conv2d.py b/topi/python/topi/x86/bitserial_conv2d.py index 2ec5653..37fe352 100644 --- a/topi/python/topi/x86/bitserial_conv2d.py +++ b/topi/python/topi/x86/bitserial_conv2d.py @@ -17,6 +17,7 @@ # pylint: disable=invalid-name,unused-variable,invalid-name """Bitserial conv2d schedule on x86""" import tvm +from tvm import te from tvm import autotvm from .. 
import tag from ..util import get_const_int, get_const_tuple @@ -94,40 +95,40 @@ def bitserial_conv2d_nchw(cfg, data, kernel, stride, padding, in_bits, weight_bi else: data_pad = data_q - data_vec = tvm.compute(dvshape, lambda n, h, w, ci, vh, vw, b: \ - data_pad[b][n][ci][h*VH*HSTR+vh][w*VW*WSTR+vw], name='data_vec') + data_vec = te.compute(dvshape, lambda n, h, w, ci, vh, vw, b: \ + data_pad[b][n][ci][h*VH*HSTR+vh][w*VW*WSTR+vw], name='data_vec') if len(kernel.shape) == 4: - kernel_vec = tvm.compute(kvshape, lambda co, ci, dh, dw, b, vc: \ - kernel_q[b][co*VC+vc][ci][dh][dw], name='kernel_vec') + kernel_vec = te.compute(kvshape, lambda co, ci, dh, dw, b, vc: \ + kernel_q[b][co*VC+vc][ci][dh][dw], name='kernel_vec') - ci = tvm.reduce_axis((0, CI), name='ci') - dh = tvm.reduce_axis((0, KH), name='dh') - dw = tvm.reduce_axis((0, KW), name='dw') - b1 = tvm.reduce_axis((0, IB), name='ib') - b2 = tvm.reduce_axis((0, KB), name='kb') + ci = te.reduce_axis((0, CI), name='ci') + dh = te.reduce_axis((0, KH), name='dh') + dw = te.reduce_axis((0, KW), name='dw') + b1 = te.reduce_axis((0, IB), name='ib') + b2 = te.reduce_axis((0, KB), name='kb') def _conv(n, co, h, w, vh, vw, vc): b1b2 = (b1+b2).astype(out_dtype) if unipolar: - return tvm.sum((tvm.popcount( + return te.sum((tvm.tir.popcount( data_vec[n, h, w, ci, vh*HSTR+dh, vw*WSTR+dw, b1].astype(out_dtype) & kernel_vec[co, ci, dh, dw, b2, vc].astype(out_dtype)) - - tvm.popcount( - data_vec[n, h, w, ci, vh*HSTR+dh, vw*WSTR+dw, b1].astype(out_dtype) - & ~kernel_vec[co, ci, dh, dw, b2, vc]).astype(out_dtype)) << b1b2, - axis=[ci, dh, dw, b1, b2]) + tvm.tir.popcount( + data_vec[n, h, w, ci, vh*HSTR+dh, vw*WSTR+dw, b1].astype(out_dtype) + & ~kernel_vec[co, ci, dh, dw, b2, vc]).astype(out_dtype)) << b1b2, + axis=[ci, dh, dw, b1, b2]) - return tvm.sum((tvm.popcount( + return te.sum((tvm.tir.popcount( data_vec[n, h, w, ci, vh*HSTR+dh, vw*WSTR+dw, b1] & kernel_vec[co, ci, dh, dw, b2, vc])).astype(out_dtype) << b1b2, - axis=[ci, dh, 
dw, b1, b2]) + axis=[ci, dh, dw, b1, b2]) - conv = tvm.compute(ovshape, _conv, name='conv_out') - idxd = tvm.indexdiv - idxm = tvm.indexmod + conv = te.compute(ovshape, _conv, name='conv_out') + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod - return tvm.compute( + return te.compute( oshape, lambda n, co, h, w: conv[n, idxd(co, VC), idxd(h, VH), idxd(w, VW), @@ -202,38 +203,38 @@ def bitserial_conv2d_nhwc(cfg, data, kernel, stride, padding, in_bits, weight_bi else: data_pad = data_q - data_vec = tvm.compute(dvshape, lambda n, h, w, vh, vw, ci, b: \ - data_pad[n][h*VH*HSTR+vh][w*VW*WSTR+vw][ci][b], name='data_vec') + data_vec = te.compute(dvshape, lambda n, h, w, vh, vw, ci, b: \ + data_pad[n][h*VH*HSTR+vh][w*VW*WSTR+vw][ci][b], name='data_vec') - kernel_vec = tvm.compute(kvshape, lambda co, dh, dw, ci, vc, b: \ - kernel_q[dh][dw][ci][co*VC+vc][b], name='kernel_vec') + kernel_vec = te.compute(kvshape, lambda co, dh, dw, ci, vc, b: \ + kernel_q[dh][dw][ci][co*VC+vc][b], name='kernel_vec') - ci = tvm.reduce_axis((0, CI), name='ci') - dh = tvm.reduce_axis((0, KH), name='dh') - dw = tvm.reduce_axis((0, KW), name='dw') - b1 = tvm.reduce_axis((0, IB), name='ib') - b2 = tvm.reduce_axis((0, KB), name='kb') + ci = te.reduce_axis((0, CI), name='ci') + dh = te.reduce_axis((0, KH), name='dh') + dw = te.reduce_axis((0, KW), name='dw') + b1 = te.reduce_axis((0, IB), name='ib') + b2 = te.reduce_axis((0, KB), name='kb') def _conv(n, h, w, co, vh, vw, vc): b1b2 = (b1+b2).astype(out_dtype) if unipolar: - return tvm.sum( - ((tvm.popcount(data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ci, b1] & - kernel_vec[co, dh, dw, ci, vc, b2]).astype(out_dtype) - - tvm.popcount(data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ci, b1]& - ~kernel_vec[co, dh, dw, ci, vc, b2]).astype(out_dtype)) << b1b2), + return te.sum( + ((tvm.tir.popcount(data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ci, b1] & + kernel_vec[co, dh, dw, ci, vc, b2]).astype(out_dtype) - + tvm.tir.popcount(data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, 
ci, b1]& + ~kernel_vec[co, dh, dw, ci, vc, b2]).astype(out_dtype)) << b1b2), axis=[dh, dw, ci, b1, b2]) - return tvm.sum(tvm.popcount( + return te.sum(tvm.tir.popcount( data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ci, b1] & kernel_vec[co, dh, dw, ci, vc, b2]).astype(out_dtype) << b1b2, - axis=[dh, dw, ci, b1, b2]) + axis=[dh, dw, ci, b1, b2]) - conv = tvm.compute(ovshape, _conv, name='conv') + conv = te.compute(ovshape, _conv, name='conv') - idxd = tvm.indexdiv - idxm = tvm.indexmod - return tvm.compute( + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod + return te.compute( oshape, lambda n, h, w, co: conv[n, idxd(h, VH), idxd(w, VW), idxd(co, VC), @@ -250,7 +251,7 @@ def schedule_bitserial_conv2d_nhwc(cfg, outs): def _schedule_bitserial_conv2d(cfg, outs): """CPU schedule for bitserial convolutions NCHW and NHWC""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def traverse(op): @@ -262,7 +263,7 @@ def _schedule_bitserial_conv2d(cfg, outs): s[op].compute_inline() for tensor in op.input_tensors: if tensor.op.input_tensors and (tensor.op not in scheduled_ops): - if isinstance(tensor.op, tvm.tensor.ComputeOp): + if isinstance(tensor.op, tvm.te.ComputeOp): traverse(tensor.op) elif 'spatial_bitserial_conv_nchw' in op.tag or 'spatial_bitserial_conv_nhwc' in op.tag: @@ -273,7 +274,7 @@ def _schedule_bitserial_conv2d(cfg, outs): data_q = data_vec.op.input_tensors[0] data = data_q.op.input_tensors[0] data_pad = None - if isinstance(data_q.op, tvm.tensor.ComputeOp) and "pad" in data_q.op.tag: + if isinstance(data_q.op, tvm.te.ComputeOp) and "pad" in data_q.op.tag: data_pad = data_q data_q = data data = data_q.op.input_tensors[0] @@ -320,7 +321,7 @@ def _schedule_bitserial_conv2d_nchw(cfg, s, data_q, data_pad, data_vec, VH = cfg["tile_oh"].size[-1] VW = cfg["tile_ow"].size[-1] - ##### Schedule Data padding, and bitpacking + ##### Schedule Data padding, and bitpacking if data_pad is not None: 
s[data_pad].compute_inline() diff --git a/topi/python/topi/x86/bitserial_dense.py b/topi/python/topi/x86/bitserial_dense.py index d464cae..cbc6ac8 100644 --- a/topi/python/topi/x86/bitserial_dense.py +++ b/topi/python/topi/x86/bitserial_dense.py @@ -18,6 +18,7 @@ """Schedule for bitserial dense operator.""" from __future__ import absolute_import as _abs import tvm +from tvm import te from tvm import autotvm from topi.util import get_const_int, get_const_tuple from .. import tag @@ -30,14 +31,14 @@ def bitserial_dense(cfg, data, weight, data_bits, weight_bits, pack_dtype='uint3 Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 2-D with shape [batch, in_dim] - weight : tvm.Tensor + weight : tvm.te.Tensor 2-D with shape [out_dim, in_dim] or 3-D with shape [out_dim, weight_bits, in_dim] Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 2-D with shape [batch, out_dim] """ data_packed = bitpack(data, data_bits, pack_axis=1, bit_axis=1, pack_type=pack_dtype) @@ -68,26 +69,26 @@ def bitserial_dense(cfg, data, weight, data_bits, weight_bits, pack_dtype='uint3 wvshape = (X//VX, WB, VX, K) oshape = (Y, X) - k = tvm.reduce_axis((0, K), name='k') - db = tvm.reduce_axis((0, DB), name='db') - wb = tvm.reduce_axis((0, WB), name='wb') + k = te.reduce_axis((0, K), name='k') + db = te.reduce_axis((0, DB), name='db') + wb = te.reduce_axis((0, WB), name='wb') # Tile data and weights - weight_vec = tvm.compute(wvshape, lambda xo, wb, vx, k: - weight_packed[xo*VX+vx][wb][k], name='weight_vec') + weight_vec = te.compute(wvshape, lambda xo, wb, vx, k: + weight_packed[xo*VX+vx][wb][k], name='weight_vec') - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod - matmul_unipolar = tvm.compute(oshape, lambda i, j: tvm.sum( - (tvm.popcount(weight_vec[idxdiv(j, VX), wb, idxmod(j, VX), k] & data_packed[i, db, k]) - - tvm.popcount(~weight_vec[idxdiv(j, VX), wb, idxmod(j, VX), k] & data_packed[i, db, k]) - 
).astype(out_dtype) + matmul_unipolar = te.compute(oshape, lambda i, j: te.sum( + (tvm.tir.popcount(weight_vec[idxdiv(j, VX), wb, idxmod(j, VX), k] & data_packed[i, db, k]) - + tvm.tir.popcount(~weight_vec[idxdiv(j, VX), wb, idxmod(j, VX), k] & data_packed[i, db, k]) + ).astype(out_dtype) << (db+wb).astype(out_dtype), axis=[wb, db, k]), tag='bitserial_dense_unipolar') - matmul = tvm.compute(oshape, lambda i, j: tvm.sum( - tvm.popcount(weight_vec[idxdiv(j, VX), wb, idxmod(j, VX), k] & data_packed[i, db, k] - ).astype(out_dtype) + matmul = te.compute(oshape, lambda i, j: te.sum( + tvm.tir.popcount(weight_vec[idxdiv(j, VX), wb, idxmod(j, VX), k] & data_packed[i, db, k] + ).astype(out_dtype) << (db+wb).astype(out_dtype), axis=[wb, db, k]), tag='bitserial_dense') # binary ops @@ -112,8 +113,8 @@ def schedule_bitserial_dense(cfg, outs): s: Schedule The computation schedule for bitserial_dense. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _schedule(cfg, s, data_vec, weight_vec, output): s[data_vec].parallel(s[data_vec].op.axis[0]) @@ -149,7 +150,7 @@ def schedule_bitserial_dense(cfg, outs): if op not in s.outputs: s[op].compute_inline() for tensor in op.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp): + if isinstance(tensor.op, tvm.te.ComputeOp): traverse(tensor.op) elif op.tag == 'bitserial_dense' or 'bitserial_dense_unipolar': diff --git a/topi/python/topi/x86/conv1d.py b/topi/python/topi/x86/conv1d.py index 70c2a68..1e30c9f 100644 --- a/topi/python/topi/x86/conv1d.py +++ b/topi/python/topi/x86/conv1d.py @@ -16,14 +16,13 @@ # under the License. # pylint: disable=invalid-name,unused-variable,unused-argument,invalid-name """Conv1D schedule on for Intel CPU""" -from __future__ import absolute_import as _abs -import tvm +from tvm import te from .. 
import tag def schedule_conv1d_ncw(outs): """Create schedule for tensors""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) output_op = outs[0].op scheduled_ops = [] @@ -40,18 +39,18 @@ def schedule_conv1d_ncw(outs): s[op].parallel(fused) s[op].vectorize(w) for tensor in op.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) if 'conv1d_ncw' in op.tag: conv = op.output(0) kernel = op.input_tensors[1] - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, te.tensor.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() data = op.input_tensors[0] data_pad = None - if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag: + if isinstance(data.op, te.tensor.ComputeOp) and "pad" in data.op.tag: data_pad = data data = data_pad.op.input_tensors[0] @@ -77,7 +76,7 @@ def schedule_conv1d_ncw(outs): def schedule_conv1d_nwc(outs): """Create schedule for tensors""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) output_op = outs[0].op scheduled_ops = [] @@ -94,18 +93,18 @@ def schedule_conv1d_nwc(outs): s[op].parallel(fused) s[op].vectorize(c) for tensor in op.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) if 'conv1d_nwc' in op.tag: conv = op.output(0) kernel = op.input_tensors[1] - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, te.tensor.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() data = op.input_tensors[0] data_pad = None - if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag: + if isinstance(data.op, 
te.tensor.ComputeOp) and "pad" in data.op.tag: data_pad = data data = data_pad.op.input_tensors[0] diff --git a/topi/python/topi/x86/conv2d.py b/topi/python/topi/x86/conv2d.py index 2403b01..81d848a 100644 --- a/topi/python/topi/x86/conv2d.py +++ b/topi/python/topi/x86/conv2d.py @@ -21,6 +21,7 @@ import logging import tvm +from tvm import te from tvm import autotvm from .. import nn from ..nn.conv2d import conv2d_infer_layout, _get_workload as _get_conv2d_workload @@ -39,11 +40,11 @@ def _get_default_config(cfg, data, kernel, strides, padding, out_dtype, is_depth """ static_data_shape = [] for dim in get_const_tuple(data.shape): - if isinstance(dim, tvm.expr.Var): + if isinstance(dim, tvm.tir.Var): static_data_shape.append(1) else: static_data_shape.append(dim) - data = tvm.placeholder(static_data_shape, dtype=data.dtype) + data = te.placeholder(static_data_shape, dtype=data.dtype) if is_depthwise: wkl = _get_depthwise_conv2d_workload(data, kernel, strides, padding, out_dtype) from .depthwise_conv2d import _fallback_schedule @@ -61,7 +62,7 @@ def _conv2d_infer_layout(workload, cfg): _, data, kernel, strides, padding, dilation, layout, _, dtype = workload batch_size, in_channel, in_height, in_width = data[1] out_channel, _, k_height, k_width = kernel[1] - idxdiv = tvm.indexdiv + idxdiv = tvm.tir.indexdiv pt, pl, pb, pr = get_pad_tuple(padding, (k_height, k_width)) out_height = idxdiv(in_height + pt + pb - k_height, strides[0]) + 1 @@ -75,20 +76,20 @@ def _conv2d_infer_layout(workload, cfg): def schedule_conv2d_nhwc(outs): """Create schedule for conv2d_nhwc""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) output_op = outs[0].op def _callback(op): if 'conv2d_nhwc' in op.tag: conv = op.output(0) kernel = op.input_tensors[1] - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in 
kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() data = op.input_tensors[0] data_pad = None - if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag: + if isinstance(data.op, tvm.te.ComputeOp) and "pad" in data.op.tag: data_pad = data data = data_pad.op.input_tensors[0] @@ -132,11 +133,11 @@ def _pack_data(cfg, data, kernel): ic_chunk = ic // ic_bn oc_chunk = oc // oc_bn - data = tvm.compute((n, ic_chunk, ih, iw, ic_bn), - lambda bs, c, h, w, vc: data[bs, c*ic_bn + vc, h, w], - name="data_vec") + data = te.compute((n, ic_chunk, ih, iw, ic_bn), + lambda bs, c, h, w, vc: data[bs, c*ic_bn + vc, h, w], + name="data_vec") - kernel = tvm.compute( + kernel = te.compute( (oc_chunk, ic_chunk, kh, kw, ic_bn, oc_bn), lambda occ, icc, k_h, k_w, icb, ocb: kernel[occ * oc_bn + ocb, icc * ic_bn + icb, k_h, k_w], @@ -176,9 +177,9 @@ def conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation, layout, out_layo # If no config was set, we can fallback to default config. if cfg.is_fallback: - _get_default_config(cfg, tvm.placeholder((n, in_channel, ih, iw), dtype=data.dtype), - tvm.placeholder((num_filter, in_channel, kernel_height, kernel_width), - dtype=kernel.dtype), + _get_default_config(cfg, te.placeholder((n, in_channel, ih, iw), dtype=data.dtype), + te.placeholder((num_filter, in_channel, kernel_height, kernel_width), + dtype=kernel.dtype), strides, padding, out_dtype) # Pack data if raw 4-D data is provided. 
@@ -198,8 +199,8 @@ def conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation, layout, out_layo @autotvm.register_topi_schedule("conv2d_NCHWc.x86") def schedule_conv2d_NCHWc(cfg, outs): """Create schedule for tensors""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): if 'conv2d_NCHWc' in op.tag: diff --git a/topi/python/topi/x86/conv2d_alter_op.py b/topi/python/topi/x86/conv2d_alter_op.py index 377d815..5ee691b 100644 --- a/topi/python/topi/x86/conv2d_alter_op.py +++ b/topi/python/topi/x86/conv2d_alter_op.py @@ -20,6 +20,7 @@ import logging import tvm +from tvm import te from tvm import relay from tvm import autotvm from .conv2d import _get_default_config @@ -79,10 +80,10 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): new_attrs['out_layout'] = 'NCHW%dc' % oc_bn # Store altered operator's config - new_data = tvm.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn), - dtype=data_dtype) - new_kernel = tvm.placeholder((out_channel//oc_bn, in_channel//ic_bn, - kh, kw, ic_bn, oc_bn), dtype=kernel_tensor.dtype) + new_data = te.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn), + dtype=data_dtype) + new_kernel = te.placeholder((out_channel//oc_bn, in_channel//ic_bn, + kh, kw, ic_bn, oc_bn), dtype=kernel_tensor.dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, new_attrs["data_layout"], new_attrs["out_layout"], out_dtype], topi_tmpl) @@ -118,15 +119,15 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): new_attrs['out_layout'] = 'NCHW%dc' % oc_bn # Store altered operator's config. 
- new_data = tvm.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn), - dtype=data_dtype) - new_kernel = tvm.placeholder((out_channel // oc_bn, - in_channel // ic_bn, - kh, - kw, - ic_bn // n_elems, - oc_bn, - n_elems), dtype=kernel_dtype) + new_data = te.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn), + dtype=data_dtype) + new_kernel = te.placeholder((out_channel // oc_bn, + in_channel // ic_bn, + kh, + kw, + ic_bn // n_elems, + oc_bn, + n_elems), dtype=kernel_dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, new_attrs['data_layout'], new_attrs['out_layout'], out_dtype], topi_tmpl) @@ -152,9 +153,9 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): new_attrs['out_layout'] = 'NCHW%dc' % oc_bn # Store altered operator's config. - new_data = tvm.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn), - dtype=data_dtype) - new_kernel = tvm.placeholder((out_channel//oc_bn, 1, kh, kw, 1, oc_bn), dtype=kernel_dtype) + new_data = te.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn), + dtype=data_dtype) + new_kernel = te.placeholder((out_channel//oc_bn, 1, kh, kw, 1, oc_bn), dtype=kernel_dtype) new_workload = autotvm.task.args_to_workload( [new_data, new_kernel, strides, padding, dilation, new_attrs['data_layout'], new_attrs['out_layout'], out_dtype], topi_tmpl) diff --git a/topi/python/topi/x86/conv2d_avx_1x1.py b/topi/python/topi/x86/conv2d_avx_1x1.py index 083fff4..432f8b2 100644 --- a/topi/python/topi/x86/conv2d_avx_1x1.py +++ b/topi/python/topi/x86/conv2d_avx_1x1.py @@ -18,6 +18,7 @@ """1x1 Conv2D schedule on for Intel CPU""" from __future__ import absolute_import as _abs import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import SplitEntity, OtherOptionEntity @@ -65,7 +66,7 @@ def _schedule_conv_NCHWc(s, cfg, data_vec, kernel_vec, conv_out, last): _, _, _, _, ic_bn = get_const_tuple(data_vec.shape) # schedule pad 
- if isinstance(s[data_vec].op, tvm.tensor.ComputeOp) \ + if isinstance(s[data_vec].op, tvm.te.ComputeOp) \ and "pad" in data_vec.op.tag: batch, ic_chunk, ih, iw, ic_block = s[data_vec].op.axis parallel_axis = s[data_vec].fuse(batch, ic_chunk, ih) @@ -78,7 +79,7 @@ def _schedule_conv_NCHWc(s, cfg, data_vec, kernel_vec, conv_out, last): # this part will be folded during Relay fold_constant pass. s[data_vec].pragma(s[data_vec].op.axis[0], "debug_skip_region") s[kernel_vec].pragma(s[kernel_vec].op.axis[0], "debug_skip_region") - elif isinstance(kernel_vec.op, tvm.tensor.ComputeOp) and \ + elif isinstance(kernel_vec.op, tvm.te.ComputeOp) and \ kernel_vec.name == 'kernel_vec': # data and kernel are not pre-computed, schedule layout transform here. # this should only be used by x86 conv2d_nchw, which is for @@ -190,23 +191,23 @@ def _declaration_conv_nhwc_pack(cfg, Input, Filter, stride, padding, dilation, o # packing the Filter to let memory access be consecutive for AVX512 intrinsic # Done in pre-compute stage - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod packw_shape = (kernel_h, kernel_w, idxd(num_filter, 16), 16 * idxd(channel, 4), 4) - PackW = tvm.compute(packw_shape, - lambda a, b, c, d, e: - Filter[a, b, - c*16 + idxm(d, 16), - idxd(d, 16) * 4 + e], - name="packed_filter") - - rc = tvm.reduce_axis((0, in_channel), name='rc') - ry = tvm.reduce_axis((0, kernel_h), name='ry') - rx = tvm.reduce_axis((0, kernel_w), name='rx') - Output = tvm.compute( + PackW = te.compute(packw_shape, + lambda a, b, c, d, e: + Filter[a, b, + c*16 + idxm(d, 16), + idxd(d, 16) * 4 + e], + name="packed_filter") + + rc = te.reduce_axis((0, in_channel), name='rc') + ry = te.reduce_axis((0, kernel_h), name='ry') + rx = te.reduce_axis((0, kernel_w), name='rx') + Output = te.compute( (batch, out_height, out_width, out_channel), - lambda nn, yy, xx, ff: tvm.sum( + lambda nn, yy, xx, ff: te.sum( PaddedInput[nn, yy * stride_h + ry * dilation_h, xx * 
stride_w + rx * dilation_w, rc].astype(out_dtype) * PackW[ry, rx, idxd(ff, 16), @@ -238,7 +239,7 @@ def _schedule_conv_nhwc_pack_int8(s, cfg, data, conv_out, last): ic_factor, oc_factor = cfg["tile_ic"].size[-1], cfg["tile_oc"].size[-1] # schedule data A = data - if isinstance(s[A].op, tvm.tensor.ComputeOp): + if isinstance(s[A].op, tvm.te.ComputeOp): batch, ih, iw, ic = s[A].op.axis d_ic_chunk, d_ic_block = s[A].split(ic, factor=4) s[A].vectorize(d_ic_block) diff --git a/topi/python/topi/x86/conv2d_avx_common.py b/topi/python/topi/x86/conv2d_avx_common.py index 085d0ae..ebed14c 100644 --- a/topi/python/topi/x86/conv2d_avx_common.py +++ b/topi/python/topi/x86/conv2d_avx_common.py @@ -16,7 +16,6 @@ # under the License. # pylint: disable=invalid-name,unused-variable,unused-argument,invalid-name """Conv2D schedule on for Intel CPU""" -from __future__ import absolute_import as _abs import tvm from tvm import autotvm from tvm.autotvm.task.space import SplitEntity, OtherOptionEntity @@ -89,7 +88,7 @@ def _schedule_conv_NCHWc(s, cfg, data_vec, kernel_vec, conv_out, last): _, _, _, _, ic_bn = get_const_tuple(data_vec.shape) # schedule pad - if isinstance(s[data_vec].op, tvm.tensor.ComputeOp) \ + if isinstance(s[data_vec].op, tvm.te.ComputeOp) \ and "pad" in data_vec.op.tag: batch, ic_chunk, ih, iw, ic_block = s[data_vec].op.axis parallel_axis = s[data_vec].fuse(batch, ic_chunk, ih) @@ -102,7 +101,7 @@ def _schedule_conv_NCHWc(s, cfg, data_vec, kernel_vec, conv_out, last): # this part will be folded during Relay fold_constant pass. s[data_vec].pragma(s[data_vec].op.axis[0], "debug_skip_region") s[kernel_vec].pragma(s[kernel_vec].op.axis[0], "debug_skip_region") - elif isinstance(kernel_vec.op, tvm.tensor.ComputeOp) and \ + elif isinstance(kernel_vec.op, tvm.te.ComputeOp) and \ kernel_vec.name == 'kernel_vec': # data and kernel are not pre-computed, schedule layout transform here. 
# this should only be used by x86 conv2d_nchw, which is for diff --git a/topi/python/topi/x86/conv2d_int8.py b/topi/python/topi/x86/conv2d_int8.py index 64fe92b..4b11143 100644 --- a/topi/python/topi/x86/conv2d_int8.py +++ b/topi/python/topi/x86/conv2d_int8.py @@ -19,6 +19,7 @@ """Conv2D int8 schedule on x86""" import tvm +from tvm import te from tvm import autotvm from ..nn.conv2d import _get_workload as _get_conv2d_workload from .. import tag @@ -96,11 +97,11 @@ def _pack_data(cfg, data, kernel): ic_chunk = ic // ic_bn oc_chunk = oc // oc_bn - data = tvm.compute((n, ic_chunk, ih, iw, ic_bn), - lambda bs, c, h, w, vc: data[bs, c*ic_bn + vc, h, w], - name="data_vec") + data = te.compute((n, ic_chunk, ih, iw, ic_bn), + lambda bs, c, h, w, vc: data[bs, c*ic_bn + vc, h, w], + name="data_vec") - kernel = tvm.compute( + kernel = te.compute( (oc_chunk, ic_chunk, kh, kw, ic_bn//n_elems, oc_bn, n_elems), lambda occ, icc, k_h, k_w, icbc, ocb, icbb: kernel[occ * oc_bn + ocb, @@ -145,9 +146,9 @@ def conv2d_NCHWc_int8(cfg, data, kernel, strides, padding, # If no config was set, we can fallback to default config. if cfg.is_fallback: _get_default_config_int8( - cfg, tvm.placeholder((n, in_channel, ih, iw), dtype=data.dtype), - tvm.placeholder((num_filter, in_channel, kernel_height, kernel_width), - dtype=kernel.dtype), + cfg, te.placeholder((n, in_channel, ih, iw), dtype=data.dtype), + te.placeholder((num_filter, in_channel, kernel_height, kernel_width), + dtype=kernel.dtype), strides, padding, out_dtype) # Pack data if raw 4-D data is provided. 
@@ -168,7 +169,7 @@ def conv2d_NCHWc_int8(cfg, data, kernel, strides, padding, @autotvm.register_topi_schedule("conv2d_NCHWc_int8.x86") def schedule_conv2d_NCHWc_int8(cfg, outs): """Create schedule for tensors""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): """Traverse operators from computation graph""" @@ -192,7 +193,7 @@ def schedule_conv2d_NCHWc_int8(cfg, outs): @autotvm.register_topi_schedule("conv2d_nhwc_pack_int8.x86") def schedule_conv2d_nhwc_pack_int8(cfg, outs): """Create schedule for tensors""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) output_op = outs[0].op scheduled_ops = [] @@ -209,7 +210,7 @@ def schedule_conv2d_nhwc_pack_int8(cfg, outs): s[op].parallel(fused) s[op].vectorize(c) for tensor in op.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) if 'conv2d_nhwc_pack_int8' in op.tag: @@ -217,9 +218,9 @@ def schedule_conv2d_nhwc_pack_int8(cfg, outs): kernel = conv_out.op.input_tensors[1] data_vec = conv_out.op.input_tensors[0] data = data_vec.op.input_tensors[0] \ - if isinstance(data_vec.op, tvm.tensor.ComputeOp) and "pad" not in data_vec.op.tag \ + if isinstance(data_vec.op, te.tensor.ComputeOp) and "pad" not in data_vec.op.tag \ else data_vec - if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag: + if isinstance(data.op, te.tensor.ComputeOp) and "pad" in data.op.tag: data_pad = data data = data_pad.op.input_tensors[0] diff --git a/topi/python/topi/x86/conv2d_transpose.py b/topi/python/topi/x86/conv2d_transpose.py index 71f47d6..f90edb5 100644 --- a/topi/python/topi/x86/conv2d_transpose.py +++ b/topi/python/topi/x86/conv2d_transpose.py @@ -16,7 +16,7 @@ # under the License. 
# pylint: disable=invalid-name,unused-variable,unused-argument,no-member """Conv2D Transpose schedule on x86""" -import tvm +from tvm import te from ..util import traverse_inline from .. import nn from .conv2d import conv2d_nchw, schedule_conv2d_nchw @@ -30,7 +30,7 @@ def conv2d_transpose_nchw(data, kernel, strides, padding, out_dtype): def schedule_conv2d_transpose_nchw(outs): """Create schedule for tensors""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs s = schedule_conv2d_nchw(outs) def _callback(op): if 'unpack_nchwc' in op.tag: diff --git a/topi/python/topi/x86/conv3d.py b/topi/python/topi/x86/conv3d.py index 1e15650..989ec4c 100644 --- a/topi/python/topi/x86/conv3d.py +++ b/topi/python/topi/x86/conv3d.py @@ -19,6 +19,7 @@ """Conv3D operators""" from collections import namedtuple import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import SplitEntity, OtherOptionEntity from ..util import traverse_inline @@ -39,12 +40,12 @@ def conv3d_ndhwc(cfg, data, kernel, strides, padding, dilation, out_dtype): Parameters ---------- - input : tvm.Tensor + input : tvm.te.Tensor 5-D input data with shapes: [batch, in_channel, in_depth, in_height, in_width] for NCDHW layout [batch, in_depth, in_height, in_width, in_channel] for NDHWC layout - filter : tvm.Tensor + filter : tvm.te.Tensor 5-D filter with shape [kernel_depth, kernel_height, kernel_width, in_channels, out_channels] strides : int or a list/tuple of three ints @@ -58,7 +59,7 @@ def conv3d_ndhwc(cfg, data, kernel, strides, padding, dilation, out_dtype): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 5-D with shape [batch, out_depth, out_height, out_width, out_channel] for NDHWC layout 5-D with shape [batch, out_channel, out_depth, out_height, out_width] for NCDHW layout """ @@ -86,7 +87,7 @@ def schedule_conv3d_ndhwc(cfg, outs): s: Schedule The computation schedule for conv3d. 
""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _traverse(op): if 'conv3d_ndhwc' in op.tag: @@ -94,12 +95,12 @@ def schedule_conv3d_ndhwc(cfg, outs): conv_out = op.input_tensors[0] kernel_vec = conv_out.op.input_tensors[1] kernel = kernel_vec.op.input_tensors[0] - if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag: + if isinstance(kernel.op, tvm.te.ComputeOp) and "dilate" in kernel.op.tag: s[kernel].compute_inline() data_vec = conv_out.op.input_tensors[0] data = data_vec.op.input_tensors[0] data_pad = None - if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag: + if isinstance(data.op, tvm.te.ComputeOp) and "pad" in data.op.tag: data_pad = data data = data_pad.op.input_tensors[0] @@ -154,47 +155,47 @@ def _conv3d_ndhwc(cfg, data, kernel, strides, padding, dilation, out_dtype): # fetch schedule ic_bn, oc_bn = cfg["tile_ic"].size[-1], cfg["tile_oc"].size[-1] shape = (batch_size, in_channel // ic_bn, pad_depth, pad_height, ic_bn, pad_width) - data_vec = tvm.compute(shape, - lambda n, C, d, h, c, w: data_pad[n, d, h, w, C * ic_bn + c], - name='data_vec') + data_vec = te.compute(shape, + lambda n, C, d, h, c, w: data_pad[n, d, h, w, C * ic_bn + c], + name='data_vec') # pack kernel shape = (num_filter//oc_bn, in_channel//ic_bn, kernel_depth, kernel_height, kernel_width, ic_bn, oc_bn) - kernel_vec = tvm.compute(shape, - lambda CO, CI, d, h, w, ci, co: - kernel[d, h, w, CI * ic_bn + ci, CO * oc_bn + co], - name='kernel_vec') + kernel_vec = te.compute(shape, + lambda CO, CI, d, h, w, ci, co: + kernel[d, h, w, CI * ic_bn + ci, CO * oc_bn + co], + name='kernel_vec') # convolution oshape = (batch_size, num_filter//oc_bn, out_depth, out_height, out_width, oc_bn) unpack_shape = (batch_size, out_depth, out_height, out_width, num_filter) - ic = tvm.reduce_axis((0, in_channel), name='ic') - kh = tvm.reduce_axis((0, kernel_height), name='kh') - kw = tvm.reduce_axis((0, kernel_width), 
name='kw') - kd = tvm.reduce_axis((0, kernel_depth), name='kd') - idxmod = tvm.indexmod - idxdiv = tvm.indexdiv - - conv = tvm.compute(oshape, lambda n, oc_chunk, od, oh, ow, oc_block: - tvm.sum(data_vec[n, - idxdiv(ic, ic_bn), - od*DSTR+kd*dilation_d, - oh*HSTR+kh*dilation_h, + ic = te.reduce_axis((0, in_channel), name='ic') + kh = te.reduce_axis((0, kernel_height), name='kh') + kw = te.reduce_axis((0, kernel_width), name='kw') + kd = te.reduce_axis((0, kernel_depth), name='kd') + idxmod = tvm.tir.indexmod + idxdiv = tvm.tir.indexdiv + + conv = te.compute(oshape, lambda n, oc_chunk, od, oh, ow, oc_block: + te.sum(data_vec[n, + idxdiv(ic, ic_bn), + od*DSTR+kd*dilation_d, + oh*HSTR+kh*dilation_h, + idxmod(ic, ic_bn), + ow*WSTR+kw*dilation_w].astype(out_dtype) * + kernel_vec[oc_chunk, idxdiv(ic, ic_bn), kd, kh, kw, idxmod(ic, ic_bn), - ow*WSTR+kw*dilation_w].astype(out_dtype) * - kernel_vec[oc_chunk, idxdiv(ic, ic_bn), kd, kh, kw, - idxmod(ic, ic_bn), - oc_block].astype(out_dtype), - axis=[kd, kh, kw, ic]), name='conv') - conv_unpacked = tvm.compute(unpack_shape, - lambda n, d, h, w, c: conv[n, idxdiv(c, oc_bn), - d, h, w, - idxmod(c, oc_bn)] - .astype(out_dtype), - name='output_unpack', - tag='conv3d_ndhwc') + oc_block].astype(out_dtype), + axis=[kd, kh, kw, ic]), name='conv') + conv_unpacked = te.compute(unpack_shape, + lambda n, d, h, w, c: conv[n, idxdiv(c, oc_bn), + d, h, w, + idxmod(c, oc_bn)] + .astype(out_dtype), + name='output_unpack', + tag='conv3d_ndhwc') return conv_unpacked @@ -231,11 +232,11 @@ def _get_default_config(cfg, data, kernel, strides, padding, out_dtype, layout): static_data_shape = [] for dim in get_const_tuple(data.shape): - if isinstance(dim, tvm.expr.Var): + if isinstance(dim, tvm.tir.Var): static_data_shape.append(1) else: static_data_shape.append(dim) - data = tvm.placeholder(static_data_shape, dtype=data.dtype) + data = te.placeholder(static_data_shape, dtype=data.dtype) wkl = _get_conv3d_workload(data, kernel, strides, padding, 
out_dtype, layout) _fallback_schedule(cfg, wkl) diff --git a/topi/python/topi/x86/dense.py b/topi/python/topi/x86/dense.py index ea89cf4..3e99d06 100644 --- a/topi/python/topi/x86/dense.py +++ b/topi/python/topi/x86/dense.py @@ -18,6 +18,7 @@ """x86 dense operators""" from __future__ import absolute_import as _abs import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import SplitEntity from tvm.contrib import cblas @@ -79,11 +80,11 @@ def _schedule_dense_nopack_template(cfg, s, C): def _default_dense_pack_config(cfg, M, N, K): # Generate default schedule for dynamic shape. - if isinstance(M, tvm.expr.Var): + if isinstance(M, tvm.tir.Var): M = 16 - if isinstance(N, tvm.expr.Var): + if isinstance(N, tvm.tir.Var): N = 16 - if isinstance(K, tvm.expr.Var): + if isinstance(K, tvm.tir.Var): K = 16 vec_width = get_fp32_len() @@ -116,11 +117,11 @@ def _default_dense_pack_config(cfg, M, N, K): def _default_dense_nopack_config(cfg, M, N, K): # Generate default schedule for dynamic shape. 
- if isinstance(M, tvm.expr.Var): + if isinstance(M, tvm.tir.Var): M = 16 - if isinstance(N, tvm.expr.Var): + if isinstance(N, tvm.tir.Var): N = 16 - if isinstance(K, tvm.expr.Var): + if isinstance(K, tvm.tir.Var): K = 16 vec_width = get_fp32_len() @@ -141,33 +142,33 @@ def dense_nopack(cfg, data, weight, bias=None, out_dtype=None): M, K = get_const_tuple(data.shape) N, _ = get_const_tuple(weight.shape) # create tuning space - cfg.define_split("tile_y", 32 if isinstance(M, tvm.expr.Var) else M, num_outputs=2) - cfg.define_split("tile_x", 32 if isinstance(N, tvm.expr.Var) else N, num_outputs=2) - cfg.define_split("tile_k", 32 if isinstance(K, tvm.expr.Var) else K, num_outputs=2) + cfg.define_split("tile_y", 32 if isinstance(M, tvm.tir.Var) else M, num_outputs=2) + cfg.define_split("tile_x", 32 if isinstance(N, tvm.tir.Var) else N, num_outputs=2) + cfg.define_split("tile_k", 32 if isinstance(K, tvm.tir.Var) else K, num_outputs=2) if cfg.is_fallback: _default_dense_nopack_config(cfg, M, N, K) vec = cfg["tile_k"].size[-1] - k = tvm.reduce_axis((0, K // vec), "k") - CC = tvm.compute((M, N, vec), - lambda z, y, x: tvm.sum( - data[z, k * vec + x].astype(out_dtype) * - weight[y, k * vec + x].astype(out_dtype), axis=k)) - - kk = tvm.reduce_axis((0, vec), "kk") - C = tvm.compute((M, N), - lambda y, x: tvm.sum(CC[y, x, kk], axis=kk), - tag="dense_nopack") + k = te.reduce_axis((0, K // vec), "k") + CC = te.compute((M, N, vec), + lambda z, y, x: te.sum( + data[z, k * vec + x].astype(out_dtype) * + weight[y, k * vec + x].astype(out_dtype), axis=k)) + + kk = te.reduce_axis((0, vec), "kk") + C = te.compute((M, N), + lambda y, x: te.sum(CC[y, x, kk], axis=kk), + tag="dense_nopack") if bias is not None: - C = tvm.compute((M, N), lambda i, j: C[i, j] + bias[j].astype(out_dtype), - tag=tag.BROADCAST) + C = te.compute((M, N), lambda i, j: C[i, j] + bias[j].astype(out_dtype), + tag=tag.BROADCAST) return C @autotvm.register_topi_schedule("dense_nopack.x86") def schedule_dense_nopack(cfg, 
outs): """Create the schedule for dense_nopack""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): if 'dense_nopack' in op.tag: @@ -191,27 +192,27 @@ def dense_pack(cfg, data, weight, bias=None, out_dtype=None): packw_bn = cfg["tile_x"].size[-1] packw_shape = (N // packw_bn, K, packw_bn) - packw = tvm.compute(packw_shape, - lambda z, y, x: weight[z * packw_bn + x, y], name="packed_weight") - - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod - k = tvm.reduce_axis((0, K), name="k") - C = tvm.compute((M, N), - lambda y, x: tvm.sum( - data[y, k].astype(out_dtype) * - packw[idxdiv(x, packw_bn), k, idxmod(x, packw_bn)].astype(out_dtype), - axis=k), - tag="dense_pack") + packw = te.compute(packw_shape, + lambda z, y, x: weight[z * packw_bn + x, y], name="packed_weight") + + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod + k = te.reduce_axis((0, K), name="k") + C = te.compute((M, N), + lambda y, x: te.sum( + data[y, k].astype(out_dtype) * + packw[idxdiv(x, packw_bn), k, idxmod(x, packw_bn)].astype(out_dtype), + axis=k), + tag="dense_pack") if bias is not None: - C = tvm.compute((M, N), lambda i, j: C[i, j] + bias[j].astype(out_dtype), - tag=tag.BROADCAST) + C = te.compute((M, N), lambda i, j: C[i, j] + bias[j].astype(out_dtype), + tag=tag.BROADCAST) return C @autotvm.register_topi_schedule("dense_pack.x86") def schedule_dense_pack(cfg, outs): """Create the schedule for dense_pack""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): if "dense_pack" in op.tag: @@ -227,8 +228,8 @@ def dense_cblas(cfg, data, weight, bias=None, out_dtype=None): cfg.add_flop(M * K * N * 2) C = cblas.matmul(data, weight, False, True) if bias is not None: - C = tvm.compute(C.shape, lambda i, j: C[i, j] + bias[j].astype(out_dtype), - tag=tag.BROADCAST) + C = te.compute(C.shape, lambda i, j: C[i, j] + bias[j].astype(out_dtype), + tag=tag.BROADCAST) return C 
@autotvm.register_topi_schedule("dense_cblas.x86") diff --git a/topi/python/topi/x86/depthwise_conv2d.py b/topi/python/topi/x86/depthwise_conv2d.py index 70b30fe..fda964e 100644 --- a/topi/python/topi/x86/depthwise_conv2d.py +++ b/topi/python/topi/x86/depthwise_conv2d.py @@ -18,6 +18,7 @@ # pylint: disable=no-value-for-parameter """Depthwise Conv2D schedule on x86""" import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import SplitEntity from ..nn.pad import pad @@ -87,11 +88,11 @@ def _pack_data(cfg, data, kernel): ic_chunk = ic // ic_bn oc_chunk = oc // oc_bn - data = tvm.compute((n, ic_chunk, ih, iw, ic_bn), - lambda bs, c, h, w, vc: data[bs, c*ic_bn + vc, h, w], - name="data_vec") + data = te.compute((n, ic_chunk, ih, iw, ic_bn), + lambda bs, c, h, w, vc: data[bs, c*ic_bn + vc, h, w], + name="data_vec") - kernel = tvm.compute( + kernel = te.compute( (oc_chunk, 1, kh, kw, 1, oc_bn), lambda occ, icc, k_h, k_w, icb, ocb: kernel[(occ * oc_bn + ocb) // cm, @@ -135,9 +136,9 @@ def depthwise_conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation, # get workload and related schedule config wkl = _get_workload( - tvm.placeholder((batch, in_channel, in_height, in_width), dtype=data.dtype), - tvm.placeholder((out_channel, channel_multiplier, filter_height, filter_width), - dtype=kernel.dtype), + te.placeholder((batch, in_channel, in_height, in_width), dtype=data.dtype), + te.placeholder((out_channel, channel_multiplier, filter_height, filter_width), + dtype=kernel.dtype), strides, padding, out_dtype) if cfg.is_fallback: _fallback_schedule(cfg, wkl) @@ -160,14 +161,14 @@ def depthwise_conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation, data_pad = data # depthconv stage - idxdiv = tvm.indexdiv - idxmod = tvm.indexmod + idxdiv = tvm.tir.indexdiv + idxmod = tvm.tir.indexmod - kh = tvm.reduce_axis((0, filter_height), name='kh') - kw = tvm.reduce_axis((0, filter_width), name='kw') - Output = tvm.compute( + kh = te.reduce_axis((0, 
filter_height), name='kh') + kw = te.reduce_axis((0, filter_width), name='kw') + Output = te.compute( (batch, out_channel_chunk, out_height, out_width, out_channel_block), - lambda b, oco, oh, ow, oci: tvm.sum( + lambda b, oco, oh, ow, oci: te.sum( (data_pad[ b, idxdiv(idxdiv(oco * out_channel_block + oci, channel_multiplier), in_channel_block), @@ -182,8 +183,8 @@ def depthwise_conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation, @autotvm.register_topi_schedule("depthwise_conv2d_NCHWc.x86") def schedule_depthwise_conv2d_NCHWc(cfg, outs): """CPU schedule for depthwise conv2d in NCHW[x]c layout""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) def _callback(op): """Traverse operators from computation graph""" @@ -199,7 +200,7 @@ def schedule_depthwise_conv2d_NCHWc(cfg, outs): def _schedule_depthwise_conv2d_NCHWc_impl(s, cfg, data_vec, kernel_vec, conv_out, output): tile_ow, oc_bn = cfg["tile_ow"].size[-1], cfg["tile_oc"].size[-1] # schedule pad - if isinstance(s[data_vec].op, tvm.tensor.ComputeOp) \ + if isinstance(s[data_vec].op, tvm.te.ComputeOp) \ and "pad" in data_vec.op.tag: batch, ic_chunk, ih, iw, ic_block = s[data_vec].op.axis parallel_axis = s[data_vec].fuse(batch, ic_chunk, ih) diff --git a/topi/python/topi/x86/injective.py b/topi/python/topi/x86/injective.py index 375827b..7c37ac7 100644 --- a/topi/python/topi/x86/injective.py +++ b/topi/python/topi/x86/injective.py @@ -16,8 +16,7 @@ # under the License. # pylint: disable=invalid-name """x86 declaration and schedules.""" -from __future__ import absolute_import as _abs -import tvm +from tvm import te from ..util import is_empty_shape def schedule_injective_from_existing(sch, out): @@ -65,10 +64,10 @@ def schedule_injective(outs): sch: Schedule The computation schedule for the op. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs x = outs[0] - s = tvm.create_schedule([x.op for x in outs]) - tvm.schedule.AutoInlineInjective(s) + s = te.create_schedule([x.op for x in outs]) + te.schedule.AutoInlineInjective(s) if not is_empty_shape(x.shape): schedule_injective_from_existing(s, x) @@ -104,10 +103,10 @@ def schedule_concatenate(outs): _, inner_i = sch[tensor].split(inner_axis, split_factor) sch[tensor].vectorize(inner_i) - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs x = outs[0] - s = tvm.create_schedule([x.op for x in outs]) - tvm.schedule.AutoInlineInjective(s) + s = te.create_schedule([x.op for x in outs]) + te.schedule.AutoInlineInjective(s) if len(s[x].op.axis) >= 5: fused = s[x].fuse(s[x].op.axis[0], s[x].op.axis[1], s[x].op.axis[2]) vectorize(s, x, 64) diff --git a/topi/python/topi/x86/nn.py b/topi/python/topi/x86/nn.py index 3d57b6b..8f884b8 100644 --- a/topi/python/topi/x86/nn.py +++ b/topi/python/topi/x86/nn.py @@ -16,8 +16,7 @@ # under the License. # pylint: disable=invalid-name,too-many-locals,unused-variable """x86 nn operators""" -from __future__ import absolute_import as _abs -import tvm +from tvm import te def schedule_softmax(outs): """Schedule for softmax @@ -33,9 +32,9 @@ def schedule_softmax(outs): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs softmax = outs[0] - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) op_tag = softmax.op.tag if op_tag == 'softmax_output': diff --git a/topi/python/topi/x86/pooling.py b/topi/python/topi/x86/pooling.py index a8251dd..f7664d9 100644 --- a/topi/python/topi/x86/pooling.py +++ b/topi/python/topi/x86/pooling.py @@ -16,7 +16,7 @@ # under the License. 
# pylint: disable=invalid-name, unused-variable """Schedule for pooling operators""" -import tvm +from tvm import te from .. import tag def _parallel_sch(sch, oshape, do_vectorize=False): @@ -75,12 +75,12 @@ def schedule_pool(outs, layout): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def _schedule(PaddedInput, Pool): - if isinstance(PaddedInput.op, tvm.tensor.ComputeOp): + if isinstance(PaddedInput.op, te.tensor.ComputeOp): s[PaddedInput].compute_inline() do_vectorize = layout[-1] not in "HWhw" _parallel_sch(s[Pool], outs[0].shape, do_vectorize) @@ -92,7 +92,7 @@ def schedule_pool(outs, layout): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule pool elif OP.tag.startswith('pool'): @@ -129,8 +129,8 @@ def schedule_adaptive_pool(outs): sch: Schedule The computation schedule for the op. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def traverse(OP): @@ -140,7 +140,7 @@ def schedule_adaptive_pool(outs): if OP not in s.outputs: s[OP].compute_inline() for tensor in OP.input_tensors: - if isinstance(tensor.op, tvm.tensor.ComputeOp) and tensor.op not in scheduled_ops: + if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops: traverse(tensor.op) # schedule pool elif OP.tag.startswith('adaptive_pool'): diff --git a/topi/python/topi/x86/reduction.py b/topi/python/topi/x86/reduction.py index b9dd4d4..0dfc3f2 100644 --- a/topi/python/topi/x86/reduction.py +++ b/topi/python/topi/x86/reduction.py @@ -16,8 +16,8 @@ # under the License. # pylint: disable=invalid-name """x86 declaration and schedules.""" -from __future__ import absolute_import as _abs import tvm +from tvm import te from .injective import schedule_injective_from_existing from .. import tag from ..util import get_const_tuple @@ -72,13 +72,13 @@ def schedule_reduce(outs): sch: Schedule The computation schedule for the op. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - sch = tvm.create_schedule([x.op for x in outs]) + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + sch = te.create_schedule([x.op for x in outs]) scheduled_ops = [] def traverse_before_reduce(operator): """Internal traverse function""" - if isinstance(operator, tvm.tensor.PlaceholderOp): + if isinstance(operator, tvm.te.PlaceholderOp): return if tag.is_injective(operator.tag): sch[operator].compute_inline() @@ -108,7 +108,7 @@ def schedule_reduce(outs): for tensor in input_tensors: if tensor.op not in scheduled_ops: traverse_before_reduce(tensor.op) - elif isinstance(operator, tvm.tensor.PlaceholderOp): + elif isinstance(operator, tvm.te.PlaceholderOp): pass else: raise RuntimeError("Unsupported operator: %s (tag: %s)" % (operator, operator.tag)) diff --git a/topi/python/topi/x86/roi_align.py b/topi/python/topi/x86/roi_align.py index 203c3dd..205d709 100644 --- a/topi/python/topi/x86/roi_align.py +++ b/topi/python/topi/x86/roi_align.py @@ -30,32 +30,32 @@ def roi_align_nchw_ir(data, rois, w_pc, pos_pc, pooled_size, spatial_scale, samp Parameters ---------- - data : tvm.Tensor or numpy NDArray + data : tvm.te.Tensor or numpy NDArray 4-D with shape [batch, channel, height, width] - rois : tvm.Tensor or numpy NDArray + rois : tvm.te.Tensor or numpy NDArray 2-D with shape [num_roi, 5]. The last dimension should be in format of [batch_index, w_start, h_start, w_end, h_end] - w_pc : tvm.Tensor or numpy NDArray + w_pc : tvm.te.Tensor or numpy NDArray 3-D weight pre-calculation buffer - pos_pc : tvm.Tensor or numpy NDArray + pos_pc : tvm.te.Tensor or numpy NDArray 3-D position pre-calculation buffer pooled_size : tvm ConsExpr [out_height, out_width] - spatial_scale : tvm.const + spatial_scale : tvm.tir.const Ratio of input feature map height (or w) to raw image height (or w). 
Equals the reciprocal of total stride in convolutional layers, which should be in range (0.0, 1.0] - sample_ratio : tvm.const + sample_ratio : tvm.tir.const Sampling ratio of ROI align, using adaptive size by default. Returns ------- - output : tvm.Tensor or numpy NDArray + output : tvm.te.Tensor or numpy NDArray 4-D with shape [num_roi, channel, pooled_size, pooled_size] """ channels = data.shape[1] @@ -161,21 +161,21 @@ def roi_align_nchw_ir(data, rois, w_pc, pos_pc, pooled_size, spatial_scale, samp for iy in range(roi_bin_grid_h): for ix in range(roi_bin_grid_w): output_val += w_pc[n, pre_calc_index, 0] \ - * data[roi_batch_index, c, - pos_pc[n, pre_calc_index, 2], - pos_pc[n, pre_calc_index, 0]] \ - + w_pc[n, pre_calc_index, 1] \ - * data[roi_batch_index, c, - pos_pc[n, pre_calc_index, 2], - pos_pc[n, pre_calc_index, 1]] \ - + w_pc[n, pre_calc_index, 2] \ - * data[roi_batch_index, c, - pos_pc[n, pre_calc_index, 3], - pos_pc[n, pre_calc_index, 0]] \ - + w_pc[n, pre_calc_index, 3] \ - * data[roi_batch_index, c, - pos_pc[n, pre_calc_index, 3], - pos_pc[n, pre_calc_index, 1]] + * data[roi_batch_index, c, + pos_pc[n, pre_calc_index, 2], + pos_pc[n, pre_calc_index, 0]] \ + + w_pc[n, pre_calc_index, 1] \ + * data[roi_batch_index, c, + pos_pc[n, pre_calc_index, 2], + pos_pc[n, pre_calc_index, 1]] \ + + w_pc[n, pre_calc_index, 2] \ + * data[roi_batch_index, c, + pos_pc[n, pre_calc_index, 3], + pos_pc[n, pre_calc_index, 0]] \ + + w_pc[n, pre_calc_index, 3] \ + * data[roi_batch_index, c, + pos_pc[n, pre_calc_index, 3], + pos_pc[n, pre_calc_index, 1]] pre_calc_index += 1 output_val /= count @@ -189,10 +189,10 @@ def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1): Parameters ---------- - data : tvm.Tensor + data : tvm.te.Tensor 4-D with shape [batch, channel, height, width] - rois : tvm.Tensor + rois : tvm.te.Tensor 2-D with shape [num_roi, 5]. 
The last dimension should be in format of [batch_index, w_start, h_start, w_end, h_end] @@ -208,7 +208,7 @@ def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 4-D with shape [num_roi, channel, pooled_size, pooled_size] """ if not isinstance(pooled_size, (tuple, list)): @@ -226,8 +226,8 @@ def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1): w_pc_buffer = full(max_pc_shape, data.dtype, 0) pos_pc_buffer = full(max_pc_shape, "int32", 0) - pooled_size = tvm.convert(pooled_size) - spatial_scale = tvm.const(spatial_scale, "float32") - sample_ratio = tvm.const(sample_ratio, "int32") + pooled_size = tvm.runtime.convert(pooled_size) + spatial_scale = tvm.tir.const(spatial_scale, "float32") + sample_ratio = tvm.tir.const(sample_ratio, "int32") return roi_align_nchw_ir(data, rois, w_pc_buffer, pos_pc_buffer, pooled_size, spatial_scale, sample_ratio) diff --git a/topi/python/topi/x86/sparse.py b/topi/python/topi/x86/sparse.py index 898d0e5..54a5af9 100644 --- a/topi/python/topi/x86/sparse.py +++ b/topi/python/topi/x86/sparse.py @@ -16,7 +16,7 @@ # under the License. 
"""sparse_dense schedule on x86""" -import tvm +from tvm import te from ..util import traverse_inline, get_const_int from .util import get_fp32_len @@ -24,7 +24,7 @@ from .util import get_fp32_len def schedule_sparse_dense(outs): """Create schedule for sparse dense""" - s = tvm.create_schedule([x.op for x in outs]) + s = te.create_schedule([x.op for x in outs]) def _callback(op): simd_width = get_fp32_len() diff --git a/topi/python/topi/x86/tensor_intrin.py b/topi/python/topi/x86/tensor_intrin.py index dc9e145..955b6b4 100644 --- a/topi/python/topi/x86/tensor_intrin.py +++ b/topi/python/topi/x86/tensor_intrin.py @@ -17,6 +17,7 @@ """Core kernel of dot product of 4 Int8 operations""" #pylint: disable=invalid-name import tvm +from tvm import te import tvm.target.codegen @@ -25,7 +26,7 @@ def dot_16x1x16_uint8_int8_int32(): mcpu = tvm.target.Target.current().mcpu assert mcpu in ("skylake-avx512", "cascadelake"), \ - "An old Intel machine that does not have fast Int8 support." + "An old Intel machine that does not have fast Int8 support." 
if mcpu == "skylake-avx512": return dot_16x1x16_uint8_int8_int32_skylake() # cascadelake @@ -63,43 +64,43 @@ def dot_16x1x16_uint8_int8_int32_skylake(): int32_lanes = 16 # 16 int32 lanes in AVX512 num_int8_elements = 4 # 4 int8 elements in int32 - data = tvm.placeholder((num_int8_elements,), dtype='uint8', name='data') - kernel = tvm.placeholder((int32_lanes, num_int8_elements), dtype='int8', name='kernel') - k = tvm.reduce_axis((0, num_int8_elements), name='k') - C = tvm.compute((int32_lanes,), - lambda i: tvm.sum(data[k].astype('int32') * - kernel[i, k].astype('int32'), - axis=k), - name="C") - - a_buffer = tvm.decl_buffer(data.shape, dtype='uint8', name="a_buffer", - offset_factor=1, - strides=[1]) - b_buffer = tvm.decl_buffer(kernel.shape, dtype='int8', name="b_buffer", - offset_factor=1, - strides=[tvm.var('ldw'), 1]) + data = te.placeholder((num_int8_elements,), dtype='uint8', name='data') + kernel = te.placeholder((int32_lanes, num_int8_elements), dtype='int8', name='kernel') + k = te.reduce_axis((0, num_int8_elements), name='k') + C = te.compute((int32_lanes,), + lambda i: te.sum(data[k].astype('int32') * + kernel[i, k].astype('int32'), + axis=k), + name="C") + + a_buffer = tvm.tir.decl_buffer(data.shape, dtype='uint8', name="a_buffer", + offset_factor=1, + strides=[1]) + b_buffer = tvm.tir.decl_buffer(kernel.shape, dtype='int8', name="b_buffer", + offset_factor=1, + strides=[te.var('ldw'), 1]) def _intrin_func(ins, outs): def _instr(index): - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() if index == 1: - ib.emit(outs[0].vstore(0, tvm.const(0, 'int32x16'))) + ib.emit(outs[0].vstore(0, tvm.tir.const(0, 'int32x16'))) return ib.get() a_int8 = ins[0].vload([0], "uint8x4") - re_int32 = tvm.call_pure_intrin('int32', 'reinterpret', a_int8) + re_int32 = tvm.tir.call_pure_intrin('int32', 'reinterpret', a_int8) vec_ai32 = re_int32.astype('int32x16') - vec_a = tvm.call_pure_intrin('int8x64', 'reinterpret', vec_ai32) + vec_a = 
tvm.tir.call_pure_intrin('int8x64', 'reinterpret', vec_ai32) vec_b = ins[1].vload([0, 0], "int8x64") - vec_one = tvm.const(1, "int16x32") - pair_reduction = tvm.call_llvm_intrin('int16x32', - 'llvm.x86.avx512.pmaddubs.w.512', - tvm.const(0, 'uint32'), - vec_a, vec_b) - quad_reduction = tvm.call_llvm_intrin('int32x16', - 'llvm.x86.avx512.pmaddw.d.512', - tvm.const(0, 'uint32'), - pair_reduction, vec_one) + vec_one = tvm.tir.const(1, "int16x32") + pair_reduction = tvm.tir.call_llvm_intrin('int16x32', + 'llvm.x86.avx512.pmaddubs.w.512', + tvm.tir.const(0, 'uint32'), + vec_a, vec_b) + quad_reduction = tvm.tir.call_llvm_intrin('int32x16', + 'llvm.x86.avx512.pmaddw.d.512', + tvm.tir.const(0, 'uint32'), + pair_reduction, vec_one) if index == 0: ib.emit(outs[0].vstore(0, quad_reduction)) else: @@ -109,8 +110,8 @@ def dot_16x1x16_uint8_int8_int32_skylake(): # body, reset, update return _instr(0), _instr(1), _instr(2) - with tvm.build_config(offset_factor=1, partition_const_loop=True): - return tvm.decl_tensor_intrin(C.op, _intrin_func, binds={data:a_buffer, kernel:b_buffer}) + with tvm.target.build_config(offset_factor=1, partition_const_loop=True): + return te.decl_tensor_intrin(C.op, _intrin_func, binds={data:a_buffer, kernel:b_buffer}) def dot_16x1x16_uint8_int8_int16(): @@ -146,41 +147,41 @@ def dot_16x1x16_uint8_int8_int16(): int16_lanes = 4*32 # 4*32 int32 lanes in 4 AVX512 vector registers num_int8_elements = 2 # 2 int8 elements in int16 - data = tvm.placeholder((num_int8_elements,), dtype='uint8', name='data') - kernel = tvm.placeholder((int16_lanes, num_int8_elements), dtype='int8', name='kernel') - k = tvm.reduce_axis((0, num_int8_elements), name='k') - C = tvm.compute((int16_lanes, ), - lambda i: tvm.sum(data[k].astype('int16') * - kernel[i, k].astype('int16'), - axis=k), - name="C") - - a_buffer = tvm.decl_buffer(data.shape, dtype='uint8', name="a_buffer", - offset_factor=1, - strides=[1]) - b_buffer = tvm.decl_buffer(kernel.shape, dtype='int8', name="b_buffer", 
- offset_factor=1) - # strides=[tvm.var('ldw'), 1, 1]) + data = te.placeholder((num_int8_elements,), dtype='uint8', name='data') + kernel = te.placeholder((int16_lanes, num_int8_elements), dtype='int8', name='kernel') + k = te.reduce_axis((0, num_int8_elements), name='k') + C = te.compute((int16_lanes, ), + lambda i: te.sum(data[k].astype('int16') * + kernel[i, k].astype('int16'), + axis=k), + name="C") + + a_buffer = tvm.tir.decl_buffer(data.shape, dtype='uint8', name="a_buffer", + offset_factor=1, + strides=[1]) + b_buffer = tvm.tir.decl_buffer(kernel.shape, dtype='int8', name="b_buffer", + offset_factor=1) + # strides=[te.var('ldw'), 1, 1]) def _intrin_func(ins, outs): def _instr(index): - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() if index == 1: for i in range(4): - ib.emit(outs[0].vstore([i*32], tvm.const(0, 'int16x32'))) + ib.emit(outs[0].vstore([i*32], tvm.tir.const(0, 'int16x32'))) return ib.get() a_int8 = ins[0].vload([0], "uint8x2") - re_int16 = tvm.call_pure_intrin('int16', 'reinterpret', a_int8) + re_int16 = tvm.tir.call_pure_intrin('int16', 'reinterpret', a_int8) vec_ai16 = re_int16.astype('int16x32') - vec_a = tvm.call_pure_intrin('int8x64', 'reinterpret', vec_ai16) + vec_a = tvm.tir.call_pure_intrin('int8x64', 'reinterpret', vec_ai16) for i in range(4): vec_b = ins[1].vload([i*32, 0], "int8x64") - pair_reduction = tvm.call_llvm_intrin('int16x32', - 'llvm.x86.avx512.pmaddubs.w.512', - tvm.const(0, 'uint32'), - vec_a, vec_b) + pair_reduction = tvm.tir.call_llvm_intrin('int16x32', + 'llvm.x86.avx512.pmaddubs.w.512', + tvm.tir.const(0, 'uint32'), + vec_a, vec_b) if index == 0: ib.emit(outs[0].vstore([i*32], pair_reduction)) else: @@ -191,8 +192,8 @@ def dot_16x1x16_uint8_int8_int16(): # body, reset, update return _instr(0), _instr(1), _instr(2) - with tvm.build_config(offset_factor=1, partition_const_loop=True): - return tvm.decl_tensor_intrin(C.op, _intrin_func, binds={data:a_buffer, kernel:b_buffer}) + with 
tvm.target.build_config(offset_factor=1, partition_const_loop=True): + return te.decl_tensor_intrin(C.op, _intrin_func, binds={data:a_buffer, kernel:b_buffer}) def dot_16x1x16_uint8_int8_int32_cascadelake(): @@ -226,31 +227,31 @@ def dot_16x1x16_uint8_int8_int32_cascadelake(): int32_lanes = 16 # 16 int32 lanes in AVX512 num_int8_elements = 4 # 4 int8 elements in int32 - data = tvm.placeholder((num_int8_elements,), dtype='uint8', name='data') - kernel = tvm.placeholder((int32_lanes, num_int8_elements), dtype='int8', name='kernel') - k = tvm.reduce_axis((0, num_int8_elements), name='k') - C = tvm.compute((int32_lanes,), - lambda i: tvm.sum(data[k].astype('int32') * - kernel[i, k].astype('int32'), - axis=k), - name="C") - - a_buffer = tvm.decl_buffer(data.shape, dtype='uint8', name="a_buffer", - offset_factor=1, - strides=[1]) - b_buffer = tvm.decl_buffer(kernel.shape, dtype='int8', name="b_buffer", - offset_factor=1, - strides=[tvm.var('ldw'), 1]) + data = te.placeholder((num_int8_elements,), dtype='uint8', name='data') + kernel = te.placeholder((int32_lanes, num_int8_elements), dtype='int8', name='kernel') + k = te.reduce_axis((0, num_int8_elements), name='k') + C = te.compute((int32_lanes,), + lambda i: te.sum(data[k].astype('int32') * + kernel[i, k].astype('int32'), + axis=k), + name="C") + + a_buffer = tvm.tir.decl_buffer(data.shape, dtype='uint8', name="a_buffer", + offset_factor=1, + strides=[1]) + b_buffer = tvm.tir.decl_buffer(kernel.shape, dtype='int8', name="b_buffer", + offset_factor=1, + strides=[te.var('ldw'), 1]) def _intrin_func(ins, outs): def _instr(index): - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() if index == 1: - ib.emit(outs[0].vstore(0, tvm.const(0, 'int32x16'))) + ib.emit(outs[0].vstore(0, tvm.tir.const(0, 'int32x16'))) return ib.get() a_int8 = ins[0].vload([0], "uint8x4") - re_int32 = tvm.call_pure_intrin('int32', 'reinterpret', a_int8) + re_int32 = tvm.tir.call_pure_intrin('int32', 'reinterpret', a_int8) vec_ai32 = 
re_int32.astype('int32x16') vec_b = ins[1].vload([0, 0], "int8x64") @@ -258,24 +259,24 @@ def dot_16x1x16_uint8_int8_int32_cascadelake(): llvm_id = tvm.target.codegen.llvm_lookup_intrinsic_id(vnni_inst_name) if llvm_id != 0: # VNNI is available for current LLVM version - vec_bi32 = tvm.call_pure_intrin('int32x16', 'reinterpret', vec_b) - vec_zero = tvm.const(0, "int32x16") - quad_reduction = tvm.call_llvm_intrin('int32x16', - 'llvm.x86.avx512.vpdpbusd.512', - tvm.const(0, 'uint32'), - vec_zero, - vec_ai32, vec_bi32) + vec_bi32 = tvm.tir.call_pure_intrin('int32x16', 'reinterpret', vec_b) + vec_zero = tvm.tir.const(0, "int32x16") + quad_reduction = tvm.tir.call_llvm_intrin('int32x16', + 'llvm.x86.avx512.vpdpbusd.512', + tvm.tir.const(0, 'uint32'), + vec_zero, + vec_ai32, vec_bi32) else: # Fall back to the normal AVX512 - vec_a = tvm.call_pure_intrin('int8x64', 'reinterpret', vec_ai32) - vec_one = tvm.const(1, "int16x32") - pair_reduction = tvm.call_llvm_intrin('int16x32', - 'llvm.x86.avx512.pmaddubs.w.512', - tvm.const(0, 'uint32'), - vec_a, vec_b) - quad_reduction = tvm.call_llvm_intrin('int32x16', - 'llvm.x86.avx512.pmaddw.d.512', - tvm.const(0, 'uint32'), - pair_reduction, vec_one) + vec_a = tvm.tir.call_pure_intrin('int8x64', 'reinterpret', vec_ai32) + vec_one = tvm.tir.const(1, "int16x32") + pair_reduction = tvm.tir.call_llvm_intrin('int16x32', + 'llvm.x86.avx512.pmaddubs.w.512', + tvm.tir.const(0, 'uint32'), + vec_a, vec_b) + quad_reduction = tvm.tir.call_llvm_intrin('int32x16', + 'llvm.x86.avx512.pmaddw.d.512', + tvm.tir.const(0, 'uint32'), + pair_reduction, vec_one) if index == 0: ib.emit(outs[0].vstore(0, quad_reduction)) @@ -286,5 +287,5 @@ def dot_16x1x16_uint8_int8_int32_cascadelake(): # body, reset, update return _instr(0), _instr(1), _instr(2) - with tvm.build_config(offset_factor=1, partition_const_loop=True): - return tvm.decl_tensor_intrin(C.op, _intrin_func, binds={data:a_buffer, kernel:b_buffer}) + with tvm.target.build_config(offset_factor=1, 
partition_const_loop=True): + return te.decl_tensor_intrin(C.op, _intrin_func, binds={data:a_buffer, kernel:b_buffer}) diff --git a/topi/python/topi/x86/util.py b/topi/python/topi/x86/util.py index 04931f5..f2a35d2 100644 --- a/topi/python/topi/x86/util.py +++ b/topi/python/topi/x86/util.py @@ -15,9 +15,9 @@ # specific language governing permissions and limitations # under the License. """Common x86 related utilities""" -from __future__ import absolute_import as _abs import tvm + def get_fp32_len(): mcpu = tvm.target.Target.current().mcpu fp32_vec_len = 8 diff --git a/topi/recipe/broadcast/test_broadcast_map.py b/topi/recipe/broadcast/test_broadcast_map.py index 4f8a4de..2f2bb9e 100644 --- a/topi/recipe/broadcast/test_broadcast_map.py +++ b/topi/recipe/broadcast/test_broadcast_map.py @@ -16,6 +16,7 @@ # under the License. import os import tvm +from tvm import te from tvm.contrib import nvcc import numpy as np @@ -52,7 +53,7 @@ def test_broadcast_to(in_shape, out_shape): TASK = "bcast_to_i" + "_".join([str(ele) for ele in in_shape])\ + "o" + "_".join([str(ele) for ele in out_shape]) # Build the logic and compile the function - A = tvm.placeholder(shape=in_shape, name="A") + A = te.placeholder(shape=in_shape, name="A") B = topi.broadcast_to(A, out_shape) s = topi.cuda.schedule_broadcast(B) fcuda = tvm.build(s, [A, B], "cuda", name="broadcast_to") @@ -72,8 +73,8 @@ def test_broadcast_binary_op(lhs_shape, rhs_shape, typ="add"): TASK = "bcast_binary_" + typ + "_lhs" +\ "_".join([str(ele) for ele in lhs_shape]) +\ "rhs" + "_".join([str(ele) for ele in rhs_shape]) - A = tvm.placeholder(shape=lhs_shape, name="A") - B = tvm.placeholder(shape=rhs_shape, name="B") + A = te.placeholder(shape=lhs_shape, name="A") + B = te.placeholder(shape=rhs_shape, name="B") if typ == "add": C = topi.broadcast_add(A, B) elif typ == "sub": diff --git a/topi/recipe/conv/depthwise_conv2d_test.py b/topi/recipe/conv/depthwise_conv2d_test.py index 90c6103..a2b5273 100644 --- 
a/topi/recipe/conv/depthwise_conv2d_test.py +++ b/topi/recipe/conv/depthwise_conv2d_test.py @@ -16,6 +16,7 @@ # under the License. import os import tvm +from tvm import te import numpy as np from scipy import signal from tvm.contrib import nvcc @@ -63,11 +64,11 @@ def test_depthwise_conv2d_nchw(): padding = 'SAME' # or 'VALID' # Placeholder - Input = tvm.placeholder((batch, in_channel, in_height, in_width), name='Input') - Filter = tvm.placeholder((filter_channel, channel_multiplier, filter_height, filter_width), name='Filter') + Input = te.placeholder((batch, in_channel, in_height, in_width), name='Input') + Filter = te.placeholder((filter_channel, channel_multiplier, filter_height, filter_width), name='Filter') Stride = [stride_h, stride_w] - Scale = tvm.placeholder((in_channel * channel_multiplier,), name='Scale') - Shift = tvm.placeholder((in_channel * channel_multiplier,), name='Shift') + Scale = te.placeholder((in_channel * channel_multiplier,), name='Scale') + Shift = te.placeholder((in_channel * channel_multiplier,), name='Shift') # Declare DepthwiseConv2d = topi.nn.depthwise_conv2d_nchw(Input, Filter, Stride, padding) ScaleShift = topi.nn.scale_shift_nchw(DepthwiseConv2d, Scale, Shift) @@ -128,7 +129,7 @@ def test_depthwise_conv2d_nchw(): print("success") for device in ['cuda', 'opencl', 'rocm']: - with tvm.build_config(auto_unroll_max_step=128, + with tvm.target.build_config(auto_unroll_max_step=128, unroll_explicit=device == 'rocm', detect_global_barrier=False, restricted_func=True): @@ -152,11 +153,11 @@ def test_depthwise_conv2d_nhwc(): padding = 'SAME' # or 'VALID' # Placeholder - Input = tvm.placeholder((batch, in_height, in_width, in_channel), name='Input') - Filter = tvm.placeholder((filter_height, filter_width,filter_channel, channel_multiplier), name='Filter') + Input = te.placeholder((batch, in_height, in_width, in_channel), name='Input') + Filter = te.placeholder((filter_height, filter_width,filter_channel, channel_multiplier), name='Filter') 
Stride = [stride_h, stride_w] - Scale = tvm.placeholder((in_channel * channel_multiplier,), name='Scale') - Shift = tvm.placeholder((in_channel * channel_multiplier,), name='Shift') + Scale = te.placeholder((in_channel * channel_multiplier,), name='Scale') + Shift = te.placeholder((in_channel * channel_multiplier,), name='Shift') # Declare DepthwiseConv2d = topi.nn.depthwise_conv2d_nhwc(Input, Filter, Stride, padding) ScaleShift = topi.nn.scale_shift_nhwc(DepthwiseConv2d, Scale, Shift) @@ -217,7 +218,7 @@ def test_depthwise_conv2d_nhwc(): print("success") for device in ['cuda', 'opencl', 'rocm']: - with tvm.build_config(auto_unroll_max_step=128, + with tvm.target.build_config(auto_unroll_max_step=128, detect_global_barrier=False, restricted_func=True): check_device(device) diff --git a/topi/recipe/conv/test_conv2d_hwcn_map.py b/topi/recipe/conv/test_conv2d_hwcn_map.py index 3f7deca..69bda79 100644 --- a/topi/recipe/conv/test_conv2d_hwcn_map.py +++ b/topi/recipe/conv/test_conv2d_hwcn_map.py @@ -19,6 +19,7 @@ import os import numpy as np import scipy.signal import tvm +from tvm import te from tvm.contrib import nvcc import topi from topi.util import get_const_tuple @@ -55,8 +56,8 @@ def test_conv2d_hwcn_map(): stride = 2 padding = 'SAME' - A = tvm.placeholder((in_height, in_width, in_channel, batch), name='A') - W = tvm.placeholder((kernel, kernel, in_channel, num_filter), name='W') + A = te.placeholder((in_height, in_width, in_channel, batch), name='A') + W = te.placeholder((kernel, kernel, in_channel, num_filter), name='W') B = topi.nn.conv2d_hwcn(A, W, stride, padding) C = topi.nn.relu(B) s1 = topi.cuda.schedule_conv2d_hwcn([B]) @@ -76,7 +77,7 @@ def test_conv2d_hwcn_map(): w = tvm.nd.array(w_np, ctx) b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx) c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx) - with tvm.build_config(auto_unroll_max_step=128, + with tvm.target.build_config(auto_unroll_max_step=128, 
unroll_explicit=device == 'rocm'): func1 = tvm.build(s1, [A, W, B], device) func1(a, w, b) diff --git a/topi/recipe/conv/test_conv_int8_arm.py b/topi/recipe/conv/test_conv_int8_arm.py index ff0d37d..336e2f2 100644 --- a/topi/recipe/conv/test_conv_int8_arm.py +++ b/topi/recipe/conv/test_conv_int8_arm.py @@ -20,6 +20,7 @@ import sys import logging import numpy as np import tvm +from tvm import te import topi logging.basicConfig(stream=sys.stdout, level=logging.INFO) @@ -92,8 +93,8 @@ def run_inference(data_dtype, kernel_dtype, out_dtype, im_height, im_width, in_f hstride, wstride, out_dtype) # Create TVM placeholders - data = tvm.placeholder(data_shape, name='data', dtype=data_dtype) - kernel = tvm.placeholder(kernel_shape, name='kernel', dtype=kernel_dtype) + data = te.placeholder(data_shape, name='data', dtype=data_dtype) + kernel = te.placeholder(kernel_shape, name='kernel', dtype=kernel_dtype) # Create the numpy arrays to be used for executing conv models if data_dtype == 'float32': @@ -119,7 +120,7 @@ def run_inference(data_dtype, kernel_dtype, out_dtype, im_height, im_width, in_f padding=hpad, dilation=(1, 1), layout='NCHWc', out_layout='NCHWc', out_dtype=out_dtype) out = topi.nn.relu(conv) - sch = tvm.create_schedule(out.op) + sch = te.create_schedule(out.op) func = tvm.build(sch, [data, kernel, out], target=TARGET_NAME, name='out') func(data_array, kernel_array, c_orig) LOGGER.debug(tvm.lower(sch, [data, kernel], simple_mode=True)) diff --git a/topi/recipe/conv/test_conv_int8_intel.py b/topi/recipe/conv/test_conv_int8_intel.py index f39f4cd..767262d 100644 --- a/topi/recipe/conv/test_conv_int8_intel.py +++ b/topi/recipe/conv/test_conv_int8_intel.py @@ -20,6 +20,7 @@ import sys import logging import numpy as np import tvm +from tvm import te import topi logging.basicConfig(stream=sys.stdout, level=logging.INFO) @@ -93,8 +94,8 @@ def run_inference(data_dtype, kernel_dtype, out_dtype, im_height, im_width, in_f hstride, wstride, out_dtype) # Create TVM 
placeholders - data = tvm.placeholder(data_shape, name='data', dtype=data_dtype) - kernel = tvm.placeholder(kernel_shape, name='kernel', dtype=kernel_dtype) + data = te.placeholder(data_shape, name='data', dtype=data_dtype) + kernel = te.placeholder(kernel_shape, name='kernel', dtype=kernel_dtype) # Create the numpy arrays to be used for executing conv models if data_dtype == 'float32': @@ -115,7 +116,7 @@ def run_inference(data_dtype, kernel_dtype, out_dtype, im_height, im_width, in_f padding=hpad, dilation=(1, 1), layout='NCHWc', out_layout='NCHWc', out_dtype=out_dtype) out = topi.nn.relu(conv) - sch = tvm.create_schedule(out.op) + sch = te.create_schedule(out.op) func = tvm.build(sch, [data, kernel, out], target=TARGET_NAME, name='out') func(data_array, kernel_array, c_orig) LOGGER.debug(tvm.lower(sch, [data, kernel], simple_mode=True)) diff --git a/topi/recipe/gemm/android_gemm_square.py b/topi/recipe/gemm/android_gemm_square.py index 46129cb..7692f9c 100644 --- a/topi/recipe/gemm/android_gemm_square.py +++ b/topi/recipe/gemm/android_gemm_square.py @@ -16,6 +16,7 @@ # under the License. 
"""Example code to do square matrix multiplication on Android Phone.""" import tvm +from tvm import te import os from tvm import rpc from tvm.contrib import util, ndk @@ -52,28 +53,28 @@ def test_gemm_gpu(N, times, bn, num_block, num_thread): assert(bn <= N) assert(num_thread * num_thread * 16 <= N) assert(num_block * num_block * 2 <= N) - A = tvm.placeholder((N, N), name='A') - B = tvm.placeholder((N, N), name='Btmp') - k = tvm.reduce_axis((0, N), name='k') + A = te.placeholder((N, N), name='A') + B = te.placeholder((N, N), name='Btmp') + k = te.reduce_axis((0, N), name='k') - packedB = tvm.compute((N, N / bn, bn), + packedB = te.compute((N, N / bn, bn), lambda x, y, z: B[x, y * bn + z], name = 'B') - C = tvm.compute( + C = te.compute( (N, N), - lambda ii, jj: tvm.sum(A[ii, k] * packedB[k, jj / bn, jj % bn], axis=k), + lambda ii, jj: te.sum(A[ii, k] * packedB[k, jj / bn, jj % bn], axis=k), name='C') - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) CC = s.cache_write(C, "local") - block_x = tvm.thread_axis("blockIdx.x") - block_y = tvm.thread_axis("blockIdx.y") - thread_x = tvm.thread_axis("threadIdx.x") - thread_y = tvm.thread_axis("threadIdx.y") + block_x = te.thread_axis("blockIdx.x") + block_y = te.thread_axis("blockIdx.y") + thread_x = te.thread_axis("threadIdx.x") + thread_y = te.thread_axis("threadIdx.y") - thread_xz = tvm.thread_axis((0, 2), "vthread", name="vx") - thread_yz = tvm.thread_axis((0, 2), "vthread", name="vy") + thread_xz = te.thread_axis((0, 2), "vthread", name="vx") + thread_yz = te.thread_axis((0, 2), "vthread", name="vy") pby, pbi = s[packedB].split(packedB.op.axis[0], nparts=num_thread) pbx, pbj = s[packedB].split(packedB.op.axis[1], nparts=num_thread) diff --git a/topi/recipe/gemm/cuda_gemm_square.py b/topi/recipe/gemm/cuda_gemm_square.py index 899379e..196bf72 100644 --- a/topi/recipe/gemm/cuda_gemm_square.py +++ b/topi/recipe/gemm/cuda_gemm_square.py @@ -16,6 +16,7 @@ # under the License. 
"""Example code to do square matrix multiplication.""" import tvm +from tvm import te import os from tvm.contrib import nvcc from tvm.contrib import spirv @@ -46,19 +47,19 @@ def tvm_callback_cuda_postproc(code): def test_gemm(): # graph nn = 2048 - n = tvm.var('n') - n = tvm.convert(nn) + n = te.var('n') + n = tvm.runtime.convert(nn) m, l = n, n - A = tvm.placeholder((l, n), name='A') - B = tvm.placeholder((l, m), name='B') - k = tvm.reduce_axis((0, l), name='k') - C = tvm.compute( + A = te.placeholder((l, n), name='A') + B = te.placeholder((l, m), name='B') + k = te.reduce_axis((0, l), name='k') + C = te.compute( (m, n), - lambda ii, jj: tvm.sum(A[k, jj] * B[k, ii], axis=k), + lambda ii, jj: te.sum(A[k, jj] * B[k, ii], axis=k), name='C') # schedule - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) AA = s.cache_read(A, "shared", [C]) BB = s.cache_read(B, "shared", [C]) AL = s.cache_read(AA, "local", [C]) @@ -68,12 +69,12 @@ def test_gemm(): scale = 8 num_thread = 8 block_factor = scale * num_thread - block_x = tvm.thread_axis("blockIdx.x") - thread_x = tvm.thread_axis((0, num_thread), "threadIdx.x") - block_y = tvm.thread_axis("blockIdx.y") - thread_y = tvm.thread_axis((0, num_thread), "threadIdx.y") - thread_xz = tvm.thread_axis((0, 2), "vthread", name="vx") - thread_yz = tvm.thread_axis((0, 2), "vthread", name="vy") + block_x = te.thread_axis("blockIdx.x") + thread_x = te.thread_axis((0, num_thread), "threadIdx.x") + block_y = te.thread_axis("blockIdx.y") + thread_y = te.thread_axis((0, num_thread), "threadIdx.y") + thread_xz = te.thread_axis((0, 2), "vthread", name="vx") + thread_yz = te.thread_axis((0, 2), "vthread", name="vy") by, yi = s[C].split(C.op.axis[0], factor=block_factor) bx, xi = s[C].split(C.op.axis[1], factor=block_factor) @@ -145,7 +146,7 @@ def test_gemm(): print("average time cost of %d runs = %g ms, %g GFLOPS." 
% (num_runs, t * 1e3, GFLOPS)) for device in ["cuda", "opencl", "rocm", "nvptx", "vulkan"]: - with tvm.build_config(auto_unroll_max_step=128, + with tvm.target.build_config(auto_unroll_max_step=128, unroll_explicit=(device != "cuda")): check_device(device) diff --git a/topi/recipe/gemm/gemm_int8.py b/topi/recipe/gemm/gemm_int8.py index cf36214..9d668eb 100644 --- a/topi/recipe/gemm/gemm_int8.py +++ b/topi/recipe/gemm/gemm_int8.py @@ -19,6 +19,7 @@ import logging import sys import numpy as np import tvm +from tvm import te from tvm import autotvm from topi.cuda.tensor_intrin import dp4a @@ -29,15 +30,15 @@ intrin_dp4a = dp4a('local', 'local', 'local') @autotvm.template def gemm_int8(n, m, l): - A = tvm.placeholder((n, l), name='A', dtype='int8') - B = tvm.placeholder((m, l), name='B', dtype='int8') + A = te.placeholder((n, l), name='A', dtype='int8') + B = te.placeholder((m, l), name='B', dtype='int8') - k = tvm.reduce_axis((0, l), name='k') - C = tvm.compute((n, m), lambda i, j: tvm.sum(A[i, k].astype('int32') * B[j, k].astype( + k = te.reduce_axis((0, l), name='k') + C = te.compute((n, m), lambda i, j: te.sum(A[i, k].astype('int32') * B[j, k].astype( 'int32'), axis=k), name='C') cfg = autotvm.get_config() - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) y, x = C.op.axis AA = s.cache_read(A, 'shared', [C]) @@ -56,10 +57,10 @@ def gemm_int8(n, m, l): s[CC].tensorize(ki, intrin_dp4a) - block_x = tvm.thread_axis('blockIdx.x') - block_y = tvm.thread_axis('blockIdx.y') - thread_x = tvm.thread_axis('threadIdx.x') - thread_y = tvm.thread_axis('threadIdx.y') + block_x = te.thread_axis('blockIdx.x') + block_y = te.thread_axis('blockIdx.y') + thread_x = te.thread_axis('threadIdx.x') + thread_y = te.thread_axis('threadIdx.y') def block_size_filter(entity): return entity.size[0] * 2 >= entity.size[1] * 2 and \ @@ -71,8 +72,8 @@ def gemm_int8(n, m, l): s[C].bind(by, block_y) s[C].bind(bx, block_x) - s[C].bind(tyz, tvm.thread_axis('vthread')) - s[C].bind(txz, 
tvm.thread_axis('vthread')) + s[C].bind(tyz, te.thread_axis('vthread')) + s[C].bind(txz, te.thread_axis('vthread')) s[C].bind(ty, thread_y) s[C].bind(tx, thread_x) s[C].reorder(by, bx, tyz, txz, ty, tx, yi, xi) diff --git a/topi/recipe/reduce/test_reduce_map.py b/topi/recipe/reduce/test_reduce_map.py index 1adc413..31f9bae 100644 --- a/topi/recipe/reduce/test_reduce_map.py +++ b/topi/recipe/reduce/test_reduce_map.py @@ -16,6 +16,7 @@ # under the License. import os import tvm +from tvm import te from tvm.contrib import nvcc import numpy as np @@ -50,7 +51,7 @@ def tvm_callback_cuda_postproc(code): def test_reduce_map(in_shape, axis, keepdims, type="sum", test_id=0): global TASK # Build the logic and compile the function - A = tvm.placeholder(shape=in_shape, name="A") + A = te.placeholder(shape=in_shape, name="A") if type == "sum": TASK = "sum_map_id%d" %test_id B = topi.sum(A, axis=axis, keepdims=keepdims) @@ -63,7 +64,7 @@ def test_reduce_map(in_shape, axis, keepdims, type="sum", test_id=0): else: raise NotImplementedError s = topi.cuda.schedule_reduce(B) - with tvm.build_config(auto_unroll_max_step=16, + with tvm.target.build_config(auto_unroll_max_step=16, auto_unroll_min_depth=0): fcuda = tvm.build(s, [A, B], "cuda", name="sum") diff --git a/topi/recipe/rnn/lstm.py b/topi/recipe/rnn/lstm.py index 0d7635d..4076eb6 100644 --- a/topi/recipe/rnn/lstm.py +++ b/topi/recipe/rnn/lstm.py @@ -16,6 +16,7 @@ # under the License. 
"""LSTM Example, still work in progress..""" import tvm +from tvm import te import os from tvm.contrib import nvcc import numpy as np @@ -58,52 +59,52 @@ def lstm(): num_thread_x = 16 * 3 // 2 num_sm = 24 n_num_step = 128 - num_step = tvm.var('num_step') + num_step = te.var('num_step') num_hidden = 1152 // 2 batch_size = 1 # Global transition matrix # Input hidden channel can be pre-caculated by a gemm - Xi2h = tvm.placeholder((num_step, batch_size, 4, num_hidden), name="Xi2h") + Xi2h = te.placeholder((num_step, batch_size, 4, num_hidden), name="Xi2h") # Only handle hidden transition, saves space. - Wh2h = tvm.placeholder((4, num_hidden, num_hidden), name="Wh2h") + Wh2h = te.placeholder((4, num_hidden, num_hidden), name="Wh2h") # h: output hidden state, c: cell state. - s_state_h = tvm.placeholder((num_step, batch_size, num_hidden)) - s_state_c = tvm.placeholder((num_step, batch_size, num_hidden)) - s_init_c = tvm.compute((1, batch_size, num_hidden), + s_state_h = te.placeholder((num_step, batch_size, num_hidden)) + s_state_c = te.placeholder((num_step, batch_size, num_hidden)) + s_init_c = te.compute((1, batch_size, num_hidden), lambda *i: 0.0, name="init_c") - s_init_h = tvm.compute((1, batch_size, num_hidden), + s_init_h = te.compute((1, batch_size, num_hidden), lambda *i: 0.0, name="init_h") # LSTM transition - k = tvm.reduce_axis((0, num_hidden), name="ki2h") - s_h2h = tvm.compute( + k = te.reduce_axis((0, num_hidden), name="ki2h") + s_h2h = te.compute( (num_step, batch_size, 4, num_hidden), - lambda t, i, x, j: tvm.sum(s_state_h[t - 1, i, k] * Wh2h[x, j, k], axis=k), + lambda t, i, x, j: te.sum(s_state_h[t - 1, i, k] * Wh2h[x, j, k], axis=k), name="s_h2h") # Gate rules - gates = tvm.compute(Xi2h.shape, lambda *i: + gates = te.compute(Xi2h.shape, lambda *i: Xi2h(*i) + s_h2h(*i), name="gates") gshape = (num_step, batch_size, num_hidden) - in_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, i, 0, j]), name="in_gate") - in_transform = 
tvm.compute(gshape, lambda t, i, j: tvm.tanh(gates[t, i, 1, j]), name="in_transform") - forget_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, i, 2, j]), name="forget_gate") - out_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, i, 3, j]), name="out_gate") - next_c = tvm.compute(gshape, + in_gate = te.compute(gshape, lambda t, i, j: te.sigmoid(gates[t, i, 0, j]), name="in_gate") + in_transform = te.compute(gshape, lambda t, i, j: te.tanh(gates[t, i, 1, j]), name="in_transform") + forget_gate = te.compute(gshape, lambda t, i, j: te.sigmoid(gates[t, i, 2, j]), name="forget_gate") + out_gate = te.compute(gshape, lambda t, i, j: te.sigmoid(gates[t, i, 3, j]), name="out_gate") + next_c = te.compute(gshape, lambda t, i, j: forget_gate[t, i, j] * s_state_c[t - 1, i, j] + in_gate[t, i, j] * in_transform[t, i, j], name="next_c") - next_h = tvm.compute(gshape, - lambda t, i, j: out_gate[t, i, j] * tvm.tanh(next_c[t, i, j]), name="next_h") - update_c = tvm.compute(gshape, lambda *i: next_c(*i), name="update_c") - update_h = tvm.compute(gshape, lambda *i: next_h(*i), name="update_h") + next_h = te.compute(gshape, + lambda t, i, j: out_gate[t, i, j] * te.tanh(next_c[t, i, j]), name="next_h") + update_c = te.compute(gshape, lambda *i: next_c(*i), name="update_c") + update_h = te.compute(gshape, lambda *i: next_h(*i), name="update_h") # schedule - scan_h, scan_c = tvm.scan( + scan_h, scan_c = tvm.te.scan( [s_init_h, s_init_c], [update_h, update_c], [s_state_h, s_state_c], inputs=[Xi2h], name="lstm_scan") # schedule - s = tvm.create_schedule(scan_h.op) + s = te.create_schedule(scan_h.op) # Inline gate computations s[gates].compute_inline() s[in_gate].compute_inline() @@ -111,9 +112,9 @@ def lstm(): s[forget_gate].compute_inline() s[out_gate].compute_inline() - block_x = tvm.thread_axis((0, num_sm), "blockIdx.x") - thread_x = tvm.thread_axis((0, num_thread_x), "threadIdx.x") - thread_y = tvm.thread_axis((0, num_thread_y), "threadIdx.y") + block_x = 
te.thread_axis((0, num_sm), "blockIdx.x") + thread_x = te.thread_axis((0, num_thread_x), "threadIdx.x") + thread_y = te.thread_axis((0, num_thread_y), "threadIdx.y") s_state_h_S = s.cache_read(s_state_h, "shared", [s_h2h]) s_state_c_S = s.cache_read(s_state_c, "shared", [next_c]) @@ -187,7 +188,7 @@ def lstm(): print("Time cost=%g" % eval_result.mean) # set unroll_explicit for more readable code. - with tvm.build_config( + with tvm.target.build_config( detect_global_barrier=DETECT_GLOBAL_BARRIER, auto_unroll_max_step=128, unroll_explicit=False): diff --git a/topi/recipe/rnn/matexp.py b/topi/recipe/rnn/matexp.py index 7466008..9991895 100644 --- a/topi/recipe/rnn/matexp.py +++ b/topi/recipe/rnn/matexp.py @@ -24,6 +24,7 @@ X[t] = dot(X[t-1], W) ``` """ import tvm +from tvm import te import time import os import argparse @@ -62,25 +63,25 @@ def rnn_matexp(): n_batch_size = 4 detect_global_barrier = DETECT_GLOBAL_BARRIER - num_step = tvm.var("num_step") - num_hidden = tvm.convert(n_num_hidden) - batch_size = tvm.convert(n_batch_size) + num_step = te.var("num_step") + num_hidden = tvm.runtime.convert(n_num_hidden) + batch_size = tvm.runtime.convert(n_batch_size) num_thread_y = 8 num_thread_x = 16 * 3 num_sm = 24 - Whh = tvm.placeholder((num_hidden, num_hidden), name="Whh") - s_init = tvm.compute((1, batch_size, num_hidden), + Whh = te.placeholder((num_hidden, num_hidden), name="Whh") + s_init = te.compute((1, batch_size, num_hidden), lambda _, i, j: 1.0, name="init") - s_state = tvm.placeholder((num_step, batch_size, num_hidden)) - kh = tvm.reduce_axis((0, num_hidden), name="kh") - s_update = tvm.compute( + s_state = te.placeholder((num_step, batch_size, num_hidden)) + kh = te.reduce_axis((0, num_hidden), name="kh") + s_update = te.compute( (num_step, batch_size, num_hidden), - lambda t, i, j: tvm.sum(s_state[t-1, i, kh] * Whh[kh, j], axis=kh), + lambda t, i, j: te.sum(s_state[t-1, i, kh] * Whh[kh, j], axis=kh), name="update") - s_scan = tvm.scan(s_init, s_update, 
s_state) + s_scan = tvm.te.scan(s_init, s_update, s_state) # schedule - s = tvm.create_schedule(s_scan.op) + s = te.create_schedule(s_scan.op) CL = s_update SS = s.cache_read(s_state, "shared", [CL]) SL = s.cache_read(SS, "local", [CL]) @@ -88,9 +89,9 @@ def rnn_matexp(): ko, ki = s[CL].split(s[CL].op.reduce_axis[0], nparts=num_thread_y) CLF = s.rfactor(CL, ko) - block_x = tvm.thread_axis((0, num_sm), "blockIdx.x") - thread_x = tvm.thread_axis((0, num_thread_x), "threadIdx.x") - thread_y = tvm.thread_axis((0, num_thread_y), "threadIdx.y") + block_x = te.thread_axis((0, num_sm), "blockIdx.x") + thread_x = te.thread_axis((0, num_thread_x), "threadIdx.x") + thread_y = te.thread_axis((0, num_thread_y), "threadIdx.y") if PERSIST_KERNEL: s[s_scan.op].env_threads([block_x, thread_y, thread_x]) @@ -126,7 +127,7 @@ def rnn_matexp(): s[SS].bind(tx, thread_x) def check_device(target): - with tvm.build_config( + with tvm.target.build_config( detect_global_barrier=detect_global_barrier, auto_unroll_max_step=128, unroll_explicit=False): diff --git a/topi/tests/python/common.py b/topi/tests/python/common.py index e03708c..eeaf632 100644 --- a/topi/tests/python/common.py +++ b/topi/tests/python/common.py @@ -17,6 +17,7 @@ """Common utility for topi test""" import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import FallbackConfigEntity import topi diff --git a/topi/tests/python/test_fifo_buffer.py b/topi/tests/python/test_fifo_buffer.py index 34c389a..676c1f9 100644 --- a/topi/tests/python/test_fifo_buffer.py +++ b/topi/tests/python/test_fifo_buffer.py @@ -17,6 +17,7 @@ """Test code for FIFO buffer""" import tvm +from tvm import te import topi import topi.testing import numpy as np @@ -25,8 +26,8 @@ from tvm.contrib.pickle_memoize import memoize from common import get_all_backend def verify_fifo_buffer(buffer_shape, data_shape, axis, dtype='float32'): - buffer = tvm.placeholder(buffer_shape, name='buffer', dtype=dtype) - data = 
tvm.placeholder(data_shape, name='data', dtype=dtype) + buffer = te.placeholder(buffer_shape, name='buffer', dtype=dtype) + data = te.placeholder(data_shape, name='data', dtype=dtype) # Use memoize, pickle the test data for next time use @memoize('topi.tests.test_fifo_buffer') @@ -98,12 +99,12 @@ def verify_conv1d_integration(): dtype = 'float32' - inc_input = tvm.placeholder(inc_input_shape, name='inc_input', dtype=dtype) - input_window = tvm.placeholder(input_window_shape, name='input_window', dtype=dtype) - context = tvm.placeholder(context_shape, name='context', dtype=dtype) - kernel = tvm.placeholder(kernel_shape, name='kernel', dtype=dtype) - inc_output = tvm.placeholder(inc_input_shape, name='inc_output', dtype=dtype) - output_window = tvm.placeholder(output_window_shape, name='output_window', dtype=dtype) + inc_input = te.placeholder(inc_input_shape, name='inc_input', dtype=dtype) + input_window = te.placeholder(input_window_shape, name='input_window', dtype=dtype) + context = te.placeholder(context_shape, name='context', dtype=dtype) + kernel = te.placeholder(kernel_shape, name='kernel', dtype=dtype) + inc_output = te.placeholder(inc_input_shape, name='inc_output', dtype=dtype) + output_window = te.placeholder(output_window_shape, name='output_window', dtype=dtype) # Use memoize, pickle the test data for next time use @memoize('topi.tests.test_fifo_buffer_conv1d_integration') diff --git a/topi/tests/python/test_topi_basic.py b/topi/tests/python/test_topi_basic.py index 53b29df..83f0469 100644 --- a/topi/tests/python/test_topi_basic.py +++ b/topi/tests/python/test_topi_basic.py @@ -15,20 +15,21 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te import topi from topi import util def test_util(): - x = tvm.const(100, "int32") + x = tvm.tir.const(100, "int32") assert util.get_const_int(x) == 100 assert util.get_const_tuple((x, x)) == (100, 100) def test_ewise(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((m, l), name='A') + m = te.var('m') + l = te.var('l') + A = te.placeholder((m, l), name='A') def test_apply(func, name): B = func(A) diff --git a/topi/tests/python/test_topi_batch_matmul.py b/topi/tests/python/test_topi_batch_matmul.py index 1b38e90..b8c8547 100644 --- a/topi/tests/python/test_topi_batch_matmul.py +++ b/topi/tests/python/test_topi_batch_matmul.py @@ -17,6 +17,7 @@ """Test code for batch_matmul operator""" import numpy as np import tvm +from tvm import te import topi import topi.testing from topi.util import get_const_tuple @@ -31,8 +32,8 @@ _batch_matmul_implement = { } def verify_batch_matmul(batch, M, N, K): - x = tvm.placeholder((batch, M, K), name='x') - y = tvm.placeholder((batch, N, K), name='y') + x = te.placeholder((batch, M, K), name='x') + y = te.placeholder((batch, N, K), name='y') dtype = x.dtype # use memoize to pickle the test data for next time use diff --git a/topi/tests/python/test_topi_bitserial_conv2d.py b/topi/tests/python/test_topi_bitserial_conv2d.py index 274743d..44811d1 100644 --- a/topi/tests/python/test_topi_bitserial_conv2d.py +++ b/topi/tests/python/test_topi_bitserial_conv2d.py @@ -16,6 +16,7 @@ # under the License. 
import numpy as np import tvm +from tvm import te import topi import topi.testing from topi.util import get_const_tuple @@ -33,8 +34,8 @@ def verify_bitserial_conv2d_nchw(batch, in_size, in_channel, num_filter, kernel, out_dtype = 'int32' with tvm.target.create('llvm'): - A = tvm.placeholder((batch, in_channel, in_height, in_width), dtype=input_dtype, name='A') - W = tvm.placeholder((num_filter, in_channel, kernel, kernel), dtype=input_dtype, name='W') + A = te.placeholder((batch, in_channel, in_height, in_width), dtype=input_dtype, name='A') + W = te.placeholder((num_filter, in_channel, kernel, kernel), dtype=input_dtype, name='W') B = topi.x86.bitserial_conv2d_nchw(A, W, stride, padding, activation_bits, weight_bits, input_dtype, out_dtype, unipolar) s = topi.x86.schedule_bitserial_conv2d_nchw([B]) @@ -71,8 +72,8 @@ def verify_bitserial_conv2d_nhwc(batch, in_size, in_channel, num_filter, kernel, out_dtype='int32' with tvm.target.create('llvm'): - A = tvm.placeholder((batch, in_height, in_width, in_channel), dtype=input_dtype, name='A') - W = tvm.placeholder((kernel, kernel, in_channel, num_filter), dtype=input_dtype, name='W') + A = te.placeholder((batch, in_height, in_width, in_channel), dtype=input_dtype, name='A') + W = te.placeholder((kernel, kernel, in_channel, num_filter), dtype=input_dtype, name='W') B = topi.x86.bitserial_conv2d_nhwc(A, W, stride, padding, activation_bits, weight_bits, input_dtype, out_dtype, unipolar) s = topi.x86.schedule_bitserial_conv2d_nhwc([B]) diff --git a/topi/tests/python/test_topi_bitserial_conv2d_rasp.py b/topi/tests/python/test_topi_bitserial_conv2d_rasp.py index 1f87785..99ba0db 100644 --- a/topi/tests/python/test_topi_bitserial_conv2d_rasp.py +++ b/topi/tests/python/test_topi_bitserial_conv2d_rasp.py @@ -18,6 +18,7 @@ import os import re import numpy as np import tvm +from tvm import te import topi import topi.testing from topi.util import get_const_tuple @@ -37,8 +38,8 @@ def verify_bitserial_conv2d_nhwc(batch, in_size, 
in_channel, num_filter, kernel, device = 'llvm -device=arm_cpu -model=bcm2837 -target=armv7l-linux-gnueabihf -mattr=+neon' with tvm.target.create(device): - A = tvm.placeholder((batch, in_height, in_width, in_channel), dtype=input_type, name='A') - W = tvm.placeholder((kernel, kernel, in_channel, num_filter), dtype=input_type, name='W') + A = te.placeholder((batch, in_height, in_width, in_channel), dtype=input_type, name='A') + W = te.placeholder((kernel, kernel, in_channel, num_filter), dtype=input_type, name='W') B = topi.arm_cpu.bitserial_conv2d_nhwc(A, W, stride, padding, activation_bits, weight_bits, 'uint8', out_dtype, unipolar) s = topi.arm_cpu.schedule_bitserial_conv2d_nhwc([B]) diff --git a/topi/tests/python/test_topi_bitserial_dense.py b/topi/tests/python/test_topi_bitserial_dense.py index 505ce79..fbb20a6 100644 --- a/topi/tests/python/test_topi_bitserial_dense.py +++ b/topi/tests/python/test_topi_bitserial_dense.py @@ -18,6 +18,7 @@ import os import numpy as np import tvm +from tvm import te import topi import topi.testing from topi.util import get_const_tuple @@ -54,8 +55,8 @@ def verify_bitserial_dense(batch, in_dim, out_dim, activation_bits, weight_bits, print ("Skipped running code, not an arm device") continue input_dtype = 'uint8' if "arm_cpu" in target else "uint32" - A = tvm.placeholder((batch, in_dim), dtype=input_dtype, name='A') - B = tvm.placeholder((out_dim, in_dim), dtype=input_dtype, name='B') + A = te.placeholder((batch, in_dim), dtype=input_dtype, name='A') + B = te.placeholder((out_dim, in_dim), dtype=input_dtype, name='B') fcompute, fschedule = topi.testing.dispatch(target, _bitserial_dense_implement) C = fcompute(A, B, activation_bits, weight_bits, input_dtype, out_dtype, unipolar) diff --git a/topi/tests/python/test_topi_bnn.py b/topi/tests/python/test_topi_bnn.py index ce6a286..275f34f 100644 --- a/topi/tests/python/test_topi_bnn.py +++ b/topi/tests/python/test_topi_bnn.py @@ -17,19 +17,20 @@ """Test code for binary neural network 
operators.""" import numpy as np import tvm +from tvm import te import topi from topi.util import get_const_tuple from tvm.contrib.pickle_memoize import memoize def verify_binary_dense(batch, in_dim, out_dim): - A = tvm.placeholder((batch, in_dim), name='A') - B = tvm.placeholder((out_dim, in_dim), name='B') + A = te.placeholder((batch, in_dim), name='A') + B = te.placeholder((out_dim, in_dim), name='B') bnn_A = topi.nn.binarize_pack(A) bnn_B = topi.nn.binarize_pack(B) # binary dense - bnn_A1 = tvm.placeholder(bnn_A.shape, dtype=bnn_A.dtype) - bnn_B1 = tvm.placeholder(bnn_B.shape, dtype=bnn_B.dtype) + bnn_A1 = te.placeholder(bnn_A.shape, dtype=bnn_A.dtype) + bnn_B1 = te.placeholder(bnn_B.shape, dtype=bnn_B.dtype) bnn_C = topi.nn.binary_dense(bnn_A1, bnn_B1) # schedule with tvm.target.create('llvm'): diff --git a/topi/tests/python/test_topi_broadcast.py b/topi/tests/python/test_topi_broadcast.py index 2bea9b0..2fe00c7 100644 --- a/topi/tests/python/test_topi_broadcast.py +++ b/topi/tests/python/test_topi_broadcast.py @@ -17,6 +17,7 @@ """Test code for broadcasting operators.""" import numpy as np import tvm +from tvm import te import topi import topi.testing from common import get_all_backend @@ -24,7 +25,7 @@ from common import get_all_backend def verify_broadcast_to_ele(in_shape, out_shape, fbcast): # Build the logic and compile the function - A = tvm.placeholder(shape=in_shape, name="A") + A = te.placeholder(shape=in_shape, name="A") B = fbcast(A, out_shape) def check_device(device): @@ -54,13 +55,13 @@ def verify_broadcast_binary_ele(lhs_shape, rhs_shape, rhs_min=-100, rhs_max=100, dtype="float32"): # Build the logic and compile the function - A = (tvm.var("A", dtype=dtype) if lhs_shape is None - else tvm.placeholder(shape=lhs_shape, name="A", dtype=dtype)) - B = (tvm.var("B", dtype=dtype) if rhs_shape is None - else tvm.placeholder(shape=rhs_shape, name="B", dtype=dtype)) + A = (te.var("A", dtype=dtype) if lhs_shape is None + else 
te.placeholder(shape=lhs_shape, name="A", dtype=dtype)) + B = (te.var("B", dtype=dtype) if rhs_shape is None + else te.placeholder(shape=rhs_shape, name="B", dtype=dtype)) C = ftopi(A, B) - if isinstance(A, tvm.expr.PrimExpr) and isinstance(B, tvm.expr.PrimExpr): - assert(isinstance(C, tvm.expr.PrimExpr)) + if isinstance(A, tvm.tir.PrimExpr) and isinstance(B, tvm.tir.PrimExpr): + assert(isinstance(C, tvm.tir.PrimExpr)) return def gen_operand(shape, low, high, ctx): @@ -240,10 +241,10 @@ def test_logical_single_ele(): dtype="bool", ): # Build the logic and compile the function - A = tvm.placeholder(shape=indata.shape, name="A", dtype=dtype) + A = te.placeholder(shape=indata.shape, name="A", dtype=dtype) B = func(A) - if isinstance(A, tvm.expr.PrimExpr): - assert (isinstance(B, tvm.expr.PrimExpr)) + if isinstance(A, tvm.tir.PrimExpr): + assert (isinstance(B, tvm.tir.PrimExpr)) return def check_device(device): @@ -280,11 +281,11 @@ def test_bitwise_not(): dtype="int32", ): # Build the logic and compile the function - A = tvm.placeholder(shape=shape, name="A", dtype=dtype) + A = te.placeholder(shape=shape, name="A", dtype=dtype) B = func(A) - if isinstance(A, tvm.expr.PrimExpr): - assert (isinstance(B, tvm.expr.PrimExpr)) + if isinstance(A, tvm.tir.PrimExpr): + assert (isinstance(B, tvm.tir.PrimExpr)) return def check_device(device): @@ -322,11 +323,11 @@ def test_logical_binary_ele(): dtype="bool", ): # Build the logic and compile the function - A = (tvm.var("A", dtype=dtype)) - B = (tvm.var("B", dtype=dtype)) + A = (te.var("A", dtype=dtype)) + B = (te.var("B", dtype=dtype)) C = func(A, B) - if isinstance(A, tvm.expr.PrimExpr) and isinstance(B, tvm.expr.PrimExpr): - assert (isinstance(C, tvm.expr.PrimExpr)) + if isinstance(A, tvm.tir.PrimExpr) and isinstance(B, tvm.tir.PrimExpr): + assert (isinstance(C, tvm.tir.PrimExpr)) return def check_device(device): diff --git a/topi/tests/python/test_topi_clip.py b/topi/tests/python/test_topi_clip.py index 74034ce..38617ee 
100644 --- a/topi/tests/python/test_topi_clip.py +++ b/topi/tests/python/test_topi_clip.py @@ -17,6 +17,7 @@ """Test code for clip operator""" import numpy as np import tvm +from tvm import te import topi import topi.testing from topi.util import get_const_tuple @@ -25,9 +26,9 @@ from tvm.contrib.pickle_memoize import memoize from common import get_all_backend def verify_clip(N, a_min, a_max, dtype): - A = tvm.placeholder((N, N), dtype=dtype, name='A') + A = te.placeholder((N, N), dtype=dtype, name='A') B = topi.clip(A, a_min, a_max) - s = tvm.create_schedule([B.op]) + s = te.create_schedule([B.op]) # use memoize to pickle the test data for next time use @memoize("topi.tests.test_topi_clip") diff --git a/topi/tests/python/test_topi_conv1d.py b/topi/tests/python/test_topi_conv1d.py index 6e55a57..972a3f1 100644 --- a/topi/tests/python/test_topi_conv1d.py +++ b/topi/tests/python/test_topi_conv1d.py @@ -18,6 +18,7 @@ import numpy as np import itertools import tvm +from tvm import te import topi import topi.testing from tvm.contrib.pickle_memoize import memoize @@ -54,8 +55,8 @@ def verify_conv1d(batch, kernel_shape = [kernel_size, in_channels, filters] dtype = 'float32' - A = tvm.placeholder(in_shape, name='A', dtype=dtype) - W = tvm.placeholder(kernel_shape, name='W', dtype=dtype) + A = te.placeholder(in_shape, name='A', dtype=dtype) + W = te.placeholder(kernel_shape, name='W', dtype=dtype) def get_ref_data(layout): a_np = np.random.uniform(size=in_shape).astype(dtype) diff --git a/topi/tests/python/test_topi_conv1d_transpose_ncw.py b/topi/tests/python/test_topi_conv1d_transpose_ncw.py index 64af254..4d015bf 100644 --- a/topi/tests/python/test_topi_conv1d_transpose_ncw.py +++ b/topi/tests/python/test_topi_conv1d_transpose_ncw.py @@ -18,6 +18,7 @@ import numpy as np import itertools import tvm +from tvm import te import topi import topi.testing from tvm.contrib.pickle_memoize import memoize @@ -31,8 +32,8 @@ _conv1d_transpose_ncw_implement = { def 
verify_conv1d_transpose_ncw(batch, in_channel, in_size, num_filter, kernel, stride, padding): in_width = in_size - A = tvm.placeholder((batch, in_channel, in_width), name='A') - W = tvm.placeholder((in_channel, num_filter, kernel), name='W') + A = te.placeholder((batch, in_channel, in_width), name='A') + W = te.placeholder((in_channel, num_filter, kernel), name='W') a_shape = get_const_tuple(A.shape) w_shape = get_const_tuple(W.shape) diff --git a/topi/tests/python/test_topi_conv2d_NCHWc.py b/topi/tests/python/test_topi_conv2d_NCHWc.py index 8a74b4f..a072d2a 100644 --- a/topi/tests/python/test_topi_conv2d_NCHWc.py +++ b/topi/tests/python/test_topi_conv2d_NCHWc.py @@ -18,6 +18,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm import topi import topi.testing @@ -71,9 +72,9 @@ def verify_conv2d_NCHWc(batch, in_channel, in_size, num_filter, kernel, stride, ic_block = bn break - A = tvm.placeholder((batch, in_channel//ic_block, in_height, in_width, ic_block), name='A') - W = tvm.placeholder((num_filter//oc_block, in_channel//ic_block, kernel, kernel, ic_block, oc_block), name='W') - bias = tvm.placeholder((num_filter//oc_block, 1, 1, oc_block), name='bias') + A = te.placeholder((batch, in_channel//ic_block, in_height, in_width, ic_block), name='A') + W = te.placeholder((num_filter//oc_block, in_channel//ic_block, kernel, kernel, ic_block, oc_block), name='W') + bias = te.placeholder((num_filter//oc_block, 1, 1, oc_block), name='bias') @memoize("topi.tests.test_topi_conv2d_NCHWc.verify_conv2d_NCHWc") def get_ref_data(): diff --git a/topi/tests/python/test_topi_conv2d_hwcn.py b/topi/tests/python/test_topi_conv2d_hwcn.py index 086523e..41192bd 100644 --- a/topi/tests/python/test_topi_conv2d_hwcn.py +++ b/topi/tests/python/test_topi_conv2d_hwcn.py @@ -18,6 +18,7 @@ import os import numpy as np import tvm +from tvm import te import topi import topi.testing from tvm.contrib.pickle_memoize import memoize @@ -33,9 +34,9 @@ _conv2d_hwcn_implement = { 
def verify_conv2d_hwcn(batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation=1): in_height = in_width = in_size - A = tvm.placeholder((in_height, in_width, in_channel, batch), name='A') - W = tvm.placeholder((kernel, kernel, in_channel, num_filter), name='W') - B = tvm.placeholder((1, num_filter, 1), name='bias') + A = te.placeholder((in_height, in_width, in_channel, batch), name='A') + W = te.placeholder((kernel, kernel, in_channel, num_filter), name='W') + B = te.placeholder((1, num_filter, 1), name='bias') a_shape = get_const_tuple(A.shape) w_shape = get_const_tuple(W.shape) diff --git a/topi/tests/python/test_topi_conv2d_int8.py b/topi/tests/python/test_topi_conv2d_int8.py index c36bfa3..d784e5c 100644 --- a/topi/tests/python/test_topi_conv2d_int8.py +++ b/topi/tests/python/test_topi_conv2d_int8.py @@ -18,6 +18,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import FallbackConfigEntity import topi @@ -38,9 +39,9 @@ def verify_conv2d_NCHWc_int8(batch, in_channel, in_size, num_filter, kernel, str in_height = in_width = in_size - A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A', dtype='int8') - W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W', dtype='int8') - bias = tvm.placeholder((num_filter // oc_block_factor, 1, 1, oc_block_factor), name='bias', + A = te.placeholder((batch, in_channel, in_height, in_width), name='A', dtype='int8') + W = te.placeholder((num_filter, in_channel, kernel, kernel), name='W', dtype='int8') + bias = te.placeholder((num_filter // oc_block_factor, 1, 1, oc_block_factor), name='bias', dtype='int8') a_shape = get_const_tuple(A.shape) diff --git a/topi/tests/python/test_topi_conv2d_nchw.py b/topi/tests/python/test_topi_conv2d_nchw.py index a0258ec..d42c8c7 100644 --- a/topi/tests/python/test_topi_conv2d_nchw.py +++ b/topi/tests/python/test_topi_conv2d_nchw.py @@ -18,6 +18,7 @@ import numpy as np import tvm +from tvm 
import te from tvm import autotvm import topi import topi.testing @@ -36,9 +37,9 @@ def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, p in_height = in_width = in_size - A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A') - W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W') - bias = tvm.placeholder((num_filter, 1, 1), name='bias') + A = te.placeholder((batch, in_channel, in_height, in_width), name='A') + W = te.placeholder((num_filter, in_channel, kernel, kernel), name='W') + bias = te.placeholder((num_filter, 1, 1), name='bias') a_shape = get_const_tuple(A.shape) w_shape = get_const_tuple(W.shape) diff --git a/topi/tests/python/test_topi_conv2d_nhwc.py b/topi/tests/python/test_topi_conv2d_nhwc.py index 2a5915e..814fd45 100644 --- a/topi/tests/python/test_topi_conv2d_nhwc.py +++ b/topi/tests/python/test_topi_conv2d_nhwc.py @@ -18,6 +18,7 @@ import os import numpy as np import tvm +from tvm import te import topi import topi.testing from tvm.contrib.pickle_memoize import memoize @@ -37,8 +38,8 @@ _conv2d_nhwc_implement = { def verify_conv2d_nhwc(batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation=1): in_height = in_width = in_size - A = tvm.placeholder((batch, in_height, in_width, in_channel), name='A') - W = tvm.placeholder((kernel, kernel, in_channel, num_filter), name='W') + A = te.placeholder((batch, in_height, in_width, in_channel), name='A') + W = te.placeholder((kernel, kernel, in_channel, num_filter), name='W') a_shape = get_const_tuple(A.shape) w_shape = get_const_tuple(W.shape) diff --git a/topi/tests/python/test_topi_conv2d_nhwc_pack_int8.py b/topi/tests/python/test_topi_conv2d_nhwc_pack_int8.py index 8267aad..a5d532c 100644 --- a/topi/tests/python/test_topi_conv2d_nhwc_pack_int8.py +++ b/topi/tests/python/test_topi_conv2d_nhwc_pack_int8.py @@ -19,6 +19,7 @@ import pytest import numpy as np import tvm +from tvm import te from tvm import autotvm from 
tvm.autotvm.task.space import FallbackConfigEntity import topi @@ -30,8 +31,8 @@ from topi.util import get_const_tuple def verify_conv2d_1x1_nhwc_pack_int8(batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation=1): in_height = in_width = in_size - A = tvm.placeholder((batch, in_height, in_width, in_channel), name='A', dtype='uint8') - W = tvm.placeholder((kernel, kernel, in_channel, num_filter), name='W', dtype='int8') + A = te.placeholder((batch, in_height, in_width, in_channel), name='A', dtype='uint8') + W = te.placeholder((kernel, kernel, in_channel, num_filter), name='W', dtype='int8') a_shape = get_const_tuple(A.shape) w_shape = get_const_tuple(W.shape) diff --git a/topi/tests/python/test_topi_conv2d_transpose_nchw.py b/topi/tests/python/test_topi_conv2d_transpose_nchw.py index e8aabc6..e8e1fce 100644 --- a/topi/tests/python/test_topi_conv2d_transpose_nchw.py +++ b/topi/tests/python/test_topi_conv2d_transpose_nchw.py @@ -17,6 +17,7 @@ """Test code for transposed convolution.""" import numpy as np import tvm +from tvm import te import topi import topi.testing from tvm.contrib.pickle_memoize import memoize @@ -38,8 +39,8 @@ def verify_conv2d_transpose_nchw(batch, in_channel, in_size, num_filter, kernel, stride_height, stride_width = stride pad_top, pad_left, pad_bottom, pad_right = padding - A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A') - W = tvm.placeholder((in_channel, num_filter, kernel_height, kernel_width), name='W') + A = te.placeholder((batch, in_channel, in_height, in_width), name='A') + W = te.placeholder((in_channel, num_filter, kernel_height, kernel_width), name='W') a_shape = get_const_tuple(A.shape) w_shape = get_const_tuple(W.shape) diff --git a/topi/tests/python/test_topi_conv2d_winograd.py b/topi/tests/python/test_topi_conv2d_winograd.py index 2d12336..cfbc300 100644 --- a/topi/tests/python/test_topi_conv2d_winograd.py +++ b/topi/tests/python/test_topi_conv2d_winograd.py @@ -18,6 +18,7 @@ import numpy 
as np import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import FallbackConfigEntity import topi @@ -42,9 +43,9 @@ def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, p in_height = in_width = in_size - A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A') - W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W') - bias = tvm.placeholder((num_filter, 1, 1), name='bias') + A = te.placeholder((batch, in_channel, in_height, in_width), name='A') + W = te.placeholder((num_filter, in_channel, kernel, kernel), name='W') + bias = te.placeholder((num_filter, 1, 1), name='bias') a_shape = get_const_tuple(A.shape) w_shape = get_const_tuple(W.shape) diff --git a/topi/tests/python/test_topi_conv3d_ncdhw.py b/topi/tests/python/test_topi_conv3d_ncdhw.py index 6c60c27..33e7917 100644 --- a/topi/tests/python/test_topi_conv3d_ncdhw.py +++ b/topi/tests/python/test_topi_conv3d_ncdhw.py @@ -18,6 +18,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm import topi import topi.testing @@ -40,9 +41,9 @@ def verify_conv3d_ncdhw(batch, in_channel, in_size, num_filter, kernel, stride, in_depth = in_height = in_width = in_size - A = tvm.placeholder((batch, in_channel, in_depth, in_height, in_width), name='A') - W = tvm.placeholder((num_filter, in_channel, kernel, kernel, kernel), name='W') - bias = tvm.placeholder((num_filter, 1, 1, 1), name='bias') + A = te.placeholder((batch, in_channel, in_depth, in_height, in_width), name='A') + W = te.placeholder((num_filter, in_channel, kernel, kernel, kernel), name='W') + bias = te.placeholder((num_filter, 1, 1, 1), name='bias') a_shape = get_const_tuple(A.shape) w_shape = get_const_tuple(W.shape) diff --git a/topi/tests/python/test_topi_conv3d_ndhwc.py b/topi/tests/python/test_topi_conv3d_ndhwc.py index 7e2f02c..8526bb1 100644 --- a/topi/tests/python/test_topi_conv3d_ndhwc.py +++ b/topi/tests/python/test_topi_conv3d_ndhwc.py @@ -18,6 
+18,7 @@ import os import numpy as np import tvm +from tvm import te import topi import topi.testing from tvm.contrib.pickle_memoize import memoize @@ -41,8 +42,8 @@ def verify_conv3d_ndhwc(batch, in_channel, in_size, num_filter, kernel, stride, else: kernel_depth = kernel_height = kernel_width = kernel - A = tvm.placeholder((batch, in_depth, in_height, in_width, in_channel), name='A') - W = tvm.placeholder((kernel_depth, kernel_height, kernel_width, in_channel, num_filter), name='W') + A = te.placeholder((batch, in_depth, in_height, in_width, in_channel), name='A') + W = te.placeholder((kernel_depth, kernel_height, kernel_width, in_channel, num_filter), name='W') a_shape = get_const_tuple(A.shape) w_shape = get_const_tuple(W.shape) diff --git a/topi/tests/python/test_topi_deformable_conv2d.py b/topi/tests/python/test_topi_deformable_conv2d.py index 1b1a032..a885254 100644 --- a/topi/tests/python/test_topi_deformable_conv2d.py +++ b/topi/tests/python/test_topi_deformable_conv2d.py @@ -16,6 +16,7 @@ # under the License. 
import numpy as np import tvm +from tvm import te from tvm import autotvm import topi import topi.testing @@ -34,11 +35,11 @@ def verify_deformable_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel print("Workload: (%d, %d, %d, %d, %d, %d, %d, %d, %d, %d)" % (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation, deformable_groups, groups)) - A = tvm.placeholder((batch, in_channel, in_size, in_size), name='A') + A = te.placeholder((batch, in_channel, in_size, in_size), name='A') out_size = (in_size - (kernel - 1) * dilation - 1 + 2 * padding) // stride + 1 - Offset = tvm.placeholder((batch, deformable_groups * kernel * kernel * 2, out_size, out_size), name='offset') - W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W') - bias = tvm.placeholder((num_filter, 1, 1), name='bias') + Offset = te.placeholder((batch, deformable_groups * kernel * kernel * 2, out_size, out_size), name='offset') + W = te.placeholder((num_filter, in_channel, kernel, kernel), name='W') + bias = te.placeholder((num_filter, 1, 1), name='bias') a_shape = get_const_tuple(A.shape) offset_shape = get_const_tuple(Offset.shape) diff --git a/topi/tests/python/test_topi_dense.py b/topi/tests/python/test_topi_dense.py index d729e43..7498c00 100644 --- a/topi/tests/python/test_topi_dense.py +++ b/topi/tests/python/test_topi_dense.py @@ -17,6 +17,7 @@ """Test code for dense operator""" import numpy as np import tvm +from tvm import te import topi import topi.testing from topi.util import get_const_tuple @@ -38,9 +39,9 @@ _dense_implement = { } def verify_dense(batch, in_dim, out_dim, use_bias=True): - A = tvm.placeholder((batch, in_dim), name='A') - B = tvm.placeholder((out_dim, in_dim), name='B') - C = tvm.placeholder((out_dim,), name='C') + A = te.placeholder((batch, in_dim), name='A') + B = te.placeholder((out_dim, in_dim), name='B') + C = te.placeholder((out_dim,), name='C') dtype = A.dtype # use memoize to pickle the test data for next time use @@ -83,9 
+84,9 @@ def verify_dense(batch, in_dim, out_dim, use_bias=True): def verify_dense_int8(batch, in_dim, out_dim, use_bias=True): dtype = 'int8' out_dtype = 'int32' - A = tvm.placeholder((batch, in_dim), name='A', dtype=dtype) - B = tvm.placeholder((out_dim, in_dim), name='B', dtype=dtype) - C = tvm.placeholder((out_dim,), name='C', dtype=out_dtype) + A = te.placeholder((batch, in_dim), name='A', dtype=dtype) + B = te.placeholder((out_dim, in_dim), name='B', dtype=dtype) + C = te.placeholder((out_dim,), name='C', dtype=out_dtype) # use memoize to pickle the test data for next time use @memoize("topi.tests.test_topi_dense_int8") diff --git a/topi/tests/python/test_topi_depth_to_space.py b/topi/tests/python/test_topi_depth_to_space.py index 693bfb6..b21eb97 100644 --- a/topi/tests/python/test_topi_depth_to_space.py +++ b/topi/tests/python/test_topi_depth_to_space.py @@ -17,6 +17,7 @@ """Test code for depth to space""" import numpy as np import tvm +from tvm import te import topi import topi.testing @@ -37,7 +38,7 @@ def verify_depth_to_space(block_size, batch, in_channel, in_height, in_width, la else: raise NotImplementedError('Layout not supported {}'.format(layout)) - A = tvm.placeholder(in_shape, name='A', dtype='float32') + A = te.placeholder(in_shape, name='A', dtype='float32') dtype = A.dtype a_np = np.random.uniform(size=in_shape).astype(dtype) diff --git a/topi/tests/python/test_topi_depthwise_conv2d.py b/topi/tests/python/test_topi_depthwise_conv2d.py index 7efe5a2..6933489 100644 --- a/topi/tests/python/test_topi_depthwise_conv2d.py +++ b/topi/tests/python/test_topi_depthwise_conv2d.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te from tvm import autotvm import topi import topi.testing @@ -58,10 +59,10 @@ def depthwise_conv2d_with_workload_nchw(batch, in_channel, in_height, channel_mu padding_args = padding # placeholder - Input = tvm.placeholder((batch, in_channel, in_height, in_width), name='Input') - Filter = tvm.placeholder((filter_channel, channel_multiplier, filter_height, filter_width), name='Filter') - Scale = tvm.placeholder((in_channel * channel_multiplier,), name='Scale') - Shift = tvm.placeholder((in_channel * channel_multiplier,), name='Shift') + Input = te.placeholder((batch, in_channel, in_height, in_width), name='Input') + Filter = te.placeholder((filter_channel, channel_multiplier, filter_height, filter_width), name='Filter') + Scale = te.placeholder((in_channel * channel_multiplier,), name='Scale') + Shift = te.placeholder((in_channel * channel_multiplier,), name='Shift') dtype = 'float32' @@ -161,10 +162,10 @@ def depthwise_conv2d_with_workload_nhwc(batch, in_channel, in_height, channel_mu padding_args = padding # placeholder - Input = tvm.placeholder((batch, in_height, in_width, in_channel), name='Input') - Filter = tvm.placeholder((filter_height, filter_width,filter_channel, channel_multiplier), name='Filter') - Scale = tvm.placeholder((in_channel * channel_multiplier,), name='Scale') - Shift = tvm.placeholder((in_channel * channel_multiplier,), name='Shift') + Input = te.placeholder((batch, in_height, in_width, in_channel), name='Input') + Filter = te.placeholder((filter_height, filter_width,filter_channel, channel_multiplier), name='Filter') + Scale = te.placeholder((in_channel * channel_multiplier,), name='Scale') + Shift = te.placeholder((in_channel * channel_multiplier,), name='Shift') dtype = 'float32' @@ -289,8 +290,8 @@ def depthwise_conv2d_with_workload_NCHWc(batch, in_channel, in_height, channel_m break # placeholder - Input = tvm.placeholder((batch, in_channel//ic_block, in_height, in_width, ic_block), name='Input') - Filter = 
tvm.placeholder((out_channel//oc_block, 1, filter_height, filter_width, 1, oc_block), name='Filter') + Input = te.placeholder((batch, in_channel//ic_block, in_height, in_width, ic_block), name='Input') + Filter = te.placeholder((out_channel//oc_block, 1, filter_height, filter_width, 1, oc_block), name='Filter') in_layout = "NCHW%dc" % ic_block out_layout = "NCHW%dc" % oc_block dtype = 'float32' diff --git a/topi/tests/python/test_topi_depthwise_conv2d_back_input.py b/topi/tests/python/test_topi_depthwise_conv2d_back_input.py index ad44429..aac0cd5 100644 --- a/topi/tests/python/test_topi_depthwise_conv2d_back_input.py +++ b/topi/tests/python/test_topi_depthwise_conv2d_back_input.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import topi import numpy as np from tvm.contrib.pickle_memoize import memoize @@ -40,8 +41,8 @@ def verify_depthwise_conv2d_back_input(batch, in_channel, in_h, channel_multipli oshape = [batch, out_h, out_w, out_channel] # placeholder - Out_grad = tvm.placeholder(oshape, name='Out_grad') - Filter = tvm.placeholder((filter_h, filter_w, filter_channel, channel_multiplier)) + Out_grad = te.placeholder(oshape, name='Out_grad') + Filter = te.placeholder((filter_h, filter_w, filter_channel, channel_multiplier)) # declare In_grad = topi.nn.depthwise_conv2d_backward_input_nhwc(Filter, Out_grad, oshape, ishape, stride=[stride_h, stride_w], padding=[padding_h, padding_w]) diff --git a/topi/tests/python/test_topi_depthwise_conv2d_back_weight.py b/topi/tests/python/test_topi_depthwise_conv2d_back_weight.py index 2e09e67..4602d09 100644 --- a/topi/tests/python/test_topi_depthwise_conv2d_back_weight.py +++ b/topi/tests/python/test_topi_depthwise_conv2d_back_weight.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import tvm +from tvm import te import topi import topi.testing import numpy as np @@ -40,8 +41,8 @@ def verify_depthwise_conv2d_back_weight(batch, in_channel, in_h, channel_multipl fshape = [filter_h, filter_w, in_channel, channel_multiplier] # placeholder - Out_grad = tvm.placeholder(oshape, name='Out_grad') - Input = tvm.placeholder((batch, in_h, in_w, in_channel), name='In_grad') + Out_grad = te.placeholder(oshape, name='Out_grad') + Input = te.placeholder((batch, in_h, in_w, in_channel), name='In_grad') # declare Weight_grad = topi.nn.depthwise_conv2d_backward_weight_nhwc(Input, Out_grad, oshape, fshape, stride=[stride_h, stride_w], padding=[padding_h, padding_w]) diff --git a/topi/tests/python/test_topi_dilate.py b/topi/tests/python/test_topi_dilate.py index 2498821..1e69383 100644 --- a/topi/tests/python/test_topi_dilate.py +++ b/topi/tests/python/test_topi_dilate.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import topi import topi.testing import numpy as np @@ -25,9 +26,9 @@ def test_dilate(): ctx = tvm.cpu(0) def _test_dilate(input_size, strides): - Input = tvm.placeholder((input_size)) + Input = te.placeholder((input_size)) Output = topi.nn.dilate(Input, strides) - schedule = tvm.create_schedule(Output.op) + schedule = te.create_schedule(Output.op) input_np = np.random.uniform(size=input_size).astype(Input.dtype) output_np = topi.testing.dilate_python(input_np, strides) input_tvm = tvm.nd.array(input_np, ctx=ctx) diff --git a/topi/tests/python/test_topi_group_conv2d.py b/topi/tests/python/test_topi_group_conv2d.py index 3904db7..6909bbe 100644 --- a/topi/tests/python/test_topi_group_conv2d.py +++ b/topi/tests/python/test_topi_group_conv2d.py @@ -18,6 +18,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm from tvm.autotvm.task.space import FallbackConfigEntity import topi @@ -41,9 +42,9 @@ def verify_group_conv2d_nchw(batch, in_channel, in_size, 
num_filter, kernel, str in_height = in_width = in_size - A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A') - W = tvm.placeholder((num_filter, in_channel // groups, kernel, kernel), name='W') - bias = tvm.placeholder((num_filter, 1, 1), name='bias') + A = te.placeholder((batch, in_channel, in_height, in_width), name='A') + W = te.placeholder((num_filter, in_channel // groups, kernel, kernel), name='W') + bias = te.placeholder((num_filter, 1, 1), name='bias') a_shape = get_const_tuple(A.shape) w_shape = get_const_tuple(W.shape) @@ -112,9 +113,9 @@ def verify_group_conv2d_NCHWc_int8(batch, in_channel, in_size, num_filter, kerne in_height = in_width = in_size - A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A', dtype='int8') - W = tvm.placeholder((num_filter, in_channel // groups, kernel, kernel), name='W', dtype='int8') - bias = tvm.placeholder((num_filter // oc_block_factor, 1, 1, oc_block_factor), name='bias', + A = te.placeholder((batch, in_channel, in_height, in_width), name='A', dtype='int8') + W = te.placeholder((num_filter, in_channel // groups, kernel, kernel), name='W', dtype='int8') + bias = te.placeholder((num_filter // oc_block_factor, 1, 1, oc_block_factor), name='bias', dtype='int8') a_shape = get_const_tuple(A.shape) diff --git a/topi/tests/python/test_topi_group_conv2d_NCHWc_int8.py b/topi/tests/python/test_topi_group_conv2d_NCHWc_int8.py index 08f136e..0fd4205 100644 --- a/topi/tests/python/test_topi_group_conv2d_NCHWc_int8.py +++ b/topi/tests/python/test_topi_group_conv2d_NCHWc_int8.py @@ -19,6 +19,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm import topi import topi.testing @@ -61,8 +62,8 @@ def verify_group_conv2d_NCHWc_int8(batch, in_channel, groups, in_size, num_filte ic_block = 8 autotvm.DispatchContext.current.silent = True - A = tvm.placeholder((batch, in_channel//ic_block, in_height, in_width, ic_block), name='A', dtype='uint8') - W = 
tvm.placeholder((num_filter//oc_block, in_channel//ic_block//groups, kernel, kernel, ic_block//4, oc_block, 4), name='W', dtype='int8') + A = te.placeholder((batch, in_channel//ic_block, in_height, in_width, ic_block), name='A', dtype='uint8') + W = te.placeholder((num_filter//oc_block, in_channel//ic_block//groups, kernel, kernel, ic_block//4, oc_block, 4), name='W', dtype='int8') @memoize("topi.tests.test_topi_conv2d_NCHWc_int8.verify_conv2d_NCHWc_int8") def get_ref_data(): diff --git a/topi/tests/python/test_topi_image.py b/topi/tests/python/test_topi_image.py index 4297638..4eea75d 100644 --- a/topi/tests/python/test_topi_image.py +++ b/topi/tests/python/test_topi_image.py @@ -17,6 +17,7 @@ """Test code for bilinear scale """ import numpy as np import tvm +from tvm import te import topi import topi.testing @@ -25,12 +26,12 @@ from common import get_all_backend def verify_resize(batch, in_channel, in_height, in_width, out_height, out_width, layout='NCHW', coord_trans="align_corners", method="bilinear"): if layout == 'NCHW': - A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A', dtype='float32') + A = te.placeholder((batch, in_channel, in_height, in_width), name='A', dtype='float32') dtype = A.dtype out_shape = (batch, in_channel, out_height, out_width) a_np = np.random.uniform(size=(batch, in_channel, in_height, in_width)).astype(dtype) elif layout == 'NHWC': - A = tvm.placeholder((batch, in_height, in_width, in_channel), name='A', dtype='float32') + A = te.placeholder((batch, in_height, in_width, in_channel), name='A', dtype='float32') dtype = A.dtype out_shape = (batch, out_height, out_width, in_channel) a_np = np.random.uniform(size=(batch, in_height, in_width, in_channel)).astype(dtype) @@ -84,12 +85,12 @@ def test_resize(): def verify_resize3d(batch, in_channel, in_depth, in_height, in_width, out_depth, out_height, out_width, layout='NCDHW', coordinate_transformation_mode="half_pixel", method="trilinear"): if layout == 'NCDHW': - A = 
tvm.placeholder((batch, in_channel, in_depth, in_height, in_width), name='A', dtype='float32') + A = te.placeholder((batch, in_channel, in_depth, in_height, in_width), name='A', dtype='float32') dtype = A.dtype out_shape = (batch, in_channel, out_depth, out_height, out_width) a_np = np.random.uniform(size=(batch, in_channel, in_depth, in_height, in_width)).astype(dtype) elif layout == 'NDHWC': - A = tvm.placeholder((batch, in_depth, in_height, in_width, in_channel), name='A', dtype='float32') + A = te.placeholder((batch, in_depth, in_height, in_width, in_channel), name='A', dtype='float32') dtype = A.dtype out_shape = (batch, out_depth, out_height, out_width, in_channel) a_np = np.random.uniform(size=(batch, in_depth, in_height, in_width, in_channel)).astype(dtype) @@ -146,10 +147,10 @@ def test_crop_and_resize(): def verify_crop_and_resize(image_shape, np_boxes, np_box_indices, np_crop_size, layout='NHWC', method="bilinear", extrapolation_value=0.0): - images = tvm.placeholder(image_shape, name='images', dtype='float32') + images = te.placeholder(image_shape, name='images', dtype='float32') np_images = np.random.uniform(size=image_shape).astype("float32") - boxes = tvm.placeholder(np_boxes.shape, name="boxes", dtype="float32") - box_ind = tvm.placeholder(np_box_indices.shape, name="box_ind", dtype="int32") + boxes = te.placeholder(np_boxes.shape, name="boxes", dtype="float32") + box_ind = te.placeholder(np_box_indices.shape, name="box_ind", dtype="int32") batch = len(np_box_indices) target_height, target_width = np_crop_size[0], np_crop_size[1] diff --git a/topi/tests/python/test_topi_lrn.py b/topi/tests/python/test_topi_lrn.py index 4cb3c75..7e003a7 100644 --- a/topi/tests/python/test_topi_lrn.py +++ b/topi/tests/python/test_topi_lrn.py @@ -17,6 +17,7 @@ """Test code for local response normalization""" import numpy as np import tvm +from tvm import te import topi from topi.util import get_const_tuple import topi.testing @@ -32,7 +33,7 @@ _lrn_schedule = { } def 
verify_lrn(shape, size, axis, bias, alpha, beta): - A = tvm.placeholder(shape, name='A') + A = te.placeholder(shape, name='A') B = topi.nn.lrn(A, size, axis, alpha, beta, bias) dtype = A.dtype diff --git a/topi/tests/python/test_topi_math.py b/topi/tests/python/test_topi_math.py index debc3ef..30a0f44 100644 --- a/topi/tests/python/test_topi_math.py +++ b/topi/tests/python/test_topi_math.py @@ -17,6 +17,7 @@ import numpy as np import scipy import tvm +from tvm import te import topi import topi.testing from topi import util @@ -24,7 +25,7 @@ from common import get_all_backend def test_util(): - x = tvm.const(100, "int32") + x = tvm.tir.const(100, "int32") assert util.get_const_int(x) == 100 assert util.get_const_tuple((x, x)) == (100, 100) @@ -37,13 +38,13 @@ def test_ewise(): low, high, shape=(20, 3), - dtype=tvm.float32, + dtype="float32", check_round=False, skip_name_check=False, ): - m = tvm.var("m") - l = tvm.var("l") - A = tvm.placeholder((m, l), dtype=dtype, name="A") + m = te.var("m") + l = te.var("l") + A = te.placeholder((m, l), dtype=dtype, name="A") B = func(A) assert tuple(B.shape) == tuple(A.shape) @@ -76,13 +77,13 @@ def test_ewise(): low, high, shape=(20, 3), - dtype=tvm.float32, + dtype="float32", check_round=False, skip_name_check=False, ): - m = tvm.var("m") - l = tvm.var("l") - A = tvm.placeholder((m, l), dtype=dtype, name="A") + m = te.var("m") + l = te.var("l") + A = te.placeholder((m, l), dtype=dtype, name="A") B = topi.isnan(A) assert tuple(B.shape) == tuple(A.shape) @@ -134,7 +135,7 @@ def test_ewise(): def test_cast(): def verify(from_dtype, to_dtype, low=-100, high=100): shape = (5, 4) - A = tvm.placeholder(shape, dtype=from_dtype, name="A") + A = te.placeholder(shape, dtype=from_dtype, name="A") B = topi.cast(A, to_dtype) if from_dtype == "bool": @@ -177,11 +178,11 @@ def test_fastmath(): low, high, step, - dtype=tvm.float32 + dtype="float32" ): a_np = np.arange(low, high, step).astype(dtype) b_np = f_numpy(a_np) - A = 
tvm.placeholder(a_np.shape, dtype=dtype, name="A") + A = te.placeholder(a_np.shape, dtype=dtype, name="A") B = func(A) assert tuple(B.shape) == tuple(A.shape) diff --git a/topi/tests/python/test_topi_matmul.py b/topi/tests/python/test_topi_matmul.py index c712970..0c0a365 100644 --- a/topi/tests/python/test_topi_matmul.py +++ b/topi/tests/python/test_topi_matmul.py @@ -16,6 +16,7 @@ # under the License. import numpy as np import tvm +from tvm import te import topi from topi.util import get_const_tuple @@ -27,12 +28,12 @@ def with_tvm(lam, *args): pls = [] # placeholders vals_nd = [] # initial values for i,arg in enumerate(args): - pls.append(tvm.placeholder(arg.shape, name='pl'+str(i))) + pls.append(te.placeholder(arg.shape, name='pl'+str(i))) vals_nd.append(tvm.nd.array(arg, ctx)) out = lam(*pls) out_nd = tvm.nd.array(np.zeros(get_const_tuple(out.shape), dtype=out.dtype), ctx) - s = tvm.create_schedule([out.op]) + s = te.create_schedule([out.op]) m = tvm.build(s, pls + [out], "llvm") m(*(vals_nd+[out_nd])) return out_nd.asnumpy() diff --git a/topi/tests/python/test_topi_pooling.py b/topi/tests/python/test_topi_pooling.py index 084a2c7..64f0841 100644 --- a/topi/tests/python/test_topi_pooling.py +++ b/topi/tests/python/test_topi_pooling.py @@ -18,6 +18,7 @@ import math import numpy as np import tvm +from tvm import te import topi import topi.testing from topi.util import get_const_tuple @@ -48,7 +49,7 @@ def verify_pool(n, ic, ih, kh, sh, padding, pool_type, ceil_mode, count_include_ sw = sh pt, pl, pb, pr = padding layout = "NCHW" - A = tvm.placeholder((n, ic, ih, iw), name='A') + A = te.placeholder((n, ic, ih, iw), name='A') B = topi.nn.pool(A, kernel=[kh, kw], stride=[sh, sw], padding=padding, pool_type=pool_type, ceil_mode=ceil_mode, layout="NCHW", count_include_pad=count_include_pad) @@ -112,7 +113,7 @@ def verify_pool_grad(n, ic, ih, kh, sh, padding, pool_type, ceil_mode, count_inc sw = sh pt, pl, pb, pr = padding layout = "NCHW" - A = tvm.placeholder((n, ic, 
ih, iw), name='A') + A = te.placeholder((n, ic, ih, iw), name='A') B = topi.nn.pool(A, kernel=[kh, kw], stride=[sh, sw], padding=padding, pool_type=pool_type, ceil_mode=ceil_mode, layout="NCHW", count_include_pad=count_include_pad) @@ -126,7 +127,7 @@ def verify_pool_grad(n, ic, ih, kh, sh, padding, pool_type, ceil_mode, count_inc else: assert bshape[2] == int(math.floor(float(ashape[2] - kh + pt + pb) / sh) + 1) assert bshape[3] == int(math.floor(float(ashape[3] - kw + pl + pr) / sw) + 1) - OutGrad = tvm.placeholder(bshape, name='OutGrad') + OutGrad = te.placeholder(bshape, name='OutGrad') PoolGrad = topi.nn.pool_grad(OutGrad, A, kernel=[kh, kw], stride=[sh, sw], padding=padding, pool_type=pool_type, ceil_mode=ceil_mode, layout="NCHW", count_include_pad=count_include_pad) @@ -202,7 +203,7 @@ def test_pool_grad(): def verify_global_pool(n, c, h, w, pool_type, layout='NCHW'): assert layout in ["NCHW", "NHWC"] - A = tvm.placeholder((n, c, h, w), name='A') + A = te.placeholder((n, c, h, w), name='A') B = topi.nn.global_pool(A, pool_type=pool_type, layout=layout) B = topi.nn.relu(B) @@ -268,7 +269,7 @@ def verify_adaptive_pool(dshape, out_size, pool_type, layout="NCHW", dtype="floa l_sl = slice(l_start, l_end) np_out[i, j, k, l] = np_op(np_data[i, j, k_sl, l_sl]) - data = tvm.placeholder(dshape, name="data", dtype=dtype) + data = te.placeholder(dshape, name="data", dtype=dtype) out = topi.nn.adaptive_pool(data, out_size, pool_type, layout) def check_device(device): ctx = tvm.context(device, 0) @@ -302,7 +303,7 @@ def verify_pool3d(n, ic, ih, kh, sh, padding, pool_type, input_shape = (n, ic, id, ih, iw) kernel = [kd, kh, kw] stride = [sd, sh, sw] - A = tvm.placeholder(input_shape, name='A') + A = te.placeholder(input_shape, name='A') B = topi.nn.pool3d(A, kernel=kernel, stride=stride, padding=padding, pool_type=pool_type, ceil_mode=ceil_mode, layout=layout, count_include_pad=count_include_pad) @@ -355,7 +356,7 @@ def verify_pool1d(n, ic, iw, kw, sw, padding, pool_type, 
input_shape = (n, ic, iw) kernel = [kw] stride = [sw] - A = tvm.placeholder(input_shape, name='A') + A = te.placeholder(input_shape, name='A') B = topi.nn.pool1d(A, kernel=kernel, stride=stride, padding=padding, pool_type=pool_type, ceil_mode=ceil_mode, layout=layout, count_include_pad=count_include_pad) diff --git a/topi/tests/python/test_topi_reduce.py b/topi/tests/python/test_topi_reduce.py index 751025b..cc84fe0 100644 --- a/topi/tests/python/test_topi_reduce.py +++ b/topi/tests/python/test_topi_reduce.py @@ -18,6 +18,7 @@ import os import numpy as np import tvm +from tvm import te import topi import topi.testing @@ -46,7 +47,7 @@ def _my_npy_argmin(arr, axis, keepdims): def verify_reduce_map_ele(in_shape, axis, keepdims, type="sum", dtype="float32"): # Build the logic and compile the function - A = tvm.placeholder(shape=in_shape, name="A", dtype=dtype) + A = te.placeholder(shape=in_shape, name="A", dtype=dtype) A1 = topi.sqrt(topi.exp(A)) out_dtype = dtype if type == "sum": diff --git a/topi/tests/python/test_topi_relu.py b/topi/tests/python/test_topi_relu.py index 8ef3549..4d4166f 100644 --- a/topi/tests/python/test_topi_relu.py +++ b/topi/tests/python/test_topi_relu.py @@ -18,6 +18,7 @@ import os import numpy as np import tvm +from tvm import te import topi import topi.testing from topi.util import get_const_tuple @@ -26,7 +27,7 @@ from tvm.contrib.nvcc import have_fp16 from common import get_all_backend def verify_relu(m, n, dtype="float32"): - A = tvm.placeholder((m, n), name='A', dtype=dtype) + A = te.placeholder((m, n), name='A', dtype=dtype) B = topi.nn.relu(A) a_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(A.shape)).astype(A.dtype) @@ -55,9 +56,9 @@ def verify_relu(m, n, dtype="float32"): def verify_leaky_relu(m, alpha): - A = tvm.placeholder((m,), name='A') + A = te.placeholder((m,), name='A') B = topi.nn.leaky_relu(A, alpha) - s = tvm.create_schedule([B.op]) + s = te.create_schedule([B.op]) a_np = 
np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype) b_np = a_np * (a_np > 0) + a_np * (a_np < 0) * alpha @@ -70,8 +71,8 @@ def verify_leaky_relu(m, alpha): def verify_prelu(x, w, axis, weight_reshape): - X = tvm.placeholder((x), name='X') - W = tvm.placeholder((w), name='W') + X = te.placeholder((x), name='X') + W = te.placeholder((w), name='W') x_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(X.shape)).astype(X.dtype) w_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(W.shape)).astype(W.dtype) @@ -79,7 +80,7 @@ def verify_prelu(x, w, axis, weight_reshape): return (x < 0) * (x *W.reshape(weight_reshape)) + (x>=0) * x B = topi.nn.prelu(X, W, axis) - s = tvm.create_schedule([B.op]) + s = te.create_schedule([B.op]) ctx = tvm.cpu(0) x_tvm = tvm.nd.array(x_np, ctx) diff --git a/topi/tests/python/test_topi_reorg.py b/topi/tests/python/test_topi_reorg.py index c4cd2b5..09c2f2f 100644 --- a/topi/tests/python/test_topi_reorg.py +++ b/topi/tests/python/test_topi_reorg.py @@ -19,6 +19,7 @@ import numpy as np import topi from topi.util import get_const_tuple import tvm +from tvm import te import topi.testing _reorg_schedule = { @@ -30,7 +31,7 @@ def verify_reorg(batch, in_size, in_channel, stride): '''Verify reorg operator by comparing outputs from tvm and numpy implementation''' in_height = in_width = in_size - A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A') + A = te.placeholder((batch, in_channel, in_height, in_width), name='A') B = topi.vision.reorg(A, stride) a_shape = get_const_tuple(A.shape) diff --git a/topi/tests/python/test_topi_softmax.py b/topi/tests/python/test_topi_softmax.py index 5396b6b..4857387 100644 --- a/topi/tests/python/test_topi_softmax.py +++ b/topi/tests/python/test_topi_softmax.py @@ -18,6 +18,7 @@ import os import numpy as np import tvm +from tvm import te import topi import topi.testing import logging @@ -50,10 +51,10 @@ def check_device(A, B, a_np, b_np, device, name): 
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5) def verify_softmax(m, n, dtype="float32"): - A = tvm.placeholder((m, n), dtype=dtype, name='A') + A = te.placeholder((m, n), dtype=dtype, name='A') B = topi.nn.softmax(A) # confirm lower works - s = tvm.create_schedule([B.op]) + s = te.create_schedule([B.op]) tvm.lower(s, [A, B], simple_mode=True) a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype) @@ -63,7 +64,7 @@ def verify_softmax(m, n, dtype="float32"): check_device(A, B, a_np, b_np, device, "softmax") def verify_softmax_4d(shape, dtype="float32"): - A = tvm.placeholder(shape, dtype=dtype, name='A') + A = te.placeholder(shape, dtype=dtype, name='A') B = topi.nn.softmax(A, axis=1) _, c, h, w = shape @@ -81,10 +82,10 @@ def test_softmax(): verify_softmax_4d((1, 16, 256, 256)) def verify_log_softmax(m, n, dtype="float32"): - A = tvm.placeholder((m, n), dtype=dtype, name='A') + A = te.placeholder((m, n), dtype=dtype, name='A') B = topi.nn.log_softmax(A) # confirm lower works - s = tvm.create_schedule([B.op]) + s = te.create_schedule([B.op]) tvm.lower(s, [A, B], simple_mode=True) a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype) b_np = topi.testing.log_softmax_python(a_np) diff --git a/topi/tests/python/test_topi_sort.py b/topi/tests/python/test_topi_sort.py index 74e55ec..2728733 100644 --- a/topi/tests/python/test_topi_sort.py +++ b/topi/tests/python/test_topi_sort.py @@ -18,6 +18,7 @@ from __future__ import print_function import numpy as np import tvm +from tvm import te import topi import topi.testing @@ -34,7 +35,7 @@ _topk_implement = { def verify_argsort(axis, is_ascend): dshape = (20, 100) data_dtype = "float32" - data = tvm.placeholder(dshape, name="data", dtype=data_dtype) + data = te.placeholder(dshape, name="data", dtype=data_dtype) perm = np.arange(dshape[0] * dshape[1], dtype=data_dtype) np.random.shuffle(perm) @@ -74,7 +75,7 @@ def verify_argsort(axis, is_ascend): def verify_topk(k, axis, ret_type, 
is_ascend, dtype): shape = (20, 100) data_dtype = "float32" - data = tvm.placeholder(shape, name="data", dtype=data_dtype) + data = te.placeholder(shape, name="data", dtype=data_dtype) np_data = np.random.uniform(size=shape).astype(data_dtype) if is_ascend: diff --git a/topi/tests/python/test_topi_space_to_depth.py b/topi/tests/python/test_topi_space_to_depth.py index 99a798e..11a009d 100644 --- a/topi/tests/python/test_topi_space_to_depth.py +++ b/topi/tests/python/test_topi_space_to_depth.py @@ -17,6 +17,7 @@ """Test code for space to depth""" import numpy as np import tvm +from tvm import te import topi import topi.testing @@ -37,7 +38,7 @@ def verify_space_to_depth(block_size, batch, in_channel, in_height, in_width, la else: raise NotImplementedError('Layout not supported {}'.format(layout)) - A = tvm.placeholder(in_shape, name='A', dtype='float32') + A = te.placeholder(in_shape, name='A', dtype='float32') dtype = A.dtype a_np = np.random.uniform(size=in_shape).astype(dtype) diff --git a/topi/tests/python/test_topi_sparse.py b/topi/tests/python/test_topi_sparse.py index fc28859..fc2d26b 100644 --- a/topi/tests/python/test_topi_sparse.py +++ b/topi/tests/python/test_topi_sparse.py @@ -17,6 +17,7 @@ """Test code for sparse operator""" import numpy as np import tvm +from tvm import te import topi import topi.testing from topi.util import get_const_tuple @@ -26,13 +27,13 @@ import time import scipy.sparse as sp def verify_dynamic_csrmv(batch, in_dim, out_dim, use_bias=True): - nr, nc, n = tvm.var("nr"), tvm.var("nc"), tvm.var("n") + nr, nc, n = te.var("nr"), te.var("nc"), te.var("n") dtype = 'float32' A = tvmsp.placeholder(shape=(nr, nc), nonzeros=n, dtype=dtype, name='A') - B = tvm.placeholder((in_dim, 1), name='B') - C = tvm.placeholder((nr,), name='C') + B = te.placeholder((in_dim, 1), name='B') + C = te.placeholder((nr,), name='C') D = topi.sparse.csrmv(A, B, C if use_bias else None) - s = tvm.create_schedule(D.op) + s = te.create_schedule(D.op) dtype = A.dtype 
# get the test data @@ -70,13 +71,13 @@ def verify_dynamic_csrmv(batch, in_dim, out_dim, use_bias=True): check_device(device) def verify_dynamic_csrmm(batch, in_dim, out_dim, use_bias=True): - nr, nc, n = tvm.var("nr"), tvm.var("nc"), tvm.var("n") + nr, nc, n = te.var("nr"), te.var("nc"), te.var("n") dtype = 'float32' A = tvmsp.placeholder(shape=(nr, nc), nonzeros=n, dtype=dtype, name='A') - B = tvm.placeholder((in_dim, out_dim), name='B') - C = tvm.placeholder((nr,), name='C') + B = te.placeholder((in_dim, out_dim), name='B') + C = te.placeholder((nr,), name='C') D = topi.sparse.csrmm(A, B, C if use_bias else None) - s = tvm.create_schedule(D.op) + s = te.create_schedule(D.op) dtype = A.dtype # get the test data @@ -112,12 +113,12 @@ def verify_dynamic_csrmm(batch, in_dim, out_dim, use_bias=True): check_device(device) def verify_dense_si(batch, in_dim, out_dim, use_bias=True, dtype='float32'): - nonzeros = tvm.var('nonzeros') + nonzeros = te.var('nonzeros') A = tvmsp.placeholder(shape=(batch, in_dim), nonzeros=nonzeros, dtype=dtype, name='A') - B = tvm.placeholder((out_dim, in_dim), dtype=dtype, name='B') - C = tvm.placeholder((out_dim,), dtype=dtype, name='C') + B = te.placeholder((out_dim, in_dim), dtype=dtype, name='B') + C = te.placeholder((out_dim,), dtype=dtype, name='C') D = topi.sparse.dense(A, B, C if use_bias else None) - s = tvm.create_schedule(D.op) + s = te.create_schedule(D.op) # get the test data def get_ref_data(): @@ -149,12 +150,12 @@ def verify_dense_si(batch, in_dim, out_dim, use_bias=True, dtype='float32'): check_device('llvm') def verify_dense_sw(batch, in_dim, out_dim, use_bias=True, dtype='float32'): - nonzeros = tvm.var('nonzeros') - A = tvm.placeholder((batch, in_dim), dtype=dtype, name='A') + nonzeros = te.var('nonzeros') + A = te.placeholder((batch, in_dim), dtype=dtype, name='A') B = tvmsp.placeholder(shape=(out_dim, in_dim), nonzeros=nonzeros, dtype=dtype, name='B') - C = tvm.placeholder((out_dim,), dtype=dtype, name='C') + C = 
te.placeholder((out_dim,), dtype=dtype, name='C') D = topi.sparse.dense(A, B, C if use_bias else None) - s = tvm.create_schedule(D.op) + s = te.create_schedule(D.op) # get the test data def get_ref_data(): @@ -224,12 +225,12 @@ def test_sparse_dense_csr(): W_np = W_sp_np.todense() Y_np = X_np.dot(W_np.T) - W_data = tvm.placeholder(shape=W_sp_np.data.shape, dtype=str(W_sp_np.data.dtype)) - W_indices = tvm.placeholder(shape=W_sp_np.indices.shape, dtype=str(W_sp_np.indices.dtype)) - W_indptr = tvm.placeholder(shape=W_sp_np.indptr.shape, dtype=str(W_sp_np.indptr.dtype)) - X = tvm.placeholder(shape=X_np.shape, dtype=str(X_np.dtype)) + W_data = te.placeholder(shape=W_sp_np.data.shape, dtype=str(W_sp_np.data.dtype)) + W_indices = te.placeholder(shape=W_sp_np.indices.shape, dtype=str(W_sp_np.indices.dtype)) + W_indptr = te.placeholder(shape=W_sp_np.indptr.shape, dtype=str(W_sp_np.indptr.dtype)) + X = te.placeholder(shape=X_np.shape, dtype=str(X_np.dtype)) Y = topi.nn.sparse_dense(X, W_data, W_indices, W_indptr) - s = tvm.create_schedule(Y.op) + s = te.create_schedule(Y.op) func = tvm.build(s, [X, W_data, W_indices, W_indptr, Y]) Y_tvm = tvm.nd.array(np.zeros(Y_np.shape, dtype=Y_np.dtype)) func(tvm.nd.array(X_np), tvm.nd.array(W_sp_np.data), tvm.nd.array(W_sp_np.indices), tvm.nd.array(W_sp_np.indptr), Y_tvm) @@ -243,12 +244,12 @@ def test_sparse_transpose_csr(): X_sp_T = X_sp.transpose() X_np_T = X_sp_T.todense() - X_data = tvm.placeholder(shape=X_sp.data.shape, dtype=str(X_sp.data.dtype)) - X_indices = tvm.placeholder(shape=X_sp.indices.shape, dtype=str(X_sp.indices.dtype)) - X_indptr = tvm.placeholder(shape=X_sp.indptr.shape, dtype=str(X_sp.indptr.dtype)) + X_data = te.placeholder(shape=X_sp.data.shape, dtype=str(X_sp.data.dtype)) + X_indices = te.placeholder(shape=X_sp.indices.shape, dtype=str(X_sp.indices.dtype)) + X_indptr = te.placeholder(shape=X_sp.indptr.shape, dtype=str(X_sp.indptr.dtype)) X_T_data, X_T_indices, X_T_indptr = topi.nn.sparse_transpose(X_data, 
X_indices, X_indptr) - s = tvm.create_schedule([X_T_data.op, X_T_indices.op, X_T_indptr.op]) + s = te.create_schedule([X_T_data.op, X_T_indices.op, X_T_indptr.op]) func = tvm.build(s, [X_data, X_indices, X_indptr, X_T_data, X_T_indices, X_T_indptr]) @@ -288,12 +289,12 @@ def test_sparse_dense_bsr(): W_np = W_sp_np.todense() Y_np = X_np.dot(W_np.T) - W_data = tvm.placeholder(shape=W_sp_np.data.shape, dtype=str(W_sp_np.data.dtype)) - W_indices = tvm.placeholder(shape=W_sp_np.indices.shape, dtype=str(W_sp_np.indices.dtype)) - W_indptr = tvm.placeholder(shape=W_sp_np.indptr.shape, dtype=str(W_sp_np.indptr.dtype)) - X = tvm.placeholder(shape=X_np.shape, dtype=str(X_np.dtype)) + W_data = te.placeholder(shape=W_sp_np.data.shape, dtype=str(W_sp_np.data.dtype)) + W_indices = te.placeholder(shape=W_sp_np.indices.shape, dtype=str(W_sp_np.indices.dtype)) + W_indptr = te.placeholder(shape=W_sp_np.indptr.shape, dtype=str(W_sp_np.indptr.dtype)) + X = te.placeholder(shape=X_np.shape, dtype=str(X_np.dtype)) Y = topi.nn.sparse_dense(X, W_data, W_indices, W_indptr) - s = tvm.create_schedule(Y.op) + s = te.create_schedule(Y.op) func = tvm.build(s, [X, W_data, W_indices, W_indptr, Y]) Y_tvm = tvm.nd.array(np.zeros(Y_np.shape, dtype=Y_np.dtype)) func(tvm.nd.array(X_np), @@ -317,12 +318,12 @@ def test_sparse_dense_bsr_randomized(): W_np = W_sp_np.todense() Y_np = np.array(X_np.dot(W_np.T)) - W_data = tvm.placeholder(shape=W_sp_np.data.shape, dtype=str(W_sp_np.data.dtype)) - W_indices = tvm.placeholder(shape=W_sp_np.indices.shape, dtype=str(W_sp_np.indices.dtype)) - W_indptr = tvm.placeholder(shape=W_sp_np.indptr.shape, dtype=str(W_sp_np.indptr.dtype)) - X = tvm.placeholder(shape=X_np.shape, dtype=str(X_np.dtype)) + W_data = te.placeholder(shape=W_sp_np.data.shape, dtype=str(W_sp_np.data.dtype)) + W_indices = te.placeholder(shape=W_sp_np.indices.shape, dtype=str(W_sp_np.indices.dtype)) + W_indptr = te.placeholder(shape=W_sp_np.indptr.shape, dtype=str(W_sp_np.indptr.dtype)) + X = 
te.placeholder(shape=X_np.shape, dtype=str(X_np.dtype)) Y = topi.nn.sparse_dense(X, W_data, W_indices, W_indptr) - s = tvm.create_schedule(Y.op) + s = te.create_schedule(Y.op) func = tvm.build(s, [X, W_data, W_indices, W_indptr, Y]) Y_tvm = tvm.nd.array(np.zeros(Y_np.shape, dtype=Y_np.dtype)) func(tvm.nd.array(X_np), diff --git a/topi/tests/python/test_topi_tensor.py b/topi/tests/python/test_topi_tensor.py index 0509842..68ea7ab 100644 --- a/topi/tests/python/test_topi_tensor.py +++ b/topi/tests/python/test_topi_tensor.py @@ -17,6 +17,7 @@ """Test code for tensor operator""" import numpy as np import tvm +from tvm import te import topi import topi.testing from tvm.contrib.pickle_memoize import memoize @@ -28,9 +29,9 @@ def verify_elemwise_sum(num_args, dtype): tvm_placeholders = [] for i in range(num_args): tvm_placeholders.append( - tvm.placeholder(shape, name="data"+str(i), dtype=dtype)) + te.placeholder(shape, name="data"+str(i), dtype=dtype)) esum = topi.elemwise_sum(tvm_placeholders) - s = tvm.create_schedule([esum.op]) + s = te.create_schedule([esum.op]) @memoize("topi.tests.test_topi_elemwise_sum") def get_ref_data(): @@ -57,11 +58,11 @@ def verify_elemwise_sum(num_args, dtype): def verify_full(shape, dtype, fill_value): - A = tvm.placeholder(shape, dtype=dtype, name="A") + A = te.placeholder(shape, dtype=dtype, name="A") B = topi.full_like(A, fill_value=fill_value) C = topi.full(shape=shape, dtype=dtype, fill_value=fill_value) - s1 = tvm.create_schedule([B.op]) - s2 = tvm.create_schedule([C.op]) + s1 = te.create_schedule([B.op]) + s2 = te.create_schedule([C.op]) @memoize("topi.tests.test_topi_full") def get_ref_data(): @@ -96,9 +97,9 @@ def verify_vectorization(n, m, dtype): return with tvm.target.create(device): ctx = tvm.context(device, 0) - A = tvm.placeholder((n, m), name='A', dtype=dtype) - B = tvm.compute((n, m), lambda i, j: - A[i, j] + tvm.const(1, A.dtype), name='B') + A = te.placeholder((n, m), name='A', dtype=dtype) + B = te.compute((n, m), 
lambda i, j: + A[i, j] + tvm.tir.const(1, A.dtype), name='B') S = topi.testing.get_elemwise_schedule(device)(B) fun = tvm.build(S, [A, B], device) diff --git a/topi/tests/python/test_topi_transform.py b/topi/tests/python/test_topi_transform.py index 880e86d..097c87d 100644 --- a/topi/tests/python/test_topi_transform.py +++ b/topi/tests/python/test_topi_transform.py @@ -17,6 +17,7 @@ """Test code for broadcasting operators.""" import numpy as np import tvm +from tvm import te import topi import topi.testing from tvm.contrib.nvcc import have_fp16 @@ -24,7 +25,7 @@ from tvm.contrib.nvcc import have_fp16 from common import get_all_backend def verify_expand_dims(in_shape, out_shape, axis, num_newaxis): - A = tvm.placeholder(shape=in_shape, name="A") + A = te.placeholder(shape=in_shape, name="A") B = topi.expand_dims(A, axis, num_newaxis) def check_device(device): ctx = tvm.context(device, 0) @@ -47,7 +48,7 @@ def verify_expand_dims(in_shape, out_shape, axis, num_newaxis): def verify_reinterpret(in_shape, in_dtype, out_dtype, generator): - A = tvm.placeholder(shape=in_shape, name="A", dtype=in_dtype) + A = te.placeholder(shape=in_shape, name="A", dtype=in_dtype) B = topi.reinterpret(A, out_dtype) def check_device(device): ctx = tvm.context(device, 0) @@ -73,7 +74,7 @@ def verify_reinterpret(in_shape, in_dtype, out_dtype, generator): def verify_transpose(in_shape, axes): - A = tvm.placeholder(shape=in_shape, name="A") + A = te.placeholder(shape=in_shape, name="A") B = topi.transpose(A, axes) def check_device(device): ctx = tvm.context(device, 0) @@ -96,7 +97,7 @@ def verify_transpose(in_shape, axes): def verify_reshape(src_shape, dst_shape): - A = tvm.placeholder(shape=src_shape, name="A") + A = te.placeholder(shape=src_shape, name="A") B = topi.reshape(A, dst_shape) def check_device(device): ctx = tvm.context(device, 0) @@ -119,7 +120,7 @@ def verify_reshape(src_shape, dst_shape): def verify_squeeze(src_shape, axis): - A = tvm.placeholder(shape=src_shape, name="A") + A = 
te.placeholder(shape=src_shape, name="A") B = topi.squeeze(A, axis=axis) def check_device(device): ctx = tvm.context(device, 0) @@ -158,7 +159,7 @@ def verify_concatenate(shapes, axis): tensor_l = [] for i, shape in enumerate(shapes): - tensor_l.append(tvm.placeholder(shape, name="A" + str(i))) + tensor_l.append(te.placeholder(shape, name="A" + str(i))) out_tensor = topi.concatenate(a_tuple=tensor_l, axis=axis) def check_device(device): ctx = tvm.context(device, 0) @@ -183,7 +184,7 @@ def verify_concatenate(shapes, axis): def verify_stack(shapes, axis): tensor_l = [] for i, shape in enumerate(shapes): - tensor_l.append(tvm.placeholder(shape, name="A" + str(i))) + tensor_l.append(te.placeholder(shape, name="A" + str(i))) out_tensor = topi.stack(tensor_l, axis) def check_device(device): ctx = tvm.context(device, 0) @@ -207,7 +208,7 @@ def verify_stack(shapes, axis): def verify_split(src_shape, indices_or_sections, axis): - A = tvm.placeholder(shape=src_shape, name="A") + A = te.placeholder(shape=src_shape, name="A") tensor_l = topi.split(A, indices_or_sections, axis=axis) def check_device(device): ctx = tvm.context(device, 0) @@ -232,10 +233,10 @@ def verify_split(src_shape, indices_or_sections, axis): def verify_expand_like(in_shape, out_shape, axis): - A = tvm.placeholder(shape=in_shape, name="A") - B = tvm.placeholder(shape=out_shape, name="B") + A = te.placeholder(shape=in_shape, name="A") + B = te.placeholder(shape=out_shape, name="B") C = topi.expand_like(A, B, axis) - s = tvm.create_schedule([C.op]) + s = te.create_schedule([C.op]) def check_device(device): if not tvm.runtime.enabled(device): @@ -266,7 +267,7 @@ def verify_expand_like(in_shape, out_shape, axis): check_device(device) def verify_flip(in_shape, axis): - A = tvm.placeholder(shape=in_shape, name="A") + A = te.placeholder(shape=in_shape, name="A") B = topi.flip(A, axis) + 1 def check_device(device): ctx = tvm.context(device, 0) @@ -292,8 +293,8 @@ def verify_take(src_shape, indices_src, axis=None, 
mode="clip"): src_dtype = "float32" indices_dtype = "int32" indices_src = np.array(indices_src, dtype=indices_dtype) - A = tvm.placeholder(shape=src_shape, dtype=src_dtype, name="A") - indices = tvm.placeholder(shape=indices_src.shape, dtype=indices_dtype, name="indices") + A = te.placeholder(shape=src_shape, dtype=src_dtype, name="A") + indices = te.placeholder(shape=indices_src.shape, dtype=indices_dtype, name="indices") if axis is None: out_tensor = topi.take(a=A, indices=indices, mode=mode) else: @@ -330,7 +331,7 @@ def verify_take(src_shape, indices_src, axis=None, mode="clip"): check_device(device) def verify_strided_slice(in_shape, begin, end, strides=None): - A = tvm.placeholder(shape=in_shape, name="A") + A = te.placeholder(shape=in_shape, name="A") strides = [1,1,1] if strides is None else strides B = topi.strided_slice(A, begin, end, strides) + 1 @@ -356,12 +357,12 @@ def verify_strided_slice(in_shape, begin, end, strides=None): check_device(device) def verify_strided_set(in_shape, v_shape, begin, end, strides=None): - A = tvm.placeholder(shape=in_shape, name="A") - V = tvm.placeholder(shape=v_shape, name="V") - b = tvm.placeholder(shape=(len(begin),), name="b", dtype='int32') - e = tvm.placeholder(shape=(len(end),), name="e", dtype='int32') + A = te.placeholder(shape=in_shape, name="A") + V = te.placeholder(shape=v_shape, name="V") + b = te.placeholder(shape=(len(begin),), name="b", dtype='int32') + e = te.placeholder(shape=(len(end),), name="e", dtype='int32') if strides is not None: - st = tvm.placeholder(shape=(len(strides),), name="st", dtype='int32') + st = te.placeholder(shape=(len(strides),), name="st", dtype='int32') B = topi.strided_set(A, V, b, e, st) + 1 else: B = topi.strided_set(A, V, b, e) + 1 @@ -404,8 +405,8 @@ def verify_strided_set(in_shape, v_shape, begin, end, strides=None): def verify_gather_nd(src_shape, indices_src, indices_dtype): src_dtype = "float32" indices_src = np.array(indices_src, dtype=indices_dtype) - A = 
tvm.placeholder(shape=src_shape, dtype=src_dtype, name="A") - indices = tvm.placeholder(shape=indices_src.shape, dtype=indices_dtype, name="indices") + A = te.placeholder(shape=src_shape, dtype=src_dtype, name="A") + indices = te.placeholder(shape=indices_src.shape, dtype=indices_dtype, name="indices") out_tensor = topi.gather_nd(a=A, indices=indices) def check_device(device): @@ -464,7 +465,7 @@ def verify_arange(start, stop, step): check_device(device) def verify_repeat(in_shape, repeats, axis): - A = tvm.placeholder(shape=in_shape, name="A") + A = te.placeholder(shape=in_shape, name="A") B = topi.repeat(A, repeats, axis) def check_device(device): ctx = tvm.context(device, 0) @@ -486,7 +487,7 @@ def verify_repeat(in_shape, repeats, axis): check_device(device) def verify_tile(in_shape, reps): - A = tvm.placeholder(shape=in_shape, name="A") + A = te.placeholder(shape=in_shape, name="A") B = topi.tile(A, reps) def check_device(device): ctx = tvm.context(device, 0) @@ -508,10 +509,10 @@ def verify_tile(in_shape, reps): check_device(device) def verify_where(in_shape): - Cond = tvm.placeholder(shape=in_shape, name="cond") + Cond = te.placeholder(shape=in_shape, name="cond") dtype = Cond.dtype - A = tvm.placeholder(shape=in_shape, name="A") - B = tvm.placeholder(shape=in_shape, name="B") + A = te.placeholder(shape=in_shape, name="A") + B = te.placeholder(shape=in_shape, name="B") C = topi.where(Cond, A, B) def check_device(device): ctx = tvm.context(device, 0) @@ -537,9 +538,9 @@ def verify_where(in_shape): check_device(device) def verify_one_hot(indices_shape, depth, on_value, off_value, axis, dtype): - indices = tvm.placeholder(shape=indices_shape, name="indices", dtype="int32") - on_value_const = tvm.const(on_value, dtype) - off_value_const = tvm.const(off_value, dtype) + indices = te.placeholder(shape=indices_shape, name="indices", dtype="int32") + on_value_const = tvm.tir.const(on_value, dtype) + off_value_const = tvm.tir.const(off_value, dtype) one_hot_result = 
topi.transform.one_hot(indices, on_value_const, off_value_const, depth, axis, dtype) def check_device(device): ctx = tvm.context(device, 0) @@ -624,9 +625,9 @@ def test_squeeze(): verify_squeeze((1, 1, 1, 1), None) # a special case to trigger inline let expression - A = tvm.placeholder((2,), 'float32', 'A') + A = te.placeholder((2,), 'float32', 'A') E = topi.squeeze(A) - C = tvm.compute((1,), lambda i: E[(2 * A[0] - 1).astype('int32')]) + C = te.compute((1,), lambda i: E[(2 * A[0] - 1).astype('int32')]) for device in ['cuda', 'opencl']: ctx = tvm.context(device, 0) if ctx.exist: @@ -737,7 +738,7 @@ def test_tile(): def test_layout_transform(): in_shape = (1, 32, 8, 8) - A = tvm.placeholder(shape=in_shape, dtype="float32", name="A") + A = te.placeholder(shape=in_shape, dtype="float32", name="A") B = topi.layout_transform(A, "NCHW", "NCHW16c") input = np.random.uniform(size=in_shape).astype(A.dtype) @@ -766,7 +767,7 @@ def test_layout_transform(): def test_shape(): in_shape = (8, 7, 13) dtype = "int32" - A = tvm.placeholder(shape=in_shape, dtype="float32", name="A") + A = te.placeholder(shape=in_shape, dtype="float32", name="A") B = topi.shape(A, dtype) input = np.random.uniform(size=in_shape).astype(A.dtype) @@ -796,8 +797,8 @@ def test_sequence_mask(): for mask_value in [0.0, 1.0]: max_length = in_shape[axis] batch_size = in_shape[1 - axis] - A = tvm.placeholder(shape=in_shape, dtype="float32", name="A") - B = tvm.placeholder(shape=(batch_size,), dtype="int32", name="B") + A = te.placeholder(shape=in_shape, dtype="float32", name="A") + B = te.placeholder(shape=(batch_size,), dtype="int32", name="B") C = topi.sequence_mask(A, B, axis=axis, mask_value=mask_value) A_data = np.random.normal(0, 1, in_shape).astype(np.float32) B_data = np.random.randint(1, max_length, (batch_size,)).astype(np.int32) @@ -823,7 +824,7 @@ def test_sequence_mask(): def test_ndarray_size(): in_shape = (5, 11, 7) dtype = "int32" - A = tvm.placeholder(shape=in_shape, dtype="float32", name="A") 
+ A = te.placeholder(shape=in_shape, dtype="float32", name="A") B = topi.ndarray_size(A, dtype) input = np.random.uniform(size=in_shape).astype(A.dtype) @@ -857,13 +858,13 @@ def test_where_fusion(): return print("Running on target: %s" % device) conv2d_compute, conv2d_schedule = topi.testing.get_conv2d_nchw_implement(device) - data = tvm.placeholder((2, 1, 2, 4), 'int8', 'data') - w = tvm.placeholder((3, 1, 2, 2), 'int8', 'w') + data = te.placeholder((2, 1, 2, 4), 'int8', 'data') + w = te.placeholder((3, 1, 2, 2), 'int8', 'w') conv1 = conv2d_compute(data, w, 1, 0, 1, 'int32') - zeros = topi.full((2, 3, 1, 3), 'int32', tvm.const(0, dtype='int32')) + zeros = topi.full((2, 3, 1, 3), 'int32', tvm.tir.const(0, dtype='int32')) gt = topi.greater_equal(conv1, zeros) - one = topi.full((2, 3, 1, 3), 'int32', tvm.const(1, dtype='int32')) - two = topi.full((2, 3, 1, 3), 'int32', tvm.const(2, dtype='int32')) + one = topi.full((2, 3, 1, 3), 'int32', tvm.tir.const(1, dtype='int32')) + two = topi.full((2, 3, 1, 3), 'int32', tvm.tir.const(2, dtype='int32')) where = topi.where(gt, one, two) add = topi.add(conv1, where) outs = [add] diff --git a/topi/tests/python/test_topi_upsampling.py b/topi/tests/python/test_topi_upsampling.py index 0037487..874471b 100644 --- a/topi/tests/python/test_topi_upsampling.py +++ b/topi/tests/python/test_topi_upsampling.py @@ -17,6 +17,7 @@ """Test code for upsampling""" import numpy as np import tvm +from tvm import te import topi import topi.testing import math @@ -28,12 +29,12 @@ def verify_upsampling(batch, in_channel, in_height, in_width, scale_h, scale_w, layout='NCHW', method="nearest_neighbor", in_batch_block = 0, in_channel_block = 0): if layout == 'NCHW': - A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A') + A = te.placeholder((batch, in_channel, in_height, in_width), name='A') dtype = A.dtype out_shape = (batch, in_channel, int(round(in_height*scale_h)), int(round(in_width*scale_w))) a_np = 
np.random.uniform(size=(batch, in_channel, in_height, in_width)).astype(dtype) elif nchw_pack_layout(layout): - A = tvm.placeholder((batch, in_channel, in_height, in_width, in_batch_block, in_channel_block), + A = te.placeholder((batch, in_channel, in_height, in_width, in_batch_block, in_channel_block), name='A') dtype = A.dtype out_shape = (batch, in_channel, int(round(in_height*scale_h)), int(round(in_width*scale_w)), @@ -41,7 +42,7 @@ def verify_upsampling(batch, in_channel, in_height, in_width, scale_h, scale_w, a_np = np.random.uniform(size=(batch, in_channel, in_height, in_width, in_batch_block, in_channel_block)).astype(dtype) elif layout == 'NHWC': - A = tvm.placeholder((batch, in_height, in_width, in_channel), name='A') + A = te.placeholder((batch, in_height, in_width, in_channel), name='A') dtype = A.dtype out_shape = (batch, int(round(in_height*scale_h)), int(round(in_width*scale_w)), in_channel) a_np = np.random.uniform(size=(batch, in_height, in_width, in_channel)).astype(dtype) @@ -115,13 +116,13 @@ def test_upsampling(): def verify_upsampling3d(batch, in_channel, in_depth, in_height, in_width, scale_d, scale_h, scale_w, layout='NCDHW', method="nearest_neighbor"): if layout == 'NCDHW': - A = tvm.placeholder((batch, in_channel, in_depth, in_height, in_width), name='A') + A = te.placeholder((batch, in_channel, in_depth, in_height, in_width), name='A') dtype = A.dtype out_shape = (batch, in_channel, int(round(in_depth*scale_d)), int(round(in_height*scale_h)), int(round(in_width*scale_w))) a_np = np.random.uniform(size=(batch, in_channel, in_depth, in_height, in_width)).astype(dtype) elif layout == 'NDHWC': - A = tvm.placeholder((batch, in_depth, in_height, in_width, in_channel), name='A') + A = te.placeholder((batch, in_depth, in_height, in_width, in_channel), name='A') dtype = A.dtype out_shape = (batch, int(round(in_depth*scale_d)), int(round(in_height*scale_h)), int(round(in_width*scale_w)), in_channel) diff --git 
a/topi/tests/python/test_topi_vision.py b/topi/tests/python/test_topi_vision.py index 7d27b82..0aa410d 100644 --- a/topi/tests/python/test_topi_vision.py +++ b/topi/tests/python/test_topi_vision.py @@ -19,6 +19,7 @@ from __future__ import print_function import math import numpy as np import tvm +from tvm import te import topi import topi.testing @@ -90,7 +91,7 @@ def verify_get_valid_counts(dshape, score_threshold, id_index, score_index): print("Running on target: %s" % device) with tvm.target.create(device): fcompute, fschedule = topi.testing.dispatch(device, _get_valid_counts_implement) - data = tvm.placeholder(dshape, name="data", dtype=dtype) + data = te.placeholder(dshape, name="data", dtype=dtype) outs = fcompute(data, score_threshold, id_index, score_index) s = fschedule(outs) @@ -121,8 +122,8 @@ def verify_non_max_suppression(np_data, np_valid_count, np_result, np_indices_re dshape = np_data.shape batch, num_anchors, _ = dshape indices_dshape = (batch, num_anchors) - data = tvm.placeholder(dshape, name="data") - valid_count = tvm.placeholder((batch,), dtype="int32", name="valid_count") + data = te.placeholder(dshape, name="data") + valid_count = te.placeholder((batch,), dtype="int32", name="valid_count") def check_device(device): ctx = tvm.context(device, 0) @@ -182,7 +183,7 @@ def test_non_max_suppression(): def verify_multibox_prior(dshape, sizes=(1,), ratios=(1,), steps=(-1, -1), offsets=(0.5, 0.5), clip=False): - data = tvm.placeholder(dshape, name="data") + data = te.placeholder(dshape, name="data") dtype = data.dtype input_data = np.random.uniform(size=dshape).astype(dtype) @@ -223,7 +224,7 @@ def verify_multibox_prior(dshape, sizes=(1,), ratios=(1,), steps=(-1, -1), offse print("Skip because %s is not enabled" % device) return print("Running on target: %s" % device) - + fcompute, fschedule = topi.testing.dispatch(device, _multibox_prior_implement) with tvm.target.create(device): out = fcompute(data, sizes, ratios, steps, offsets, clip) @@ -249,9 
+250,9 @@ def test_multibox_detection(): batch_size = 1 num_anchors = 3 num_classes = 3 - cls_prob = tvm.placeholder((batch_size, num_anchors, num_classes), name="cls_prob") - loc_preds = tvm.placeholder((batch_size, num_anchors * 4), name="loc_preds") - anchors = tvm.placeholder((1, num_anchors, 4), name="anchors") + cls_prob = te.placeholder((batch_size, num_anchors, num_classes), name="cls_prob") + loc_preds = te.placeholder((batch_size, num_anchors * 4), name="loc_preds") + anchors = te.placeholder((1, num_anchors, 4), name="anchors") # Manually create test case np_cls_prob = np.array([[[0.2, 0.5, 0.3], [0.25, 0.3, 0.45], [0.7, 0.1, 0.2]]]) @@ -290,8 +291,8 @@ def verify_roi_align(batch, in_channel, in_size, num_roi, pooled_size, spatial_s a_shape = (batch, in_channel, in_size, in_size) rois_shape = (num_roi, 5) - a = tvm.placeholder(a_shape) - rois = tvm.placeholder(rois_shape) + a = te.placeholder(a_shape) + rois = te.placeholder(rois_shape) @memoize("topi.tests.test_topi_vision.verify_roi_align") def get_ref_data(): @@ -342,8 +343,8 @@ def verify_roi_pool(batch, in_channel, in_size, num_roi, pooled_size, spatial_sc a_shape = (batch, in_channel, in_size, in_size) rois_shape = (num_roi, 5) - a = tvm.placeholder(a_shape) - rois = tvm.placeholder(rois_shape) + a = te.placeholder(a_shape) + rois = te.placeholder(rois_shape) @memoize("topi.tests.test_topi_vision.verify_roi_pool") def get_ref_data(): @@ -387,9 +388,9 @@ def test_roi_pool(): def verify_proposal(np_cls_prob, np_bbox_pred, np_im_info, np_out, attrs): - cls_prob = tvm.placeholder(np_cls_prob.shape) - bbox_pred = tvm.placeholder(np_bbox_pred.shape) - im_info = tvm.placeholder(np_im_info.shape) + cls_prob = te.placeholder(np_cls_prob.shape) + bbox_pred = te.placeholder(np_bbox_pred.shape) + im_info = te.placeholder(np_im_info.shape) def check_device(device): ctx = tvm.context(device, 0) diff --git a/tutorials/autotvm/tune_conv2d_cuda.py b/tutorials/autotvm/tune_conv2d_cuda.py index 0e26dcb..260cf5a 
100644 --- a/tutorials/autotvm/tune_conv2d_cuda.py +++ b/tutorials/autotvm/tune_conv2d_cuda.py @@ -49,6 +49,7 @@ import sys import numpy as np import tvm +from tvm import te import topi from topi.testing import conv2d_nchw_python @@ -82,10 +83,10 @@ from tvm import autotvm def conv2d_no_batching(N, H, W, CO, CI, KH, KW, stride, padding): assert N == 1, "Only consider batch_size = 1 in this template" - data = tvm.placeholder((N, CI, H, W), name='data') - kernel = tvm.placeholder((CO, CI, KH, KW), name='kernel') + data = te.placeholder((N, CI, H, W), name='data') + kernel = te.placeholder((CO, CI, KH, KW), name='kernel') conv = topi.nn.conv2d_nchw(data, kernel, stride, padding, dilation=1, out_dtype='float32') - s = tvm.create_schedule([conv.op]) + s = te.create_schedule([conv.op]) ##### space definition begin ##### n, f, y, x = s[conv].op.axis @@ -123,15 +124,15 @@ def conv2d_no_batching(N, H, W, CO, CI, KH, KW, stride, padding): bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x) kernel_scope = n # this is the scope to attach global config inside this kernel - s[output].bind(bf, tvm.thread_axis("blockIdx.z")) - s[output].bind(by, tvm.thread_axis("blockIdx.y")) - s[output].bind(bx, tvm.thread_axis("blockIdx.x")) - s[output].bind(vf, tvm.thread_axis("vthread")) - s[output].bind(vy, tvm.thread_axis("vthread")) - s[output].bind(vx, tvm.thread_axis("vthread")) - s[output].bind(tf, tvm.thread_axis("threadIdx.z")) - s[output].bind(ty, tvm.thread_axis("threadIdx.y")) - s[output].bind(tx, tvm.thread_axis("threadIdx.x")) + s[output].bind(bf, te.thread_axis("blockIdx.z")) + s[output].bind(by, te.thread_axis("blockIdx.y")) + s[output].bind(bx, te.thread_axis("blockIdx.x")) + s[output].bind(vf, te.thread_axis("vthread")) + s[output].bind(vy, te.thread_axis("vthread")) + s[output].bind(vx, te.thread_axis("vthread")) + s[output].bind(tf, te.thread_axis("threadIdx.z")) + s[output].bind(ty, te.thread_axis("threadIdx.y")) + s[output].bind(tx, te.thread_axis("threadIdx.x")) 
s[output].reorder(n, bf, by, bx, vf, vy, vx, tf, ty, tx, fi, yi, xi) s[OL].compute_at(s[output], tx) @@ -155,9 +156,9 @@ def conv2d_no_batching(N, H, W, CO, CI, KH, KW, stride, padding): tz, fused = s[load].split(fused, nparts=cfg["tile_f"].size[2]) ty, fused = s[load].split(fused, nparts=cfg["tile_y"].size[2]) tx, fused = s[load].split(fused, nparts=cfg["tile_x"].size[2]) - s[load].bind(tz, tvm.thread_axis("threadIdx.z")) - s[load].bind(ty, tvm.thread_axis("threadIdx.y")) - s[load].bind(tx, tvm.thread_axis("threadIdx.x")) + s[load].bind(tz, te.thread_axis("threadIdx.z")) + s[load].bind(ty, te.thread_axis("threadIdx.y")) + s[load].bind(tx, te.thread_axis("threadIdx.x")) # tune unroll s[output].pragma(kernel_scope, 'auto_unroll_max_step', cfg['auto_unroll_max_step'].val) diff --git a/tutorials/autotvm/tune_relay_arm.py b/tutorials/autotvm/tune_relay_arm.py index ea24b16..ffd3e8b 100644 --- a/tutorials/autotvm/tune_relay_arm.py +++ b/tutorials/autotvm/tune_relay_arm.py @@ -62,6 +62,7 @@ import os import numpy as np import tvm +from tvm import te from tvm import autotvm from tvm import relay import tvm.relay.testing diff --git a/tutorials/autotvm/tune_relay_cuda.py b/tutorials/autotvm/tune_relay_cuda.py index 58c8751..4195075 100644 --- a/tutorials/autotvm/tune_relay_cuda.py +++ b/tutorials/autotvm/tune_relay_cuda.py @@ -60,6 +60,7 @@ import os import numpy as np import tvm +from tvm import te from tvm import autotvm from tvm import relay import tvm.relay.testing diff --git a/tutorials/autotvm/tune_relay_mobile_gpu.py b/tutorials/autotvm/tune_relay_mobile_gpu.py index 5425f1b..ad74608 100644 --- a/tutorials/autotvm/tune_relay_mobile_gpu.py +++ b/tutorials/autotvm/tune_relay_mobile_gpu.py @@ -61,6 +61,7 @@ import os import numpy as np import tvm +from tvm import te from tvm import autotvm from tvm import relay import tvm.relay.testing diff --git a/tutorials/autotvm/tune_relay_x86.py b/tutorials/autotvm/tune_relay_x86.py index f44773e..15ce2de 100644 --- 
a/tutorials/autotvm/tune_relay_x86.py +++ b/tutorials/autotvm/tune_relay_x86.py @@ -28,6 +28,7 @@ import os import numpy as np import tvm +from tvm import te from tvm import autotvm from tvm import relay from tvm.relay import testing diff --git a/tutorials/autotvm/tune_simple_template.py b/tutorials/autotvm/tune_simple_template.py index 8efeed4..dd3b9dc 100644 --- a/tutorials/autotvm/tune_simple_template.py +++ b/tutorials/autotvm/tune_simple_template.py @@ -55,6 +55,7 @@ import sys import numpy as np import tvm +from tvm import te # the module is called `autotvm` from tvm import autotvm @@ -70,12 +71,12 @@ from tvm import autotvm # Matmul V0: Constant tiling factor def matmul_v0(N, L, M, dtype): - A = tvm.placeholder((N, L), name='A', dtype=dtype) - B = tvm.placeholder((L, M), name='B', dtype=dtype) + A = te.placeholder((N, L), name='A', dtype=dtype) + B = te.placeholder((L, M), name='B', dtype=dtype) - k = tvm.reduce_axis((0, L), name='k') - C = tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k), name='C') - s = tvm.create_schedule(C.op) + k = te.reduce_axis((0, L), name='k') + C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name='C') + s = te.create_schedule(C.op) # schedule y, x = s[C].op.axis @@ -104,12 +105,12 @@ def matmul_v0(N, L, M, dtype): # Matmul V1: List candidate values @autotvm.register_customized_task("tutorial/matmul_v1") # 1. 
use a decorator def matmul_v1(N, L, M, dtype): - A = tvm.placeholder((N, L), name='A', dtype=dtype) - B = tvm.placeholder((L, M), name='B', dtype=dtype) + A = te.placeholder((N, L), name='A', dtype=dtype) + B = te.placeholder((L, M), name='B', dtype=dtype) - k = tvm.reduce_axis((0, L), name='k') - C = tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k), name='C') - s = tvm.create_schedule(C.op) + k = te.reduce_axis((0, L), name='k') + C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name='C') + s = te.create_schedule(C.op) # schedule y, x = s[C].op.axis @@ -184,12 +185,12 @@ def matmul_v1(N, L, M, dtype): @autotvm.register_customized_task("tutorial/matmul") def matmul(N, L, M, dtype): - A = tvm.placeholder((N, L), name='A', dtype=dtype) - B = tvm.placeholder((L, M), name='B', dtype=dtype) + A = te.placeholder((N, L), name='A', dtype=dtype) + B = te.placeholder((L, M), name='B', dtype=dtype) - k = tvm.reduce_axis((0, L), name='k') - C = tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k), name='C') - s = tvm.create_schedule(C.op) + k = te.reduce_axis((0, L), name='k') + C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name='C') + s = te.create_schedule(C.op) # schedule y, x = s[C].op.axis diff --git a/tutorials/cross_compilation_and_rpc.py b/tutorials/cross_compilation_and_rpc.py index 2ffcb11..553d77d 100644 --- a/tutorials/cross_compilation_and_rpc.py +++ b/tutorials/cross_compilation_and_rpc.py @@ -96,13 +96,14 @@ and the Firefly-RK3399 for an OpenCL example. 
import numpy as np import tvm +from tvm import te from tvm import rpc from tvm.contrib import util -n = tvm.convert(1024) -A = tvm.placeholder((n,), name='A') -B = tvm.compute((n,), lambda i: A[i] + 1.0, name='B') -s = tvm.create_schedule(B.op) +n = tvm.runtime.convert(1024) +A = te.placeholder((n,), name='A') +B = te.compute((n,), lambda i: A[i] + 1.0, name='B') +s = te.create_schedule(B.op) ###################################################################### # Then we cross compile the kernel. @@ -228,10 +229,10 @@ def run_opencl(): opencl_device_port = 9090 # create schedule for the above "add one" compute declaration - s = tvm.create_schedule(B.op) + s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=32) - s[B].bind(xo, tvm.thread_axis("blockIdx.x")) - s[B].bind(xi, tvm.thread_axis("threadIdx.x")) + s[B].bind(xo, te.thread_axis("blockIdx.x")) + s[B].bind(xi, te.thread_axis("threadIdx.x")) func = tvm.build(s, [A, B], "opencl", target_host=target_host) remote = rpc.connect(opencl_device_host, opencl_device_port) diff --git a/tutorials/dev/low_level_custom_pass.py b/tutorials/dev/low_level_custom_pass.py index 97c4a1f..298b24f 100644 --- a/tutorials/dev/low_level_custom_pass.py +++ b/tutorials/dev/low_level_custom_pass.py @@ -43,6 +43,7 @@ Before reading this tutorial, we assume readers have already known these topics from __future__ import absolute_import, print_function import tvm +from tvm import te import numpy as np ###################################################################### @@ -50,12 +51,12 @@ import numpy as np # our customized lowering pass to manipulate the IR directly instead of using schedule primitives. 
# -n = tvm.const(128, "int32") -a = tvm.placeholder((n, ), name="a") -b = tvm.placeholder((n, ), name="b") -c = tvm.compute((n, ), lambda i: a[i] + b[i], name='c') +n = tvm.tir.const(128, "int32") +a = te.placeholder((n, ), name="a") +b = te.placeholder((n, ), name="b") +c = te.compute((n, ), lambda i: a[i] + b[i], name='c') -sch = tvm.create_schedule(c.op) +sch = te.create_schedule(c.op) ir = tvm.lower(sch, [a, b, c], simple_mode=True) print(ir) @@ -71,7 +72,7 @@ print(ir) # # IR Visitor # ~~~~~~~~~~ -# We can use ``tvm.ir_pass.PostOrderVisit(stmt, func)`` to gather information from the Halide IR. +# We can use ``tvm.tir.ir_pass.PostOrderVisit(stmt, func)`` to gather information from the Halide IR. # ``func`` is a function callback. This function will be called before exiting the current IR node, # i.e. post-order visit. Then we leverage side effects to store the result of IR visit, because the # return value of ``func`` will be ignored. @@ -111,8 +112,8 @@ def vectorize8(op): if op in loops: extent = op.extent.value name = op.loop_var.name - lo, li = tvm.var(name + '.outer'), tvm.var(name + '.inner') - body = tvm.ir_pass.Substitute(op.body, {op.loop_var: lo * 8 + li}) + lo, li = te.var(name + '.outer'), te.var(name + '.inner') + body = tvm.tir.ir_pass.Substitute(op.body, {op.loop_var: lo * 8 + li}) body = tvm.tir.For(li, 0, 8, tvm.tir.For.Vectorized, 0, body) body = tvm.tir.For(lo, 0, extent // 8, tvm.tir.For.Serial, 0, body) return body @@ -121,14 +122,14 @@ def vectorize8(op): def vectorize(stmt): global loops - tvm.ir_pass.PostOrderVisit(stmt, find_width8) + tvm.tir.ir_pass.PostOrderVisit(stmt, find_width8) if not loops: return stmt # The last list arugment indicates what kinds of nodes will be transformed. 
# Thus, in this case only `For` nodes will call `vectorize8` - stmt = tvm.ir_pass.IRTransform(stmt, None, vectorize8, ['For']) + stmt = tvm.tir.ir_pass.IRTransform(stmt, None, vectorize8, ['For']) return stmt @@ -158,15 +159,15 @@ print(vectorize(ir)) # Thus, a good place to put this transformation pass is just after Phase 1. # -with tvm.build_config(add_lower_pass=[(1, vectorize)]) as cfg: +with tvm.target.build_config(add_lower_pass=[(1, vectorize)]) as cfg: print(tvm.lower(sch, [a, b, c], simple_mode=True)) ##################################################################### # Quick View # ---------- # This tutorial gives a quick view of writing a customized IR transformation pass: -# - Use ``tvm.ir_pass.PostOrderVisit`` to gather information on each IR nodes. -# - Use ``tvm.ir_pass.IRTransform`` to transform IR nodes. +# - Use ``tvm.tir.ir_pass.PostOrderVisit`` to gather information on each IR nodes. +# - Use ``tvm.tir.ir_pass.IRTransform`` to transform IR nodes. # - Wrap up two above to write an IR-transformation function. -# - Use ``tvm.build_config`` to put this function to TVM lowering pass +# - Use ``tvm.target.build_config`` to put this function to TVM lowering pass # diff --git a/tutorials/dev/relay_pass_infra.py b/tutorials/dev/relay_pass_infra.py index 494593e..7f818cf 100644 --- a/tutorials/dev/relay_pass_infra.py +++ b/tutorials/dev/relay_pass_infra.py @@ -49,6 +49,7 @@ a certain optimization and create an optimization pipeline. 
import numpy as np import tvm +from tvm import te import tvm.relay as relay ############################################################################### diff --git a/tutorials/frontend/build_gcn.py b/tutorials/frontend/build_gcn.py index d385dc9..e0d0aa0 100644 --- a/tutorials/frontend/build_gcn.py +++ b/tutorials/frontend/build_gcn.py @@ -186,6 +186,7 @@ print("Test accuracy of DGL results: {:.2%}".format(acc)) from tvm import relay from tvm.contrib import graph_runtime import tvm +from tvm import te def GraphConv(layer_name, input_dim, diff --git a/tutorials/frontend/deploy_model_on_android.py b/tutorials/frontend/deploy_model_on_android.py index 3d0e83d..f516004 100644 --- a/tutorials/frontend/deploy_model_on_android.py +++ b/tutorials/frontend/deploy_model_on_android.py @@ -31,6 +31,7 @@ from PIL import Image import keras from keras.applications.mobilenet_v2 import MobileNetV2 import tvm +from tvm import te import tvm.relay as relay from tvm import rpc from tvm.contrib import util, ndk, graph_runtime as runtime diff --git a/tutorials/frontend/deploy_model_on_rasp.py b/tutorials/frontend/deploy_model_on_rasp.py index e78c736..ef707fe 100644 --- a/tutorials/frontend/deploy_model_on_rasp.py +++ b/tutorials/frontend/deploy_model_on_rasp.py @@ -27,6 +27,7 @@ it on Raspberry Pi. """ import tvm +from tvm import te import tvm.relay as relay from tvm import rpc from tvm.contrib import util, graph_runtime as runtime diff --git a/tutorials/frontend/deploy_quantized.py b/tutorials/frontend/deploy_quantized.py index 0e09ba9..5af9fc9 100644 --- a/tutorials/frontend/deploy_quantized.py +++ b/tutorials/frontend/deploy_quantized.py @@ -28,6 +28,7 @@ Relay, quantize the Relay model and then perform the inference. 
""" import tvm +from tvm import te from tvm import relay import mxnet as mx from tvm.contrib.download import download_testdata diff --git a/tutorials/frontend/deploy_ssd_gluoncv.py b/tutorials/frontend/deploy_ssd_gluoncv.py index 78bb0ca..6126df0 100644 --- a/tutorials/frontend/deploy_ssd_gluoncv.py +++ b/tutorials/frontend/deploy_ssd_gluoncv.py @@ -24,6 +24,7 @@ This article is an introductory tutorial to deploy SSD models with TVM. We will use GluonCV pre-trained SSD model and convert it to Relay IR """ import tvm +from tvm import te from matplotlib import pyplot as plt from tvm.relay.testing.config import ctx_list @@ -47,7 +48,7 @@ from gluoncv import model_zoo, data, utils # # To get best inference performance on Intel graphics, # change target argument to :code:`opencl -device=intel_graphics`. -# But when using Intel graphics on Mac, target needs to +# But when using Intel graphics on Mac, target needs to # be set to `opencl` only for the reason that Intel subgroup # extension is not supported on Mac. # diff --git a/tutorials/frontend/from_caffe2.py b/tutorials/frontend/from_caffe2.py index aadee02..8fad80d 100644 --- a/tutorials/frontend/from_caffe2.py +++ b/tutorials/frontend/from_caffe2.py @@ -96,6 +96,7 @@ with relay.build_config(opt_level=3): # --------------- # The process is no different from other examples. 
import tvm +from tvm import te from tvm.contrib import graph_runtime # context x86 CPU, use tvm.gpu(0) if you run on GPU ctx = tvm.cpu(0) diff --git a/tutorials/frontend/from_coreml.py b/tutorials/frontend/from_coreml.py index 2f70353..2a0c8db 100644 --- a/tutorials/frontend/from_coreml.py +++ b/tutorials/frontend/from_coreml.py @@ -35,6 +35,7 @@ or please refer to official site https://github.com/apple/coremltools """ import tvm +from tvm import te import tvm.relay as relay from tvm.contrib.download import download_testdata import coremltools as cm diff --git a/tutorials/frontend/from_darknet.py b/tutorials/frontend/from_darknet.py index e90c8bb..e2c1ea5 100644 --- a/tutorials/frontend/from_darknet.py +++ b/tutorials/frontend/from_darknet.py @@ -38,6 +38,7 @@ import sys # tvm, relay import tvm +from tvm import te from tvm import relay from ctypes import * from tvm.contrib.download import download_testdata diff --git a/tutorials/frontend/from_keras.py b/tutorials/frontend/from_keras.py index c1f3471..928a8ac 100644 --- a/tutorials/frontend/from_keras.py +++ b/tutorials/frontend/from_keras.py @@ -35,6 +35,7 @@ or please refer to official site https://keras.io/#installation """ import tvm +from tvm import te import tvm.relay as relay from tvm.contrib.download import download_testdata import keras diff --git a/tutorials/frontend/from_mxnet.py b/tutorials/frontend/from_mxnet.py index d0e4c4a..bf53db5 100644 --- a/tutorials/frontend/from_mxnet.py +++ b/tutorials/frontend/from_mxnet.py @@ -38,6 +38,7 @@ https://mxnet.incubator.apache.org/versions/master/install/index.html # some standard imports import mxnet as mx import tvm +from tvm import te import tvm.relay as relay import numpy as np diff --git a/tutorials/frontend/from_onnx.py b/tutorials/frontend/from_onnx.py index 7a61593..766451c 100644 --- a/tutorials/frontend/from_onnx.py +++ b/tutorials/frontend/from_onnx.py @@ -35,6 +35,7 @@ https://github.com/onnx/onnx import onnx import numpy as np import tvm +from tvm 
import te import tvm.relay as relay from tvm.contrib.download import download_testdata diff --git a/tutorials/frontend/from_tensorflow.py b/tutorials/frontend/from_tensorflow.py index 55eb3d0..0ebd733 100644 --- a/tutorials/frontend/from_tensorflow.py +++ b/tutorials/frontend/from_tensorflow.py @@ -26,6 +26,7 @@ Please refer to https://www.tensorflow.org/install # tvm, relay import tvm +from tvm import te from tvm import relay # os and numpy diff --git a/tutorials/frontend/from_tflite.py b/tutorials/frontend/from_tflite.py index e93a71c..50fd69f 100644 --- a/tutorials/frontend/from_tflite.py +++ b/tutorials/frontend/from_tflite.py @@ -151,6 +151,7 @@ with relay.build_config(opt_level=3): # Execute on TVM # -------------- import tvm +from tvm import te from tvm.contrib import graph_runtime as runtime # Create a runtime executor module diff --git a/tutorials/frontend/using_external_lib.py b/tutorials/frontend/using_external_lib.py index 71aceda..7063c0e 100644 --- a/tutorials/frontend/using_external_lib.py +++ b/tutorials/frontend/using_external_lib.py @@ -32,6 +32,7 @@ For example, to use cuDNN, USE_CUDNN option in `cmake/config.cmake` needs to be To begin with, we import Relay and TVM. """ import tvm +from tvm import te import numpy as np from tvm.contrib import graph_runtime as runtime from tvm import relay diff --git a/tutorials/language/extern_op.py b/tutorials/language/extern_op.py index 2ad3e30..64e9880 100644 --- a/tutorials/language/extern_op.py +++ b/tutorials/language/extern_op.py @@ -32,13 +32,14 @@ or pointer to DLTensor as argument. from __future__ import absolute_import, print_function import tvm +from tvm import te import numpy as np from tvm.contrib import cblas ###################################################################### # Use Extern Tensor Function # -------------------------- -# In the example below, we use :any:`tvm.extern` to add an extern +# In the example below, we use :any:`te.extern` to add an extern # array function call. 
In the extern call, we declare the shape # of output tensors. In the second argument we provide the list of inputs. # @@ -53,15 +54,15 @@ from tvm.contrib import cblas n = 1024 l = 128 m = 235 -bias = tvm.var('bias', dtype=tvm.float32) -A = tvm.placeholder((n, l), name='A') -B = tvm.placeholder((l, m), name='B') -C = tvm.extern((n, m), [A, B], - lambda ins, outs: tvm.call_packed( +bias = te.var('bias', dtype="float32") +A = te.placeholder((n, l), name='A') +B = te.placeholder((l, m), name='B') +C = te.extern((n, m), [A, B], + lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.cblas.matmul", ins[0], ins[1], outs[0], False, False), name="C") -D = tvm.compute(C.shape, lambda i, j: C[i,j] + bias, name="D") -s = tvm.create_schedule(D.op) +D = te.compute(C.shape, lambda i, j: C[i,j] + bias, name="D") +s = te.create_schedule(D.op) ###################################################################### # Verify the Result @@ -86,8 +87,8 @@ tvm.testing.assert_allclose( # from tvm.contrib import cblas C = cblas.matmul(A, B) -D = tvm.compute(C.shape, lambda i, j: C[i,j] + bias, name="D") -s = tvm.create_schedule(D.op) +D = te.compute(C.shape, lambda i, j: C[i,j] + bias, name="D") +s = te.create_schedule(D.op) ###################################################################### # Hook Python Function as Extern @@ -106,10 +107,10 @@ def my_tvm_addone(x, y): print("my_tvm_addone signatures: %s, %s" % (type(x), type(y))) tvm.nd.array(x.asnumpy() + 1).copyto(y) -A = tvm.placeholder((n,), name='A') -B = tvm.extern(A.shape, [A], lambda ins, outs: tvm.call_packed( +A = te.placeholder((n,), name='A') +B = te.extern(A.shape, [A], lambda ins, outs: tvm.tir.call_packed( "tvm.contrib.my_tvm_addone", ins[0], outs[0]), name="C") -s = tvm.create_schedule(B.op) +s = te.create_schedule(B.op) f = tvm.build(s, [A, B], "llvm") a = tvm.nd.array(np.random.uniform(size=(n,)).astype(A.dtype), ctx) b = tvm.nd.array(np.random.uniform(size=(n,)).astype(B.dtype), ctx) @@ -119,7 +120,7 @@ 
tvm.testing.assert_allclose(b.asnumpy(), a.asnumpy() + 1, rtol=1e-5) ###################################################################### # Summary # ------- -# - TVM calls extern tensor function via :any:`tvm.extern` +# - TVM calls extern tensor function via :any:`te.extern` # - Use contrib wrappers for short sugars of extern tensor calls. # - We can hook front-end function as extern tensor callbacks. # diff --git a/tutorials/language/intrin_math.py b/tutorials/language/intrin_math.py index 59bf79d..eebab3f 100644 --- a/tutorials/language/intrin_math.py +++ b/tutorials/language/intrin_math.py @@ -31,6 +31,7 @@ the interface via tvm's intrinsic API. from __future__ import absolute_import, print_function import tvm +from tvm import te import numpy as np ###################################################################### @@ -38,19 +39,19 @@ import numpy as np # ------------------------------- # The most straight-forward way to call target specific function is via # extern function call construct in tvm. -# In the following example, we use :any:`tvm.call_pure_extern` to call +# In the following example, we use :any:`tvm.tir.call_pure_extern` to call # :code:`__expf` function, which is only available under CUDA. 
# -n = tvm.var("n") -A = tvm.placeholder((n,), name='A') -B = tvm.compute(A.shape, - lambda i: tvm.call_pure_extern("float32", "__expf", A[i]), +n = te.var("n") +A = te.placeholder((n,), name='A') +B = te.compute(A.shape, + lambda i: tvm.tir.call_pure_extern("float32", "__expf", A[i]), name="B") -s = tvm.create_schedule(B.op) +s = te.create_schedule(B.op) num_thread = 64 bx, tx = s[B].split(B.op.axis[0], factor=num_thread) -s[B].bind(bx, tvm.thread_axis("blockIdx.x")) -s[B].bind(tx, tvm.thread_axis("threadIdx.x")) +s[B].bind(bx, te.thread_axis("blockIdx.x")) +s[B].bind(tx, te.thread_axis("threadIdx.x")) f = tvm.build(s, [A, B], "cuda", name="myexp") print(f.imported_modules[0].get_source()) @@ -64,22 +65,22 @@ print(f.imported_modules[0].get_source()) # # TVM intrinsic provides the user a mechanism to achieve this, and this # is the recommended way to solve the problem. -# The following code use tvm.exp instead, which create an intrinsic call -# :any:`tvm.exp` to do the exponential. +# The following code use te.exp instead, which create an intrinsic call +# :any:`te.exp` to do the exponential. # -n = tvm.var("n") -A = tvm.placeholder((n,), name='A') -B = tvm.compute(A.shape, lambda i: tvm.exp(A[i]), name="B") -s = tvm.create_schedule(B.op) +n = te.var("n") +A = te.placeholder((n,), name='A') +B = te.compute(A.shape, lambda i: te.exp(A[i]), name="B") +s = te.create_schedule(B.op) num_thread = 64 bx, tx = s[B].split(B.op.axis[0], factor=num_thread) -s[B].bind(bx, tvm.thread_axis("blockIdx.x")) -s[B].bind(tx, tvm.thread_axis("threadIdx.x")) +s[B].bind(bx, te.thread_axis("blockIdx.x")) +s[B].bind(tx, te.thread_axis("threadIdx.x")) fcuda = tvm.build(s, [A, B], "cuda", name="myexp") print(fcuda.imported_modules[0].get_source()) ###################################################################### # We can find that the code works for both CUDA and opencl. -# The same tvm.exp can also be used for float64 data types. 
+# The same te.exp can also be used for float64 data types. # fopencl = tvm.build(s, [A, B], "opencl", name="myexp") print(fopencl.imported_modules[0].get_source()) @@ -87,7 +88,7 @@ print(fopencl.imported_modules[0].get_source()) ###################################################################### # Intrinsic Lowering Rule # ----------------------- -# When :any:`tvm.exp` is called, TVM creates an intrinsic Call Expr. +# When :any:`te.exp` is called, TVM creates an intrinsic Call Expr. # TVM uses transformation rules to transform the intrinsic # call to device specific extern calls. # @@ -101,10 +102,10 @@ def my_cuda_math_rule(op): assert isinstance(op, tvm.tir.Call) if op.dtype == "float32": # call float function - return tvm.call_pure_extern("float32", "%sf" % op.name, op.args[0]) + return tvm.tir.call_pure_extern("float32", "%sf" % op.name, op.args[0]) elif op.dtype == "float64": # call double function - return tvm.call_pure_extern("float32", op.name, op.args[0]) + return tvm.tir.call_pure_extern("float32", op.name, op.args[0]) else: # cannot do translation, return self. 
return op @@ -131,29 +132,29 @@ print(fcuda.imported_modules[0].get_source()) def mylog(x): """customized log intrinsic function""" - return tvm.call_pure_intrin(x.dtype, "mylog", x) + return tvm.tir.call_pure_intrin(x.dtype, "mylog", x) def my_cuda_mylog_rule(op): """CUDA lowering rule for log""" if op.dtype == "float32": - return tvm.call_pure_extern("float32", "logf", op.args[0]) + return tvm.tir.call_pure_extern("float32", "logf", op.args[0]) elif op.dtype == "float64": - return tvm.call_pure_extern("float64", "log", op.args[0]) + return tvm.tir.call_pure_extern("float64", "log", op.args[0]) else: return op tvm.target.register_intrin_rule("cuda", "mylog", my_cuda_mylog_rule, override=True) -n = tvm.var("n") -A = tvm.placeholder((n,), name='A') -B = tvm.compute(A.shape, lambda i: mylog(A[i]), name="B") -s = tvm.create_schedule(B.op) +n = te.var("n") +A = te.placeholder((n,), name='A') +B = te.compute(A.shape, lambda i: mylog(A[i]), name="B") +s = te.create_schedule(B.op) num_thread = 64 bx, tx = s[B].split(B.op.axis[0], factor=num_thread) -s[B].bind(bx, tvm.thread_axis("blockIdx.x")) -s[B].bind(tx, tvm.thread_axis("threadIdx.x")) +s[B].bind(bx, te.thread_axis("blockIdx.x")) +s[B].bind(tx, te.thread_axis("threadIdx.x")) fcuda = tvm.build(s, [A, B], "cuda", name="mylog") print(fcuda.imported_modules[0].get_source()) @@ -162,6 +163,6 @@ print(fcuda.imported_modules[0].get_source()) # ------- # - TVM can call extern target dependent math function. # - Use intrinsic to defined a unified interface for the functions. -# - For more intrinsics available in tvm, take a look at :any:`tvm.intrin` +# - For more intrinsics available in tvm, take a look at :any:`tvm.tir` # - You can customize the intrinsic behavior by defining your own rules. 
# diff --git a/tutorials/language/reduction.py b/tutorials/language/reduction.py index 0b631cb..cdfc94e 100644 --- a/tutorials/language/reduction.py +++ b/tutorials/language/reduction.py @@ -28,6 +28,7 @@ In this tutorial, we will demonstrate how to do reduction in TVM. from __future__ import absolute_import, print_function import tvm +from tvm import te import numpy as np ###################################################################### @@ -38,8 +39,8 @@ import numpy as np # # The following lines describe the row sum operation. # To create a reduction formula, we declare a reduction axis using -# :any:`tvm.reduce_axis`. :any:`tvm.reduce_axis` takes in the range of reductions. -# :any:`tvm.sum` takes in the expression to be reduced as well as the reduction +# :any:`te.reduce_axis`. :any:`te.reduce_axis` takes in the range of reductions. +# :any:`te.sum` takes in the expression to be reduced as well as the reduction # axis and compute the sum of value over all k in the declared range. # # The equivalent C code is as follows: @@ -53,11 +54,11 @@ import numpy as np # } # } # -n = tvm.var("n") -m = tvm.var("m") -A = tvm.placeholder((n, m), name='A') -k = tvm.reduce_axis((0, m), "k") -B = tvm.compute((n,), lambda i: tvm.sum(A[i, k], axis=k), name="B") +n = te.var("n") +m = te.var("m") +A = te.placeholder((n, m), name='A') +k = te.reduce_axis((0, m), "k") +B = te.compute((n,), lambda i: te.sum(A[i, k], axis=k), name="B") ###################################################################### # Schedule the Reduction @@ -65,7 +66,7 @@ B = tvm.compute((n,), lambda i: tvm.sum(A[i, k], axis=k), name="B") # There are several ways to schedule a reduction. # Before doing anything, let us print out the IR code of default schedule. 
# -s = tvm.create_schedule(B.op) +s = te.create_schedule(B.op) print(tvm.lower(s, [A, B], simple_mode=True)) ###################################################################### @@ -81,8 +82,8 @@ print(tvm.lower(s, [A, B], simple_mode=True)) ###################################################################### # If we are building a GPU kernel, we can bind the rows of B to GPU threads. -s[B].bind(xo, tvm.thread_axis("blockIdx.x")) -s[B].bind(xi, tvm.thread_axis("threadIdx.x")) +s[B].bind(xo, te.thread_axis("blockIdx.x")) +s[B].bind(xi, te.thread_axis("threadIdx.x")) print(tvm.lower(s, [A, B], simple_mode=True)) ###################################################################### @@ -97,7 +98,7 @@ print(tvm.lower(s, [A, B], simple_mode=True)) # In the following schedule, the result of B is written to a temporary # result B.rf. The factored dimension becomes the first dimension of B.rf. # -s = tvm.create_schedule(B.op) +s = te.create_schedule(B.op) ko, ki = s[B].split(B.op.reduce_axis[0], factor=16) BF = s.rfactor(B, ki) print(tvm.lower(s, [A, B], simple_mode=True)) @@ -122,9 +123,9 @@ print(s[B].op.body) # columns by threadIdx.x and finally do a cross thread reduction over threadIdx.x # xo, xi = s[B].split(s[B].op.axis[0], factor=32) -s[B].bind(xo, tvm.thread_axis("blockIdx.x")) -s[B].bind(xi, tvm.thread_axis("threadIdx.y")) -tx = tvm.thread_axis("threadIdx.x") +s[B].bind(xo, te.thread_axis("blockIdx.x")) +s[B].bind(xi, te.thread_axis("threadIdx.y")) +tx = te.thread_axis("threadIdx.x") s[B].bind(s[B].op.reduce_axis[0], tx) s[BF].compute_at(s[B], s[B].op.reduce_axis[0]) s[B].set_store_predicate(tx.var.equal(0)) @@ -148,16 +149,16 @@ tvm.testing.assert_allclose( # In TVM, we can describe convolution via 2D reduction in a simple way. # Here is an example for 2D convolution with filter size = [3, 3] and strides = [1, 1]. 
# -n = tvm.var('n') -Input = tvm.placeholder((n, n), name='Input') -Filter = tvm.placeholder((3, 3), name='Filter') -di = tvm.reduce_axis((0, 3), name='di') -dj = tvm.reduce_axis((0, 3), name='dj') -Output = tvm.compute( +n = te.var('n') +Input = te.placeholder((n, n), name='Input') +Filter = te.placeholder((3, 3), name='Filter') +di = te.reduce_axis((0, 3), name='di') +dj = te.reduce_axis((0, 3), name='dj') +Output = te.compute( (n - 2, n - 2), - lambda i, j: tvm.sum(Input[i + di, j + dj] * Filter[di, dj], axis=[di, dj]), + lambda i, j: te.sum(Input[i + di, j + dj] * Filter[di, dj], axis=[di, dj]), name='Output') -s = tvm.create_schedule(Output.op) +s = te.create_schedule(Output.op) print(tvm.lower(s, [Input, Filter, Output], simple_mode=True)) ###################################################################### @@ -165,18 +166,18 @@ print(tvm.lower(s, [Input, Filter, Output], simple_mode=True)) # # Define General Commutative Reduction Operation # ---------------------------------------------- -# Besides the built-in reduction operations like :any:`tvm.sum`, -# :any:`tvm.min` and :any:`tvm.max`, you can also define your -# commutative reduction operation by :any:`tvm.comm_reducer`. +# Besides the built-in reduction operations like :any:`te.sum`, +# :any:`tvm.te.min` and :any:`tvm.te.max`, you can also define your +# commutative reduction operation by :any:`te.comm_reducer`. 
# -n = tvm.var('n') -m = tvm.var('m') -product = tvm.comm_reducer(lambda x, y: x*y, - lambda t: tvm.const(1, dtype=t), name="product") -A = tvm.placeholder((n, m), name='A') -k = tvm.reduce_axis((0, m), name='k') -B = tvm.compute((n,), lambda i: product(A[i, k], axis=k), name='B') +n = te.var('n') +m = te.var('m') +product = te.comm_reducer(lambda x, y: x*y, + lambda t: tvm.tir.const(1, dtype=t), name="product") +A = te.placeholder((n, m), name='A') +k = te.reduce_axis((0, m), name='k') +B = te.compute((n,), lambda i: product(A[i, k], axis=k), name='B') ###################################################################### # .. note:: @@ -192,4 +193,4 @@ B = tvm.compute((n,), lambda i: product(A[i, k], axis=k), name='B') # # - Describe reduction with reduce_axis. # - Use rfactor to factor out axis if we need parallelism. -# - Define new reduction operation by :any:`tvm.comm_reducer` +# - Define new reduction operation by :any:`te.comm_reducer` diff --git a/tutorials/language/scan.py b/tutorials/language/scan.py index 2fa9c21..73790da 100644 --- a/tutorials/language/scan.py +++ b/tutorials/language/scan.py @@ -25,6 +25,7 @@ Recurrent computing is a typical pattern in neural networks. from __future__ import absolute_import, print_function import tvm +from tvm import te import numpy as np ###################################################################### @@ -46,13 +47,13 @@ import numpy as np # The result of the scan is a tensor, giving the result of :code:`s_state` after the # update over the time domain. 
# -m = tvm.var("m") -n = tvm.var("n") -X = tvm.placeholder((m, n), name="X") -s_state = tvm.placeholder((m, n)) -s_init = tvm.compute((1, n), lambda _, i: X[0, i]) -s_update = tvm.compute((m, n), lambda t, i: s_state[t-1, i] + X[t, i]) -s_scan = tvm.scan(s_init, s_update, s_state, inputs=[X]) +m = te.var("m") +n = te.var("n") +X = te.placeholder((m, n), name="X") +s_state = te.placeholder((m, n)) +s_init = te.compute((1, n), lambda _, i: X[0, i]) +s_update = te.compute((m, n), lambda t, i: s_state[t-1, i] + X[t, i]) +s_scan = tvm.te.scan(s_init, s_update, s_state, inputs=[X]) ###################################################################### # Schedule the Scan Cell @@ -62,10 +63,10 @@ s_scan = tvm.scan(s_init, s_update, s_state, inputs=[X]) # first iteration dimension of the update part. # To split on the time iteration, user can schedule on scan_op.scan_axis instead. # -s = tvm.create_schedule(s_scan.op) +s = te.create_schedule(s_scan.op) num_thread = 256 -block_x = tvm.thread_axis("blockIdx.x") -thread_x = tvm.thread_axis("threadIdx.x") +block_x = te.thread_axis("blockIdx.x") +thread_x = te.thread_axis("threadIdx.x") xo, xi = s[s_init].split(s_init.op.axis[1], factor=num_thread) s[s_init].bind(xo, block_x) s[s_init].bind(xi, thread_x) @@ -100,21 +101,21 @@ tvm.testing.assert_allclose(b.asnumpy(), np.cumsum(a_np, axis=0)) # The following lines demonstrate a scan with two stage operations # in the scan cell. 
# -m = tvm.var("m") -n = tvm.var("n") -X = tvm.placeholder((m, n), name="X") -s_state = tvm.placeholder((m, n)) -s_init = tvm.compute((1, n), lambda _, i: X[0, i]) -s_update_s1 = tvm.compute((m, n), lambda t, i: s_state[t-1, i] * 2, name="s1") -s_update_s2 = tvm.compute((m, n), lambda t, i: s_update_s1[t, i] + X[t, i], name="s2") -s_scan = tvm.scan(s_init, s_update_s2, s_state, inputs=[X]) +m = te.var("m") +n = te.var("n") +X = te.placeholder((m, n), name="X") +s_state = te.placeholder((m, n)) +s_init = te.compute((1, n), lambda _, i: X[0, i]) +s_update_s1 = te.compute((m, n), lambda t, i: s_state[t-1, i] * 2, name="s1") +s_update_s2 = te.compute((m, n), lambda t, i: s_update_s1[t, i] + X[t, i], name="s2") +s_scan = tvm.te.scan(s_init, s_update_s2, s_state, inputs=[X]) ###################################################################### # These intermediate tensors can also be scheduled normally. # To ensure correctness, TVM creates a group constraint to forbid # the body of scan to be compute_at locations outside the scan loop. # -s = tvm.create_schedule(s_scan.op) +s = te.create_schedule(s_scan.op) xo, xi = s[s_update_s2].split(s_update_s2.op.axis[1], factor=32) s[s_update_s1].compute_at(s[s_update_s2], xo) print(tvm.lower(s, [X, s_scan], simple_mode=True)) @@ -126,20 +127,20 @@ print(tvm.lower(s, [X, s_scan], simple_mode=True)) # recurrent state. Scan support multiple recurrent states. # The following example demonstrates how we can build recurrence with two states. 
# -m = tvm.var("m") -n = tvm.var("n") -l = tvm.var("l") -X = tvm.placeholder((m, n), name="X") -s_state1 = tvm.placeholder((m, n)) -s_state2 = tvm.placeholder((m, l)) -s_init1 = tvm.compute((1, n), lambda _, i: X[0, i]) -s_init2 = tvm.compute((1, l), lambda _, i: 0.0) -s_update1 = tvm.compute((m, n), lambda t, i: s_state1[t-1, i] + X[t, i]) -s_update2 = tvm.compute((m, l), lambda t, i: s_state2[t-1, i] + s_state1[t-1, 0]) -s_scan1, s_scan2 = tvm.scan([s_init1, s_init2], +m = te.var("m") +n = te.var("n") +l = te.var("l") +X = te.placeholder((m, n), name="X") +s_state1 = te.placeholder((m, n)) +s_state2 = te.placeholder((m, l)) +s_init1 = te.compute((1, n), lambda _, i: X[0, i]) +s_init2 = te.compute((1, l), lambda _, i: 0.0) +s_update1 = te.compute((m, n), lambda t, i: s_state1[t-1, i] + X[t, i]) +s_update2 = te.compute((m, l), lambda t, i: s_state2[t-1, i] + s_state1[t-1, 0]) +s_scan1, s_scan2 = tvm.te.scan([s_init1, s_init2], [s_update1, s_update2], [s_state1, s_state2], inputs=[X]) -s = tvm.create_schedule(s_scan1.op) +s = te.create_schedule(s_scan1.op) print(tvm.lower(s, [X, s_scan1, s_scan2], simple_mode=True)) ###################################################################### diff --git a/tutorials/language/schedule_primitives.py b/tutorials/language/schedule_primitives.py index e59264f..61bfcad 100644 --- a/tutorials/language/schedule_primitives.py +++ b/tutorials/language/schedule_primitives.py @@ -27,6 +27,7 @@ various primitives provided by TVM. 
from __future__ import absolute_import, print_function import tvm +from tvm import te import numpy as np ###################################################################### @@ -41,19 +42,19 @@ import numpy as np # # declare some variables for use later -n = tvm.var('n') -m = tvm.var('m') +n = te.var('n') +m = te.var('m') ###################################################################### # A schedule can be created from a list of ops, by default the # schedule computes tensor in a serial manner in a row-major order. # declare a matrix element-wise multiply -A = tvm.placeholder((m, n), name='A') -B = tvm.placeholder((m, n), name='B') -C = tvm.compute((m, n), lambda i, j: A[i, j] * B[i, j], name='C') +A = te.placeholder((m, n), name='A') +B = te.placeholder((m, n), name='B') +C = te.compute((m, n), lambda i, j: A[i, j] * B[i, j], name='C') -s = tvm.create_schedule([C.op]) +s = te.create_schedule([C.op]) # lower will transform the computation from definition to the real # callable function. With argument `simple_mode=True`, it will # return you a readable C like statement, we use it here to print the @@ -70,20 +71,20 @@ print(tvm.lower(s, [A, B, C], simple_mode=True)) # ----- # :code:`split` can split a specified axis into two axises by # :code:`factor`. -A = tvm.placeholder((m,), name='A') -B = tvm.compute((m,), lambda i: A[i]*2, name='B') +A = te.placeholder((m,), name='A') +B = te.compute((m,), lambda i: A[i]*2, name='B') -s = tvm.create_schedule(B.op) +s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=32) print(tvm.lower(s, [A, B], simple_mode=True)) ###################################################################### # You can also split a axis by :code:`nparts`, which splits the axis # contrary with :code:`factor`. 
-A = tvm.placeholder((m,), name='A') -B = tvm.compute((m,), lambda i: A[i], name='B') +A = te.placeholder((m,), name='A') +B = te.compute((m,), lambda i: A[i], name='B') -s = tvm.create_schedule(B.op) +s = te.create_schedule(B.op) bx, tx = s[B].split(B.op.axis[0], nparts=32) print(tvm.lower(s, [A, B], simple_mode=True)) @@ -92,10 +93,10 @@ print(tvm.lower(s, [A, B], simple_mode=True)) # ---- # :code:`tile` help you execute the computation tile by tile over two # axises. -A = tvm.placeholder((m, n), name='A') -B = tvm.compute((m, n), lambda i, j: A[i, j], name='B') +A = te.placeholder((m, n), name='A') +B = te.compute((m, n), lambda i, j: A[i, j], name='B') -s = tvm.create_schedule(B.op) +s = te.create_schedule(B.op) xo, yo, xi, yi = s[B].tile(B.op.axis[0], B.op.axis[1], x_factor=10, y_factor=5) print(tvm.lower(s, [A, B], simple_mode=True)) @@ -103,10 +104,10 @@ print(tvm.lower(s, [A, B], simple_mode=True)) # fuse # ---- # :code:`fuse` can fuse two consecutive axises of one computation. -A = tvm.placeholder((m, n), name='A') -B = tvm.compute((m, n), lambda i, j: A[i, j], name='B') +A = te.placeholder((m, n), name='A') +B = te.compute((m, n), lambda i, j: A[i, j], name='B') -s = tvm.create_schedule(B.op) +s = te.create_schedule(B.op) # tile to four axises first: (i.outer, j.outer, i.inner, j.inner) xo, yo, xi, yi = s[B].tile(B.op.axis[0], B.op.axis[1], x_factor=10, y_factor=5) # then fuse (i.inner, j.inner) into one axis: (i.inner.j.inner.fused) @@ -117,10 +118,10 @@ print(tvm.lower(s, [A, B], simple_mode=True)) # reorder # ------- # :code:`reorder` can reorder the axises in the specified order. 
-A = tvm.placeholder((m, n), name='A') -B = tvm.compute((m, n), lambda i, j: A[i, j], name='B') +A = te.placeholder((m, n), name='A') +B = te.compute((m, n), lambda i, j: A[i, j], name='B') -s = tvm.create_schedule(B.op) +s = te.create_schedule(B.op) # tile to four axises first: (i.outer, j.outer, i.inner, j.inner) xo, yo, xi, yi = s[B].tile(B.op.axis[0], B.op.axis[1], x_factor=10, y_factor=5) # then reorder the axises: (i.inner, j.outer, i.outer, j.inner) @@ -132,13 +133,13 @@ print(tvm.lower(s, [A, B], simple_mode=True)) # ---- # :code:`bind` can bind a specified axis with a thread axis, often used # in gpu programming. -A = tvm.placeholder((n,), name='A') -B = tvm.compute(A.shape, lambda i: A[i] * 2, name='B') +A = te.placeholder((n,), name='A') +B = te.compute(A.shape, lambda i: A[i] * 2, name='B') -s = tvm.create_schedule(B.op) +s = te.create_schedule(B.op) bx, tx = s[B].split(B.op.axis[0], factor=64) -s[B].bind(bx, tvm.thread_axis("blockIdx.x")) -s[B].bind(tx, tvm.thread_axis("threadIdx.x")) +s[B].bind(bx, te.thread_axis("blockIdx.x")) +s[B].bind(tx, te.thread_axis("threadIdx.x")) print(tvm.lower(s, [A, B], simple_mode=True)) ###################################################################### @@ -146,21 +147,21 @@ print(tvm.lower(s, [A, B], simple_mode=True)) # ---------- # For a schedule that consists of multiple operators, TVM will compute # tensors at the root separately by default. -A = tvm.placeholder((m,), name='A') -B = tvm.compute((m,), lambda i: A[i]+1, name='B') -C = tvm.compute((m,), lambda i: B[i]*2, name='C') +A = te.placeholder((m,), name='A') +B = te.compute((m,), lambda i: A[i]+1, name='B') +C = te.compute((m,), lambda i: B[i]*2, name='C') -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) print(tvm.lower(s, [A, B, C], simple_mode=True)) ###################################################################### # :code:`compute_at` can move computation of `B` into the first axis # of computation of `C`. 
-A = tvm.placeholder((m,), name='A') -B = tvm.compute((m,), lambda i: A[i]+1, name='B') -C = tvm.compute((m,), lambda i: B[i]*2, name='C') +A = te.placeholder((m,), name='A') +B = te.compute((m,), lambda i: A[i]+1, name='B') +C = te.compute((m,), lambda i: B[i]*2, name='C') -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) s[B].compute_at(s[C], C.op.axis[0]) print(tvm.lower(s, [A, B, C], simple_mode=True)) @@ -170,11 +171,11 @@ print(tvm.lower(s, [A, B, C], simple_mode=True)) # :code:`compute_inline` can mark one stage as inline, then the body of # computation will be expanded and inserted at the address where the # tensor is required. -A = tvm.placeholder((m,), name='A') -B = tvm.compute((m,), lambda i: A[i]+1, name='B') -C = tvm.compute((m,), lambda i: B[i]*2, name='C') +A = te.placeholder((m,), name='A') +B = te.compute((m,), lambda i: A[i]+1, name='B') +C = te.compute((m,), lambda i: B[i]*2, name='C') -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) s[B].compute_inline() print(tvm.lower(s, [A, B, C], simple_mode=True)) @@ -182,11 +183,11 @@ print(tvm.lower(s, [A, B, C], simple_mode=True)) # compute_root # ------------ # :code:`compute_root` can move computation of one stage to the root. -A = tvm.placeholder((m,), name='A') -B = tvm.compute((m,), lambda i: A[i]+1, name='B') -C = tvm.compute((m,), lambda i: B[i]*2, name='C') +A = te.placeholder((m,), name='A') +B = te.compute((m,), lambda i: A[i]+1, name='B') +C = te.compute((m,), lambda i: B[i]*2, name='C') -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) s[B].compute_at(s[C], C.op.axis[0]) s[B].compute_root() print(tvm.lower(s, [A, B, C], simple_mode=True)) diff --git a/tutorials/language/tedd.py b/tutorials/language/tedd.py index 3614688..a6cd801 100644 --- a/tutorials/language/tedd.py +++ b/tutorials/language/tedd.py @@ -37,9 +37,8 @@ TEDD renders these three graphs from a given schedule. This tutorial demonstrat how to use TEDD and how to interpret the rendered graphs. 
""" -from __future__ import absolute_import, print_function - import tvm +from tvm import te import topi from tvm.contrib import tedd @@ -58,11 +57,13 @@ kernel = 3 stride = 1 padding = "SAME" dilation=1 -A = tvm.placeholder((in_size, in_size, in_channel, batch), name='A') -W = tvm.placeholder((kernel, kernel, in_channel, num_filter), name='W') -B = tvm.placeholder((1, num_filter, 1), name='bias') + +A = te.placeholder((in_size, in_size, in_channel, batch), name='A') +W = te.placeholder((kernel, kernel, in_channel, num_filter), name='W') +B = te.placeholder((1, num_filter, 1), name='bias') + with tvm.target.create("llvm"): - t_conv = topi.nn.conv2d(A, W, stride, padding, dilation, layout='HWCN') + t_conv = topi.nn.conv2d_hwcn(A, W, stride, padding, dilation) t_bias = topi.add(t_conv, B) t_relu = topi.nn.relu(t_bias) s = topi.generic.schedule_conv2d_hwcn([t_relu]) diff --git a/tutorials/language/tensorize.py b/tutorials/language/tensorize.py index afc708e..4290606 100644 --- a/tutorials/language/tensorize.py +++ b/tutorials/language/tensorize.py @@ -35,6 +35,7 @@ and usage of tensorize instead of providing an efficient solution. from __future__ import absolute_import, print_function import tvm +from tvm import te import numpy as np ###################################################################### @@ -46,12 +47,12 @@ import numpy as np # The following lines describe the computation :code:`A * B^T` in TVM. 
# N, M, L = 1024, 512, 64 -A = tvm.placeholder((N, L), name='A') -B = tvm.placeholder((M, L), name='B') -k = tvm.reduce_axis((0, L), name='k') -C = tvm.compute((N, M), lambda i, j: - tvm.sum(A[i, k] * B[j, k], axis=k), name='C') -s = tvm.create_schedule(C.op) +A = te.placeholder((N, L), name='A') +B = te.placeholder((M, L), name='B') +k = te.reduce_axis((0, L), name='k') +C = te.compute((N, M), lambda i, j: + te.sum(A[i, k] * B[j, k], axis=k), name='C') +s = te.create_schedule(C.op) print(tvm.lower(s, [A, B, C], simple_mode=True)) ###################################################################### @@ -88,37 +89,37 @@ print(tvm.lower(s, [A, B, C], simple_mode=True)) # which is done in :code:`intrin_func` below. # def intrin_gemv(m, l): - a = tvm.placeholder((l,), name='a') - b = tvm.placeholder((m, l), name='b') - k = tvm.reduce_axis((0, l), name='k') - c = tvm.compute((m,), lambda i: tvm.sum(a[k] * b[i, k], axis=k), name='c') - Ab = tvm.decl_buffer(a.shape, a.dtype, + a = te.placeholder((l,), name='a') + b = te.placeholder((m, l), name='b') + k = te.reduce_axis((0, l), name='k') + c = te.compute((m,), lambda i: te.sum(a[k] * b[i, k], axis=k), name='c') + Ab = tvm.tir.decl_buffer(a.shape, a.dtype, name="A", offset_factor=1, strides=[1]) - Bb = tvm.decl_buffer(b.shape, b.dtype, + Bb = tvm.tir.decl_buffer(b.shape, b.dtype, name="B", offset_factor=1, - strides=[tvm.var("s1"), 1]) - Cb = tvm.decl_buffer(c.shape, c.dtype, + strides=[te.var("s1"), 1]) + Cb = tvm.tir.decl_buffer(c.shape, c.dtype, name="C", offset_factor=1, strides=[1]) def intrin_func(ins, outs): - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() aa, bb = ins cc = outs[0] - ib.emit(tvm.call_extern("int32", "gemv_update", + ib.emit(tvm.tir.call_extern("int32", "gemv_update", cc.access_ptr("w"), aa.access_ptr("r"), bb.access_ptr("r"), m, l, bb.strides[0])) return ib.get() - with tvm.build_config(offset_factor=1): - return tvm.decl_tensor_intrin(c.op, intrin_func, binds={a: Ab, b: Bb, c: 
Cb}) + with tvm.target.build_config(offset_factor=1): + return te.decl_tensor_intrin(c.op, intrin_func, binds={a: Ab, b: Bb, c: Cb}) ###################################################################### -# Here :code:`tvm.decl_tensor_intrin` declares how to execute the computation :code:`c.op`. +# Here :code:`te.decl_tensor_intrin` declares how to execute the computation :code:`c.op`. # Our implementation simply takes the inputs and outputs, # converts them to pointers and emit an external function call. # Note that tensorization requires user to specify :code:`offset_factor`, @@ -134,7 +135,7 @@ def intrin_gemv(m, l): # For now :code:`bb.strides[0] == l`, # but later we will see how they can differ with more complicated schedules. # -# Note that we use :code:`tvm.var("s1")` as the first stride dimension for :code:`B`. +# Note that we use :code:`te.var("s1")` as the first stride dimension for :code:`B`. # If the strides can be inferred # - in this case, TVM knows tensor B is compact thus the strides are :code:`[L, 1]` - # such placeholder can be put to let TVM automatically bind the inferred value for us. 
@@ -233,20 +234,20 @@ def gemv_impl(): return ll_code def intrin_gemv(m, l): - a = tvm.placeholder((l,), name='a') - b = tvm.placeholder((m, l), name='b') - k = tvm.reduce_axis((0, l), name='k') - c = tvm.compute((m,), lambda i: - tvm.sum(a[k] * b[i, k], axis=k), name='c') - Ab = tvm.decl_buffer(a.shape, a.dtype, + a = te.placeholder((l,), name='a') + b = te.placeholder((m, l), name='b') + k = te.reduce_axis((0, l), name='k') + c = te.compute((m,), lambda i: + te.sum(a[k] * b[i, k], axis=k), name='c') + Ab = tvm.tir.decl_buffer(a.shape, a.dtype, name="A", offset_factor=1, strides=[1]) - Bb = tvm.decl_buffer(b.shape, b.dtype, + Bb = tvm.tir.decl_buffer(b.shape, b.dtype, name="B", offset_factor=1, - strides=[tvm.var("s1"), 1]) - Cb = tvm.decl_buffer(c.shape, c.dtype, + strides=[te.var("s1"), 1]) + Cb = tvm.tir.decl_buffer(c.shape, c.dtype, name="C", offset_factor=1, strides=[1]) @@ -254,22 +255,22 @@ def intrin_gemv(m, l): aa, bb = ins cc = outs[0] def _body(): - ib = tvm.ir_builder.create() - ib.emit(tvm.call_extern("int32", "gemv_update", + ib = tvm.tir.ir_builder.create() + ib.emit(tvm.tir.call_extern("int32", "gemv_update", cc.access_ptr("w"), aa.access_ptr("r"), bb.access_ptr("r"), m, l, bb.strides[0])) return ib.get() def _reduce_reset(): - ib = tvm.ir_builder.create() - ib.emit(tvm.call_extern("int32", "gemv_reset", cc.access_ptr("w"), m)) + ib = tvm.tir.ir_builder.create() + ib.emit(tvm.tir.call_extern("int32", "gemv_reset", cc.access_ptr("w"), m)) return ib.get() def _reduce_update(): return _body() return _body(), _reduce_reset(), _reduce_update() - with tvm.build_config(offset_factor=1): - return tvm.decl_tensor_intrin(c.op, intrin_func, binds={a: Ab, b: Bb, c: Cb}) + with tvm.target.build_config(offset_factor=1): + return te.decl_tensor_intrin(c.op, intrin_func, binds={a: Ab, b: Bb, c: Cb}) ###################################################################### # Note that :code:`intrin_func` now returns a triplet: diff --git 
a/tutorials/language/tuple_inputs.py b/tutorials/language/tuple_inputs.py index 715e2ef..828797a 100644 --- a/tutorials/language/tuple_inputs.py +++ b/tutorials/language/tuple_inputs.py @@ -28,23 +28,24 @@ In this tutorial, we will introduce the usage of tuple inputs in TVM. from __future__ import absolute_import, print_function import tvm +from tvm import te import numpy as np ###################################################################### # Describe Batchwise Computation # ------------------------------ # For operators which have the same shape, we can put them together as -# the inputs of :any:`tvm.compute`, if we want them to be scheduled +# the inputs of :any:`te.compute`, if we want them to be scheduled # together in the next schedule procedure. # -n = tvm.var("n") -m = tvm.var("m") -A0 = tvm.placeholder((m, n), name='A0') -A1 = tvm.placeholder((m, n), name='A1') -B0, B1 = tvm.compute((m, n), lambda i, j: (A0[i, j] + 2, A1[i, j] * 3), name='B') +n = te.var("n") +m = te.var("m") +A0 = te.placeholder((m, n), name='A0') +A1 = te.placeholder((m, n), name='A1') +B0, B1 = te.compute((m, n), lambda i, j: (A0[i, j] + 2, A1[i, j] * 3), name='B') # The generated IR code would be: -s = tvm.create_schedule(B0.op) +s = te.create_schedule(B0.op) print(tvm.lower(s, [A0, A1, B0, B1], simple_mode=True)) ###################################################################### @@ -56,7 +57,7 @@ print(tvm.lower(s, [A0, A1, B0, B1], simple_mode=True)) # operators, and the inputs will collaborate together, e.g. :code:`argmax`. # In the reduction procedure, :code:`argmax` need to compare the value of # operands, also need to keep the index of operand. It can be expressed -# with :py:func:`tvm.comm_reducer` as below: +# with :py:func:`te.comm_reducer` as below: # x and y are the operands of reduction, both of them is a tuple of index # and value. @@ -68,20 +69,20 @@ def fcombine(x, y): # our identity element also need to be a tuple, so `fidentity` accepts # two types as inputs. 
def fidentity(t0, t1): - return tvm.const(-1, t0), tvm.min_value(t1) + return tvm.tir.const(-1, t0), tvm.te.min_value(t1) -argmax = tvm.comm_reducer(fcombine, fidentity, name='argmax') +argmax = te.comm_reducer(fcombine, fidentity, name='argmax') # describe the reduction computation -m = tvm.var('m') -n = tvm.var('n') -idx = tvm.placeholder((m, n), name='idx', dtype='int32') -val = tvm.placeholder((m, n), name='val', dtype='int32') -k = tvm.reduce_axis((0, n), 'k') -T0, T1 = tvm.compute((m, ), lambda i: argmax((idx[i, k], val[i, k]), axis=k), name='T') +m = te.var('m') +n = te.var('n') +idx = te.placeholder((m, n), name='idx', dtype='int32') +val = te.placeholder((m, n), name='val', dtype='int32') +k = te.reduce_axis((0, n), 'k') +T0, T1 = te.compute((m, ), lambda i: argmax((idx[i, k], val[i, k]), axis=k), name='T') # the generated IR code would be: -s = tvm.create_schedule(T0.op) +s = te.create_schedule(T0.op) print(tvm.lower(s, [idx, val, T0, T1], simple_mode=True)) ###################################################################### @@ -97,14 +98,14 @@ print(tvm.lower(s, [idx, val, T0, T1], simple_mode=True)) # with one batch operation, but they can only be scheduled together # in terms of operation. 
-n = tvm.var("n") -m = tvm.var("m") -A0 = tvm.placeholder((m, n), name='A0') -B0, B1 = tvm.compute((m, n), lambda i, j: (A0[i, j] + 2, A0[i, j] * 3), name='B') -A1 = tvm.placeholder((m, n), name='A1') -C = tvm.compute((m, n), lambda i, j: A1[i, j] + B0[i, j], name='C') +n = te.var("n") +m = te.var("m") +A0 = te.placeholder((m, n), name='A0') +B0, B1 = te.compute((m, n), lambda i, j: (A0[i, j] + 2, A0[i, j] * 3), name='B') +A1 = te.placeholder((m, n), name='A1') +C = te.compute((m, n), lambda i, j: A1[i, j] + B0[i, j], name='C') -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) s[B0].compute_at(s[C], C.op.axis[0]) # as you can see in the below generated IR code: print(tvm.lower(s, [A0, A1, C], simple_mode=True)) diff --git a/tutorials/optimize/opt_conv_cuda.py b/tutorials/optimize/opt_conv_cuda.py index 74d1e6d..025e53e 100644 --- a/tutorials/optimize/opt_conv_cuda.py +++ b/tutorials/optimize/opt_conv_cuda.py @@ -42,6 +42,7 @@ channel, batch. import numpy as np import tvm +from tvm import te # The sizes of inputs and filters batch = 256 @@ -53,25 +54,25 @@ pad = 1 stride = 1 # Algorithm -A = tvm.placeholder((in_size, in_size, in_channel, batch), name='A') -W = tvm.placeholder((kernel, kernel, in_channel, out_channel), name='W') +A = te.placeholder((in_size, in_size, in_channel, batch), name='A') +W = te.placeholder((kernel, kernel, in_channel, out_channel), name='W') out_size = (in_size - kernel + 2*pad) // stride + 1 # Pad input -Apad = tvm.compute( +Apad = te.compute( (in_size + 2*pad, in_size + 2*pad, in_channel, batch), - lambda yy, xx, cc, nn: tvm.if_then_else( - tvm.all(yy >= pad, yy - pad < in_size, + lambda yy, xx, cc, nn: tvm.tir.if_then_else( + tvm.tir.all(yy >= pad, yy - pad < in_size, xx >= pad, xx - pad < in_size), - A[yy - pad, xx - pad, cc, nn], tvm.const(0., "float32")), + A[yy - pad, xx - pad, cc, nn], tvm.tir.const(0., "float32")), name='Apad') # Create reduction variables -rc = tvm.reduce_axis((0, in_channel), name='rc') -ry = 
tvm.reduce_axis((0, kernel), name='ry') -rx = tvm.reduce_axis((0, kernel), name='rx') +rc = te.reduce_axis((0, in_channel), name='rc') +ry = te.reduce_axis((0, kernel), name='ry') +rx = te.reduce_axis((0, kernel), name='rx') # Compute the convolution -B = tvm.compute( +B = te.compute( (out_size, out_size, out_channel, batch), - lambda yy, xx, ff, nn: tvm.sum( + lambda yy, xx, ff, nn: te.sum( Apad[yy * stride + ry, xx * stride + rx, rc, nn] * W[ry, rx, rc, ff], axis=[ry, rx, rc]), name='B') @@ -101,7 +102,7 @@ B = tvm.compute( # # Designate the memory hierarchy -s = tvm.create_schedule(B.op) +s = te.create_schedule(B.op) s[Apad].compute_inline() # compute Apad inline AA = s.cache_read(Apad, 'shared', [B]) WW = s.cache_read(W, "shared", [B]) @@ -135,13 +136,13 @@ step = 8 vthread = 2 # Get the GPU thread indices -block_x = tvm.thread_axis("blockIdx.x") -block_y = tvm.thread_axis("blockIdx.y") -block_z = tvm.thread_axis("blockIdx.z") -thread_x = tvm.thread_axis((0, num_thread), "threadIdx.x") -thread_y = tvm.thread_axis((0, num_thread), "threadIdx.y") -thread_xz = tvm.thread_axis((0, vthread), "vthread", name="vx") -thread_yz = tvm.thread_axis((0, vthread), "vthread", name="vy") +block_x = te.thread_axis("blockIdx.x") +block_y = te.thread_axis("blockIdx.y") +block_z = te.thread_axis("blockIdx.z") +thread_x = te.thread_axis((0, num_thread), "threadIdx.x") +thread_y = te.thread_axis((0, num_thread), "threadIdx.y") +thread_xz = te.thread_axis((0, vthread), "vthread", name="vx") +thread_yz = te.thread_axis((0, vthread), "vthread", name="vy") # Split the workloads hi, wi, fi, ni = s[B].op.axis diff --git a/tutorials/optimize/opt_conv_tensorcore.py b/tutorials/optimize/opt_conv_tensorcore.py index ef84089..44b9de3 100644 --- a/tutorials/optimize/opt_conv_tensorcore.py +++ b/tutorials/optimize/opt_conv_tensorcore.py @@ -52,6 +52,7 @@ convolution has a large batch. 
We strongly recommend covering the :ref:`opt-conv # NHWCnc memory layout.The following code defines the convolution algorithm in TVM. import tvm +from tvm import te import numpy as np from tvm.contrib import nvcc @@ -98,30 +99,30 @@ output_shape = (batch_size // block_size, block_size) # Reduction axes -kh = tvm.reduce_axis((0, kernel_h), name='kh') -kw = tvm.reduce_axis((0, kernel_w), name='kw') -ic = tvm.reduce_axis((0, in_channels // block_size), name='ic') -ii = tvm.reduce_axis((0, block_size), name='ii') +kh = te.reduce_axis((0, kernel_h), name='kh') +kw = te.reduce_axis((0, kernel_w), name='kw') +ic = te.reduce_axis((0, in_channels // block_size), name='ic') +ii = te.reduce_axis((0, block_size), name='ii') # Algorithm -A = tvm.placeholder(data_shape, name='A', dtype="float16") -W = tvm.placeholder(kernel_shape, name='W', dtype="float16") -Apad = tvm.compute( +A = te.placeholder(data_shape, name='A', dtype="float16") +W = te.placeholder(kernel_shape, name='W', dtype="float16") +Apad = te.compute( (batch_size // block_size, height + 2 * pad_h, width + 2 * pad_w, in_channels // block_size, block_size, block_size), - lambda n, h, w, i, nn, ii: tvm.if_then_else( - tvm.all(h >= pad_h, h - pad_h < height, + lambda n, h, w, i, nn, ii: tvm.tir.if_then_else( + tvm.tir.all(h >= pad_h, h - pad_h < height, w >= pad_w, w - pad_w < width), - A[n, h - pad_h, w - pad_w, i, nn, ii], tvm.const(0., "float16")), + A[n, h - pad_h, w - pad_w, i, nn, ii], tvm.tir.const(0., "float16")), name='Apad') -Conv = tvm.compute(output_shape, - lambda n, h, w, o, nn, oo: tvm.sum( +Conv = te.compute(output_shape, + lambda n, h, w, o, nn, oo: te.sum( Apad[n, h * stride_h + kh, w * stride_w + kw, ic, nn, ii].astype("float32") * W[kh, kw, ic, o, ii, oo].astype("float32"), axis=[ic, kh, kw, ii]), name="Conv") -s = tvm.create_schedule(Conv.op) +s = te.create_schedule(Conv.op) s[Apad].compute_inline() ############################################################################### @@ -152,49 +153,49 
@@ ConvF = s.cache_write(Conv, 'wmma.accumulator') def intrin_wmma_load_matrix(scope): n = 16 - A = tvm.placeholder((n, n), name='A', dtype='float16') - BA = tvm.decl_buffer(A.shape, A.dtype, scope='shared', data_alignment=32, offset_factor=256) - C = tvm.compute((n, n), lambda i, j: A[i, j], name='C') - BC = tvm.decl_buffer(C.shape, C.dtype, scope=scope, data_alignment=32, offset_factor=256) + A = te.placeholder((n, n), name='A', dtype='float16') + BA = tvm.tir.decl_buffer(A.shape, A.dtype, scope='shared', data_alignment=32, offset_factor=256) + C = te.compute((n, n), lambda i, j: A[i, j], name='C') + BC = tvm.tir.decl_buffer(C.shape, C.dtype, scope=scope, data_alignment=32, offset_factor=256) def intrin_func(ins, outs): - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() BA = ins[0] BC = outs[0] - ib.emit(tvm.call_intrin('handle', 'tvm_load_matrix_sync', + ib.emit(tvm.tir.call_intrin('handle', 'tvm_load_matrix_sync', BC.data, n, n, n, BC.elem_offset // 256, BA.access_ptr('r'), n, 'row_major')) return ib.get() - return tvm.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, C: BC}) + return te.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, C: BC}) def intrin_wmma_gemm(): n = 16 - A = tvm.placeholder((n, n), name='A', dtype='float16') - B = tvm.placeholder((n, n), name='B', dtype='float16') - k = tvm.reduce_axis((0, n), name="k") - C = tvm.compute((n, n), + A = te.placeholder((n, n), name='A', dtype='float16') + B = te.placeholder((n, n), name='B', dtype='float16') + k = te.reduce_axis((0, n), name="k") + C = te.compute((n, n), lambda ii, jj: - tvm.sum(A[ii, k].astype('float') * B[k, jj].astype('float'), axis=k), + te.sum(A[ii, k].astype('float') * B[k, jj].astype('float'), axis=k), name='C') - BA = tvm.decl_buffer(A.shape, A.dtype, name='BA', scope='wmma.matrix_a', data_alignment=32, offset_factor=256) - BB = tvm.decl_buffer(B.shape, B.dtype, name='BB', scope='wmma.matrix_b', data_alignment=32, offset_factor=256) - BC = tvm.decl_buffer(C.shape, 
C.dtype, name='BC', scope='wmma.accumulator', data_alignment=32, offset_factor=256) + BA = tvm.tir.decl_buffer(A.shape, A.dtype, name='BA', scope='wmma.matrix_a', data_alignment=32, offset_factor=256) + BB = tvm.tir.decl_buffer(B.shape, B.dtype, name='BB', scope='wmma.matrix_b', data_alignment=32, offset_factor=256) + BC = tvm.tir.decl_buffer(C.shape, C.dtype, name='BC', scope='wmma.accumulator', data_alignment=32, offset_factor=256) def intrin_func(ins, outs): BA, BB = ins BC, = outs def init(): - ib = tvm.ir_builder.create() - ib.emit(tvm.call_intrin('handle', 'tvm_fill_fragment', BC.data, n, n, n, BC.elem_offset // 256, 0.0)) + ib = tvm.tir.ir_builder.create() + ib.emit(tvm.tir.call_intrin('handle', 'tvm_fill_fragment', BC.data, n, n, n, BC.elem_offset // 256, 0.0)) return ib.get() def update(): - ib = tvm.ir_builder.create() - ib.emit(tvm.call_intrin('handle', 'tvm_mma_sync', + ib = tvm.tir.ir_builder.create() + ib.emit(tvm.tir.call_intrin('handle', 'tvm_mma_sync', BC.data, BC.elem_offset // 256, BA.data, BA.elem_offset // 256, BB.data, BB.elem_offset // 256, @@ -203,26 +204,26 @@ def intrin_wmma_gemm(): return update(), init(), update() - return tvm.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, B: BB, C: BC}) + return te.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, B: BB, C: BC}) def intrin_wmma_store_matrix(): n = 16 - A = tvm.placeholder((n, n), name='A', dtype='float32') - BA = tvm.decl_buffer(A.shape, A.dtype, scope='wmma.accumulator', data_alignment=32, offset_factor=256) - C = tvm.compute((n, n), lambda i, j: A[i, j], name='C') - BC = tvm.decl_buffer(C.shape, C.dtype, scope='global', data_alignment=32, offset_factor=256) + A = te.placeholder((n, n), name='A', dtype='float32') + BA = tvm.tir.decl_buffer(A.shape, A.dtype, scope='wmma.accumulator', data_alignment=32, offset_factor=256) + C = te.compute((n, n), lambda i, j: A[i, j], name='C') + BC = tvm.tir.decl_buffer(C.shape, C.dtype, scope='global', data_alignment=32, offset_factor=256) def 
intrin_func(ins, outs): - ib = tvm.ir_builder.create() + ib = tvm.tir.ir_builder.create() BA = ins[0] BC = outs[0] - ib.emit(tvm.call_intrin('handle', 'tvm_store_matrix_sync', + ib.emit(tvm.tir.call_intrin('handle', 'tvm_store_matrix_sync', BA.data, n, n, n, BA.elem_offset // 256, BC.access_ptr('w'), n, 'row_major')) return ib.get() - return tvm.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, C: BC}) + return te.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, C: BC}) ############################################################################### # Scheduling the Computation @@ -255,12 +256,12 @@ warp_col_tiles = 4 warp_size = 32 chunk = 2 -block_x = tvm.thread_axis('blockIdx.x') -block_y = tvm.thread_axis('blockIdx.y') -block_z = tvm.thread_axis('blockIdx.z') -thread_x = tvm.thread_axis('threadIdx.x') -thread_y = tvm.thread_axis('threadIdx.y') -thread_z = tvm.thread_axis('threadIdx.z') +block_x = te.thread_axis('blockIdx.x') +block_y = te.thread_axis('blockIdx.y') +block_z = te.thread_axis('blockIdx.z') +thread_x = te.thread_axis('threadIdx.x') +thread_y = te.thread_axis('threadIdx.y') +thread_z = te.thread_axis('threadIdx.z') nc, hc, wc, oc, nnc, ooc = Conv.op.axis block_k = s[Conv].fuse(hc, wc) @@ -330,7 +331,7 @@ print(tvm.lower(s, [A, W, Conv], simple_mode=True)) ctx = tvm.gpu(0) if nvcc.have_tensorcore(ctx.compute_version): - with tvm.build_config(auto_unroll_max_step=16): + with tvm.target.build_config(auto_unroll_max_step=16): func = tvm.build(s, [A, W, Conv], 'cuda') a_np = np.random.uniform(size=data_shape).astype(A.dtype) w_np = np.random.uniform(size=kernel_shape).astype(W.dtype) diff --git a/tutorials/optimize/opt_gemm.py b/tutorials/optimize/opt_gemm.py index 8ed152a..daca89b 100644 --- a/tutorials/optimize/opt_gemm.py +++ b/tutorials/optimize/opt_gemm.py @@ -56,6 +56,7 @@ Intel i7-4770HQ CPU. 
The cache line size should be 64 bytes for all the x86 CPUs # Then we write a baseline implementation, the simplest way to write a matrix multiplication in TVM. import tvm +from tvm import te import numpy import timeit @@ -94,16 +95,16 @@ print("Numpy running time: %f" % (np_runing_time / np_repeat)) answer = numpy.dot(a.asnumpy(), b.asnumpy()) # Algorithm -k = tvm.reduce_axis((0, K), 'k') -A = tvm.placeholder((M, K), name='A') -B = tvm.placeholder((K, N), name='B') -C = tvm.compute( +k = te.reduce_axis((0, K), 'k') +A = te.placeholder((M, K), name='A') +B = te.placeholder((K, N), name='B') +C = te.compute( (M, N), - lambda x, y: tvm.sum(A[x, k] * B[k, y], axis=k), + lambda x, y: te.sum(A[x, k] * B[k, y], axis=k), name='C') # Default schedule -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) func = tvm.build(s, [A, B, C], target=target, name='mmult') assert func @@ -129,7 +130,7 @@ print(tvm.lower(s, [A, B, C], simple_mode=True)) # fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB (L1 data cache) bn = 32 -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) # Blocking by loop tiling xo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn) @@ -165,7 +166,7 @@ print(tvm.lower(s, [A, B, C], simple_mode=True)) # # In this tutorial, we chose to vectorize the inner loop row data since it is cache friendly. -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) xo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn) k, = s[C].op.reduce_axis ko, ki = s[C].split(k, factor=4) @@ -199,7 +200,7 @@ print(tvm.lower(s, [A, B, C], simple_mode=True)) # which is not cache friendly. If we change the nested loop order of ki and inner axes xi, # the access pattern for A matrix is more cache friendly. 
-s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) xo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn) k, = s[C].op.reduce_axis ko, ki = s[C].split(k, factor=4) @@ -244,12 +245,12 @@ print(tvm.lower(s, [A, B, C], simple_mode=True)) # # We have to re-write the algorithm slightly. -packedB = tvm.compute((N / bn, K, bn), lambda x, y, z: B[y, x * bn + z], name='packedB') -C = tvm.compute((M, N), - lambda x, y: tvm.sum(A[x, k] * packedB[y // bn, k, tvm.indexmod(y, bn)], axis=k), +packedB = te.compute((N / bn, K, bn), lambda x, y, z: B[y, x * bn + z], name='packedB') +C = te.compute((M, N), + lambda x, y: te.sum(A[x, k] * packedB[y // bn, k, tvm.tir.indexmod(y, bn)], axis=k), name = 'C') -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) xo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn) k, = s[C].op.reduce_axis @@ -285,7 +286,7 @@ print(tvm.lower(s, [A, B, C], simple_mode=True)) # write to C when all the block results are ready. # -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) # Allocate write cache CC = s.cache_write(C, 'global') @@ -328,7 +329,7 @@ print(tvm.lower(s, [A, B, C], simple_mode=True)) # -------- # Futhermore, we can also utilize multi-core processors to do the thread-level parallelization. 
-s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) CC = s.cache_write(C, 'global') diff --git a/tutorials/optimize/opt_matmul_auto_tensorcore.py b/tutorials/optimize/opt_matmul_auto_tensorcore.py index a4658eb..490ccdb 100644 --- a/tutorials/optimize/opt_matmul_auto_tensorcore.py +++ b/tutorials/optimize/opt_matmul_auto_tensorcore.py @@ -46,12 +46,13 @@ import sys import numpy as np import tvm +from tvm import te from tvm import autotvm from tvm.contrib import nvcc def matmul_nn(A, B, L, dtype='float16', layout='NN'): - k = tvm.reduce_axis((0, L), name='k') + k = te.reduce_axis((0, L), name='k') if dtype == 'float16': out_type = 'float' elif dtype == 'int8': @@ -59,13 +60,13 @@ def matmul_nn(A, B, L, dtype='float16', layout='NN'): elif dtype == 'int4' or dtype == 'int1': out_type = 'int' if (layout == 'NN'): - return tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k].astype(out_type) * B[k, j].astype(out_type), axis=k)) + return te.compute((N, M), lambda i, j: te.sum(A[i, k].astype(out_type) * B[k, j].astype(out_type), axis=k)) if (layout == 'NT'): - return tvm.compute((N, M), lambda i, j: tvm.sum(A[k, i].astype(out_type) * B[k, j].astype(out_type), axis=k)) + return te.compute((N, M), lambda i, j: te.sum(A[k, i].astype(out_type) * B[k, j].astype(out_type), axis=k)) if (layout == 'TN'): - return tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k].astype(out_type) * B[j, k].astype(out_type), axis=k)) + return te.compute((N, M), lambda i, j: te.sum(A[i, k].astype(out_type) * B[j, k].astype(out_type), axis=k)) if (layout == 'TT'): - return tvm.compute((N, M), lambda i, j: tvm.sum(A[k, i].astype(out_type) * B[j, k].astype(out_type), axis=k)) + return te.compute((N, M), lambda i, j: te.sum(A[k, i].astype(out_type) * B[j, k].astype(out_type), axis=k)) ############################################################################### # Scheduling the Computation @@ -85,7 +86,7 @@ def matmul_nn(A, B, L, dtype='float16', layout='NN'): # (2) The warp tile size is not 
16x16x16 on CUDA9, or not one of {16x16x16, 32x8x16, 8x32x16} on CUDA version >= 10.0. # # In this schedule, storage_align is used to reduce bank conflicts of shared memory. Please refer to this -# `doc `_ +# `doc `_ # for the usage of storage_align primitive. In short, we need to add an offset to some shared memory buffer # to reduce bank conflicts. # According to the `wmma doc `_, @@ -111,11 +112,11 @@ def test_gemm(N, L, M, dtype, layout): else: print ("Unsupported layout:", layout) sys.exit(1); - A = tvm.placeholder(shape_a, name='A', dtype=dtype) - B = tvm.placeholder(shape_b, name='B', dtype=dtype) + A = te.placeholder(shape_a, name='A', dtype=dtype) + B = te.placeholder(shape_b, name='B', dtype=dtype) C = matmul_nn(A, B, L, dtype, layout) - s = tvm.create_schedule(C.op) + s = te.create_schedule(C.op) y, x = s[C].op.axis k = s[C].op.reduce_axis[0] @@ -182,11 +183,11 @@ def test_gemm(N, L, M, dtype, layout): tz, xi = s[C].split(xi, WX) tx, xi = s[C].split(xi, TX) s[C].reorder(yo, xo, tz, ty, tx, yi, xi) - s[C].bind(yo, tvm.thread_axis("blockIdx.y")) - s[C].bind(xo, tvm.thread_axis("blockIdx.x")) - s[C].bind(ty, tvm.thread_axis("threadIdx.y")) - s[C].bind(tz, tvm.thread_axis("threadIdx.z")) - s[C].bind(tx, tvm.thread_axis("threadIdx.x")) + s[C].bind(yo, te.thread_axis("blockIdx.y")) + s[C].bind(xo, te.thread_axis("blockIdx.x")) + s[C].bind(ty, te.thread_axis("threadIdx.y")) + s[C].bind(tz, te.thread_axis("threadIdx.z")) + s[C].bind(tx, te.thread_axis("threadIdx.x")) # schedule for CL stage ko, ki = s[CL].split(k, step_k * warp_tile_k) @@ -202,9 +203,9 @@ def test_gemm(N, L, M, dtype, layout): tx, vec = s[AA].split(tx, factor=v) fused = s[AA].fuse(s[AA].op.axis[0], xo) _, ty = s[AA].split(fused, factor=by) - s[AA].bind(ty, tvm.thread_axis("threadIdx.y")) - s[AA].bind(tz, tvm.thread_axis("threadIdx.z")) - s[AA].bind(tx, tvm.thread_axis("threadIdx.x")) + s[AA].bind(ty, te.thread_axis("threadIdx.y")) + s[AA].bind(tz, te.thread_axis("threadIdx.z")) + s[AA].bind(tx, 
te.thread_axis("threadIdx.x")) # vectorization is very important for float16/int8 inputs s[AA].vectorize(vec) @@ -215,9 +216,9 @@ def test_gemm(N, L, M, dtype, layout): tx, vec = s[BB].split(tx, factor=v) fused = s[BB].fuse(s[BB].op.axis[0], xo) _, ty = s[BB].split(fused, factor=by) - s[BB].bind(ty, tvm.thread_axis("threadIdx.y")) - s[BB].bind(tz, tvm.thread_axis("threadIdx.z")) - s[BB].bind(tx, tvm.thread_axis("threadIdx.x")) + s[BB].bind(ty, te.thread_axis("threadIdx.y")) + s[BB].bind(tz, te.thread_axis("threadIdx.z")) + s[BB].bind(tx, te.thread_axis("threadIdx.x")) s[BB].vectorize(vec) s[AL].compute_at(s[CL], kl) @@ -286,7 +287,7 @@ def tune_and_evaluate(M, N, L, dtype, layout): print(best_config) with autotvm.apply_history_best('matmul.log'): with tvm.target.create("cuda"): - with tvm.build_config(): + with tvm.target.build_config(): s, arg_bufs = test_gemm(N, L, M, dtype, layout) print(tvm.lower(s, arg_bufs, simple_mode=True)) func = tvm.build(s, arg_bufs) diff --git a/tutorials/relay_quick_start.py b/tutorials/relay_quick_start.py index d272a0e..b258d1b 100644 --- a/tutorials/relay_quick_start.py +++ b/tutorials/relay_quick_start.py @@ -42,6 +42,7 @@ import numpy as np from tvm import relay from tvm.relay import testing import tvm +from tvm import te from tvm.contrib import graph_runtime ###################################################################### diff --git a/tutorials/tensor_expr_get_started.py b/tutorials/tensor_expr_get_started.py index ca92b3b..ecd3f2b 100644 --- a/tutorials/tensor_expr_get_started.py +++ b/tutorials/tensor_expr_get_started.py @@ -28,6 +28,7 @@ the tensor expression language. from __future__ import absolute_import, print_function import tvm +from tvm import te import numpy as np # Global declarations of environment. @@ -62,10 +63,10 @@ tgt="cuda" # No computation happens during this phase, as we are only declaring how # the computation should be done. 
# -n = tvm.var("n") -A = tvm.placeholder((n,), name='A') -B = tvm.placeholder((n,), name='B') -C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C") +n = te.var("n") +A = te.placeholder((n,), name='A') +B = te.placeholder((n,), name='B') +C = te.compute(A.shape, lambda i: A[i] + B[i], name="C") print(type(C)) ###################################################################### @@ -88,7 +89,7 @@ print(type(C)) # C[i] = A[i] + B[i]; # } # -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) ###################################################################### # We used the split construct to split the first axis of C, @@ -114,8 +115,8 @@ bx, tx = s[C].split(C.op.axis[0], factor=64) # to generate code that runs on GPU. # if tgt == "cuda" or tgt == "rocm" or tgt.startswith('opencl'): - s[C].bind(bx, tvm.thread_axis("blockIdx.x")) - s[C].bind(tx, tvm.thread_axis("threadIdx.x")) + s[C].bind(bx, te.thread_axis("blockIdx.x")) + s[C].bind(tx, te.thread_axis("threadIdx.x")) ###################################################################### # Compilation @@ -188,7 +189,7 @@ else: # arrays with different shapes into fadd, an error will be raised. # # We can do more specializations. For example, we can write -# :code:`n = tvm.convert(1024)` instead of :code:`n = tvm.var("n")`, +# :code:`n = tvm.runtime.convert(1024)` instead of :code:`n = te.var("n")`, # in the computation declaration. The generated function will # only take vectors with length 1024. 
# diff --git a/tutorials/topi/intro_topi.py b/tutorials/topi/intro_topi.py index 2e04982..5bb5f0a 100644 --- a/tutorials/topi/intro_topi.py +++ b/tutorials/topi/intro_topi.py @@ -26,6 +26,7 @@ In this tutorial, we will see how TOPI can save us from writing boilerplates cod from __future__ import absolute_import, print_function import tvm +from tvm import te import topi import numpy as np @@ -36,12 +37,12 @@ import numpy as np # To compute the sum of rows of a two dimensional TVM tensor A, we should # specify the symbolic operation as well as schedule as follows # -n = tvm.var("n") -m = tvm.var("m") -A = tvm.placeholder((n, m), name='A') -k = tvm.reduce_axis((0, m), "k") -B = tvm.compute((n,), lambda i: tvm.sum(A[i, k], axis=k), name="B") -s = tvm.create_schedule(B.op) +n = te.var("n") +m = te.var("m") +A = te.placeholder((n, m), name='A') +k = te.reduce_axis((0, m), "k") +B = te.compute((n,), lambda i: te.sum(A[i, k], axis=k), name="B") +s = te.create_schedule(B.op) ###################################################################### # and to examine the IR code in human readable format, we can do @@ -50,11 +51,11 @@ print(tvm.lower(s, [A], simple_mode=True)) ###################################################################### # However, for such a common operation we had to define the reduce axis ourselves as well as explicit computation with -# :code:`tvm.compute`. Imagine for more complicated operations how much details we need to provide. +# :code:`te.compute`. Imagine for more complicated operations how much details we need to provide. 
# Fortunately, we can replace those two lines with simple :code:`topi.sum` much like :code:`numpy.sum` # C = topi.sum(A, axis=1) -ts = tvm.create_schedule(C.op) +ts = te.create_schedule(C.op) print(tvm.lower(ts, [A], simple_mode=True)) ###################################################################### @@ -64,8 +65,8 @@ print(tvm.lower(ts, [A], simple_mode=True)) # Even shorter, TOPI provides operator overloading for such common operations. For example, # x, y = 100, 10 -a = tvm.placeholder((x, y, y), name="a") -b = tvm.placeholder((y, y), name="b") +a = te.placeholder((x, y, y), name="a") +b = te.placeholder((y, y), name="b") c = a + b # same as topi.broadcast_add d = a * b # same as topi.broadcast_mul @@ -110,7 +111,7 @@ tvm.testing.assert_allclose(g_nd.asnumpy(), g_np, rtol=1e-5) ###################################################################### # TOPI also provides common neural nets operations such as _softmax_ with optimized schedule # -tarray = tvm.placeholder((512, 512), name="tarray") +tarray = te.placeholder((512, 512), name="tarray") softmax_topi = topi.nn.softmax(tarray) with tvm.target.create("cuda"): sst = topi.cuda.schedule_softmax(softmax_topi) @@ -129,8 +130,8 @@ with tvm.target.create("cuda"): # compute declaration and schedule. TVM will choose the right function to call with # the target information. -data = tvm.placeholder((1, 3, 224, 224)) -kernel = tvm.placeholder((10, 3, 5, 5)) +data = te.placeholder((1, 3, 224, 224)) +kernel = te.placeholder((10, 3, 5, 5)) with tvm.target.create("cuda"): conv = topi.cuda.conv2d_nchw(data, kernel, 1, 2, 1) diff --git a/vta/apps/gemm/python/tsim.py b/vta/apps/gemm/python/tsim.py index c0f7b13..85fd463 100644 --- a/vta/apps/gemm/python/tsim.py +++ b/vta/apps/gemm/python/tsim.py @@ -16,6 +16,7 @@ # under the License. 
import tvm +from tvm import te import ctypes import os.path as osp from sys import platform diff --git a/vta/apps/gemm/tests/python/chisel_accel.py b/vta/apps/gemm/tests/python/chisel_accel.py index 4666661..441f36d 100644 --- a/vta/apps/gemm/tests/python/chisel_accel.py +++ b/vta/apps/gemm/tests/python/chisel_accel.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te import numpy as np import tsim import sys @@ -32,7 +33,7 @@ C: 2d matrix where each cloumn (because of bit packing) represents each bit slic """ def slice(A, slice_width): assert np.log2(slice_width) % 1 == 0, "only power of 2 is supported" - dtype = type(A[0]) + dtype = type(A[0]) row = 0 # currently only supports uint if dtype is np.uint8: row = 8 // slice_width @@ -45,7 +46,7 @@ def slice(A, slice_width): else: dtype = 'uint8' - C = np.zeros((row, len(A))).astype(dtype) # sliced and transform + C = np.zeros((row, len(A))).astype(dtype) # sliced and transform # create mask slice_mask = 2**(slice_width)-1 @@ -57,7 +58,7 @@ def slice(A, slice_width): def slice_mat(A, slice_width): assert np.log2(slice_width) % 1 == 0, "only power of 2 is supported" - dtype = type(A[0][0]) + dtype = type(A[0][0]) row = 0 # currently only supports uint if dtype is np.uint8: row = 8 // slice_width @@ -71,7 +72,7 @@ def slice_mat(A, slice_width): dtype = 'uint8' # 3d array (bits, row, clmn) - C = np.zeros((row, A.shape[0], A.shape[1])).astype(dtype) # sliced and transform + C = np.zeros((row, A.shape[0], A.shape[1])).astype(dtype) # sliced and transform # create mask slice_mask = 2**(slice_width)-1 @@ -162,16 +163,16 @@ def test_accel(A, B, i_width, w_width): for i in range(len(a_arr)): for j in range(len(b_arr)): shift = np.uint8(i*i_width + j*w_width) - if i == 0 and j == 0: + if i == 0 and j == 0: cycles += f(b_arr[j], a_arr[i], shift, accum, np.uint32(1)) # reset accumulator - else: + else: cycles += f(b_arr[j], a_arr[i], shift, accum, np.uint32(0)) # no reset return (accum.asnumpy(), cycles) """ 
Matrix Generator Parameters ----------- +---------- dtype : String, datatype generated (supports only uint) i_width : weight bit slices(needs to be less than actual bit width) w_width : activation bit slices(needs to be less than actual bit width) @@ -179,9 +180,9 @@ w_width : activation bit slices(needs to be less than actual bit width) def top_test(dtype, i_width, w_width): # only supports positive values (up to 2**(bits-1)) - rmax = 127 + rmax = 127 # (m,16) * (16,16) GEMM - rrow = np.random.randint(7) + 1 + rrow = np.random.randint(7) + 1 clmn = 16 A = np.random.randint(rmax, size=(rrow,clmn)).astype(dtype) B = np.random.randint(rmax, size=(clmn,clmn)).astype(dtype) @@ -196,8 +197,8 @@ if __name__ == "__main__": for i in range(1): # reg1 and reg2 bits in hardware/chisel/src/main/Compute.scala must be modified for slices greater than 8 bits if sys.argv[1] == 'serial': - # generates a random uint8 GEMM with 2-bit(8/4) input and 4-bit(8/2) weight + # generates a random uint8 GEMM with 2-bit(8/4) input and 4-bit(8/2) weight top_test("uint8", 4, 2) elif sys.argv[1] == 'parallel': - # generates a random uint8 GEMM with 8-bit input and 8-bit weight (bit parallel) + # generates a random uint8 GEMM with 8-bit input and 8-bit weight (bit parallel) top_test('uint8', 8, 8) diff --git a/vta/apps/tsim_example/python/tsim.py b/vta/apps/tsim_example/python/tsim.py index c0f7b13..85fd463 100644 --- a/vta/apps/tsim_example/python/tsim.py +++ b/vta/apps/tsim_example/python/tsim.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te import ctypes import os.path as osp from sys import platform diff --git a/vta/apps/tsim_example/tests/python/chisel_accel.py b/vta/apps/tsim_example/tests/python/chisel_accel.py index 1749aaa..370ac40 100644 --- a/vta/apps/tsim_example/tests/python/chisel_accel.py +++ b/vta/apps/tsim_example/tests/python/chisel_accel.py @@ -16,6 +16,7 @@ # under the License. 
import tvm +from tvm import te import numpy as np import tsim diff --git a/vta/apps/tsim_example/tests/python/verilog_accel.py b/vta/apps/tsim_example/tests/python/verilog_accel.py index 578a7c3..3489ff2 100644 --- a/vta/apps/tsim_example/tests/python/verilog_accel.py +++ b/vta/apps/tsim_example/tests/python/verilog_accel.py @@ -16,6 +16,7 @@ # under the License. import tvm +from tvm import te import numpy as np import tsim diff --git a/vta/python/vta/build_module.py b/vta/python/vta/build_module.py index 4a62d35..4c33d36 100644 --- a/vta/python/vta/build_module.py +++ b/vta/python/vta/build_module.py @@ -16,8 +16,6 @@ # under the License. # pylint: disable=unused-argument """VTA specific buildin for runtime.""" -from __future__ import absolute_import as _abs - import tvm from . import ir_pass from .environment import get_env @@ -26,13 +24,13 @@ from .environment import get_env def lift_coproc_scope(x): """Lift coprocessings cope to the """ x = ir_pass.lift_alloc_to_scope_begin(x) - x = tvm.ir_pass.LiftAttrScope(x, "coproc_scope", False) + x = tvm.tir.ir_pass.LiftAttrScope(x, "coproc_scope", False) return x def early_rewrite(stmt): """Try to do storage rewrite in early pass.""" try: - return tvm.ir_pass.StorageRewrite(stmt) + return tvm.tir.ir_pass.StorageRewrite(stmt) except tvm.error.TVMError: return stmt @@ -63,7 +61,7 @@ def build_config(debug_flag=0, **kwargs): """ env = get_env() def add_debug(stmt): - debug = tvm.call_extern( + debug = tvm.tir.call_extern( "int32", "VTASetDebugMode", env.dev.command_handle, debug_flag) @@ -73,17 +71,17 @@ def build_config(debug_flag=0, **kwargs): (1, ir_pass.inject_dma_intrin), (1, ir_pass.inject_skip_copy), (1, ir_pass.annotate_alu_coproc_scope), - (1, lambda x: tvm.ir_pass.LiftAttrScope(x, "coproc_uop_scope", True)), + (1, lambda x: tvm.tir.ir_pass.LiftAttrScope(x, "coproc_uop_scope", True)), (1, lift_coproc_scope), (1, ir_pass.inject_coproc_sync), (1, early_rewrite)] if debug_flag: pass_list.append((1, add_debug)) 
pass_list.append((2, ir_pass.inject_alu_intrin)) - pass_list.append((3, tvm.ir_pass.LowerStorageAccessInfo)) + pass_list.append((3, tvm.tir.ir_pass.LowerStorageAccessInfo)) pass_list.append((3, ir_pass.fold_uop_loop)) pass_list.append((3, ir_pass.cpu_access_rewrite)) - return tvm.build_config(add_lower_pass=pass_list, **kwargs) + return tvm.target.build_config(add_lower_pass=pass_list, **kwargs) def lower(*args, **kwargs): diff --git a/vta/python/vta/environment.py b/vta/python/vta/environment.py index 8d58958..49b78b3 100644 --- a/vta/python/vta/environment.py +++ b/vta/python/vta/environment.py @@ -22,6 +22,7 @@ import os import json import copy import tvm +from tvm import te from . import intrin from .pkg_config import PkgConfig @@ -61,9 +62,9 @@ class DevContext(object): QID_COMPUTE = 2 def __init__(self, env): - self.vta_axis = tvm.thread_axis("vta") + self.vta_axis = te.thread_axis("vta") self.vta_push_uop = tvm.tir.StringImm("VTAPushGEMMOp") - ctx = tvm.call_extern("handle", "VTATLSCommandHandle") + ctx = tvm.tir.call_extern("handle", "VTATLSCommandHandle") self.command_handle = tvm.tir.Call( "handle", "tvm_thread_context", [ctx], tvm.tir.Call.Intrinsic, None, 0) @@ -284,14 +285,14 @@ def mem_info_acc_buffer(): @tvm.register_func("tvm.intrin.rule.default.vta.coproc_sync") def coproc_sync(op): _ = op - return tvm.call_extern( + return tvm.tir.call_extern( "int32", "VTASynchronize", get_env().dev.command_handle, 1<<31) @tvm.register_func("tvm.intrin.rule.default.vta.coproc_dep_push") def coproc_dep_push(op): - return tvm.call_extern( + return tvm.tir.call_extern( "int32", "VTADepPush", get_env().dev.command_handle, op.args[0], op.args[1]) @@ -299,7 +300,7 @@ def coproc_dep_push(op): @tvm.register_func("tvm.intrin.rule.default.vta.coproc_dep_pop") def coproc_dep_pop(op): - return tvm.call_extern( + return tvm.tir.call_extern( "int32", "VTADepPop", get_env().dev.command_handle, op.args[0], op.args[1]) diff --git a/vta/python/vta/intrin.py 
b/vta/python/vta/intrin.py index a43fc75..8532ffa 100644 --- a/vta/python/vta/intrin.py +++ b/vta/python/vta/intrin.py @@ -18,6 +18,7 @@ from __future__ import absolute_import as _abs import tvm +from tvm import te def gemm(env, mock=False): """Matrix-matrix multiply intrinsic @@ -45,26 +46,26 @@ def gemm(env, mock=False): out_shape = (env.BATCH, env.BLOCK_OUT) assert out_shape[0] * out_shape[1] == out_lanes - wgt = tvm.placeholder((wgt_shape[0], wgt_shape[1]), - dtype="int%d" % env.WGT_WIDTH, - name=env.wgt_scope) - inp = tvm.placeholder((inp_shape[0], inp_shape[1]), - dtype="int%d" % env.INP_WIDTH, - name=env.inp_scope) - k = tvm.reduce_axis((0, wgt_shape[1]), name="k") + wgt = te.placeholder((wgt_shape[0], wgt_shape[1]), + dtype="int%d" % env.WGT_WIDTH, + name=env.wgt_scope) + inp = te.placeholder((inp_shape[0], inp_shape[1]), + dtype="int%d" % env.INP_WIDTH, + name=env.inp_scope) + k = te.reduce_axis((0, wgt_shape[1]), name="k") out_dtype = "int%d" % env.ACC_WIDTH - out = tvm.compute((out_shape[0], out_shape[1]), - lambda i, j: tvm.sum(inp[i, k].astype(out_dtype) * - wgt[j, k].astype(out_dtype), - axis=[k]), - name="out") - wgt_layout = tvm.decl_buffer( + out = te.compute((out_shape[0], out_shape[1]), + lambda i, j: te.sum(inp[i, k].astype(out_dtype) * + wgt[j, k].astype(out_dtype), + axis=[k]), + name="out") + wgt_layout = tvm.tir.decl_buffer( wgt.shape, wgt.dtype, env.wgt_scope, scope=env.wgt_scope, offset_factor=wgt_lanes, data_alignment=wgt_lanes) - inp_layout = tvm.decl_buffer( + inp_layout = tvm.tir.decl_buffer( inp.shape, inp.dtype, env.inp_scope, scope=env.inp_scope, offset_factor=inp_lanes, data_alignment=inp_lanes) - out_layout = tvm.decl_buffer( + out_layout = tvm.tir.decl_buffer( out.shape, out.dtype, env.acc_scope, scope=env.acc_scope, offset_factor=out_lanes, data_alignment=out_lanes) @@ -74,14 +75,14 @@ def gemm(env, mock=False): dout = outs[0] def instr(index): """Generate matrix-matrix multiply VTA instruction""" - irb = tvm.ir_builder.create() 
+ irb = tvm.tir.ir_builder.create() dev = env.dev irb.scope_attr(dev.vta_axis, "coproc_scope", dev.get_task_qid(dev.QID_COMPUTE)) irb.scope_attr(dev.vta_axis, "coproc_uop_scope", dev.vta_push_uop) if index in (0, 2): - irb.emit(tvm.call_extern( + irb.emit(tvm.tir.call_extern( "int32", "VTAUopPush", 0, 0, dout.access_ptr("rw", "int32"), @@ -89,7 +90,7 @@ def gemm(env, mock=False): dwgt.access_ptr("r", "int32"), 0, 0, 0)) else: - irb.emit(tvm.call_extern( + irb.emit(tvm.tir.call_extern( "int32", "VTAUopPush", 0, 1, dout.access_ptr("rw", "int32"), @@ -103,8 +104,8 @@ def gemm(env, mock=False): return (nop, nop, nop) return (instr(0), instr(1), instr(2)) - return tvm.decl_tensor_intrin(out.op, intrin_func, - name="GEMM", - binds={inp: inp_layout, - wgt: wgt_layout, - out: out_layout}) + return te.decl_tensor_intrin(out.op, intrin_func, + name="GEMM", + binds={inp: inp_layout, + wgt: wgt_layout, + out: out_layout}) diff --git a/vta/python/vta/ir_pass.py b/vta/python/vta/ir_pass.py index 0c9b2ea..4f8deff 100644 --- a/vta/python/vta/ir_pass.py +++ b/vta/python/vta/ir_pass.py @@ -17,6 +17,7 @@ """Additional IR Pass for VTA""" # pylint: disable=len-as-condition, no-else-return import tvm +from tvm import te from topi import util from .environment import get_env @@ -82,7 +83,7 @@ def fold_uop_loop(stmt_in): fail[0] = True return op if gemm_offsets[i] is not None: - if not tvm.ir_pass.Equal(m[0], gemm_offsets[i]): + if not tvm.tir.ir_pass.Equal(m[0], gemm_offsets[i]): fail[0] = True return op args.append(m[1]) @@ -90,23 +91,23 @@ def fold_uop_loop(stmt_in): gemm_offsets[i] = m[0] args.append(m[1]) args += op.args[base_args+3:] - return tvm.call_extern("int32", "VTAUopPush", *args) + return tvm.tir.call_extern("int32", "VTAUopPush", *args) if op.name not in ("VTATLSCommandHandle", "tvm_thread_context"): raise RuntimeError("unexpected op %s" % op) return op - ret = tvm.ir_pass.IRTransform( + ret = tvm.tir.ir_pass.IRTransform( stmt.body, None, _post_order, ["Call"]) if not 
fail[0] and all(x is not None for x in gemm_offsets): def _visit(op): if op.same_as(loop_var): fail[0] = True - tvm.ir_pass.PostOrderVisit(ret, _visit) + tvm.tir.ir_pass.PostOrderVisit(ret, _visit) if not fail[0]: - begin = tvm.call_extern( + begin = tvm.tir.call_extern( "int32", "VTAUopLoopBegin", stmt.extent, *gemm_offsets) - end = tvm.call_extern("int32", "VTAUopLoopEnd") + end = tvm.tir.call_extern("int32", "VTAUopLoopEnd") return [begin, ret, end] raise ValueError("Failed to fold the GEMM instructions..") @@ -137,7 +138,7 @@ def fold_uop_loop(stmt_in): return tvm.tir.AttrStmt( stmt.node, stmt.attr_key, stmt.value, body) return None - out = tvm.ir_pass.IRTransform( + out = tvm.tir.ir_pass.IRTransform( stmt_in, _do_fold, None, ["AttrStmt"]) return out @@ -169,7 +170,7 @@ def cpu_access_rewrite(stmt_in): return None new_var = rw_info[buffer_var] let_stmt = tvm.tir.LetStmt( - new_var, tvm.call_extern( + new_var, tvm.tir.call_extern( "handle", "VTABufferCPUPtr", env.dev.command_handle, buffer_var), op.body) @@ -181,23 +182,23 @@ def cpu_access_rewrite(stmt_in): if isinstance(op, tvm.tir.Load): buffer_var = op.buffer_var if not buffer_var in rw_info: - rw_info[buffer_var] = tvm.var( + rw_info[buffer_var] = te.var( buffer_var.name + "_ptr", "handle") new_var = rw_info[buffer_var] return tvm.tir.Load(op.dtype, new_var, op.index) if isinstance(op, tvm.tir.Store): buffer_var = op.buffer_var if not buffer_var in rw_info: - rw_info[buffer_var] = tvm.var( + rw_info[buffer_var] = te.var( buffer_var.name + "_ptr", "handle") new_var = rw_info[buffer_var] return tvm.tir.Store(new_var, op.value, op.index) raise RuntimeError("not reached") - stmt = tvm.ir_pass.IRTransform( + stmt = tvm.tir.ir_pass.IRTransform( stmt_in, None, _post_order, ["Allocate", "Load", "Store"]) for buffer_var, new_var in rw_info.items(): stmt = tvm.tir.LetStmt( - new_var, tvm.call_extern( + new_var, tvm.tir.call_extern( "handle", "VTABufferCPUPtr", env.dev.command_handle, buffer_var), stmt) @@ -259,7 
+260,7 @@ def lift_alloc_to_scope_begin(stmt_in): if isinstance(op, tvm.tir.For): return _merge_block(lift_stmt.pop() + [op], op.body) raise RuntimeError("not reached") - stmt = tvm.ir_pass.IRTransform( + stmt = tvm.tir.ir_pass.IRTransform( stmt_in, _pre_order, _post_order, ["Allocate", "AttrStmt", "For"]) assert len(lift_stmt) == 1 return _merge_block(lift_stmt[0], stmt) @@ -282,7 +283,7 @@ def inject_skip_copy(stmt_in): if _match_pragma(stmt, "skip_dma_copy"): return tvm.tir.Evaluate(0) return None - return tvm.ir_pass.IRTransform( + return tvm.tir.ir_pass.IRTransform( stmt_in, _do_fold, None, ["AttrStmt"]) @@ -313,9 +314,9 @@ def inject_coproc_sync(stmt_in): op.loop_var, op.min, 2, op.for_type, op.device_api, op.body) return None - stmt = tvm.ir_pass.IRTransform( + stmt = tvm.tir.ir_pass.IRTransform( stmt_in, None, _do_fold, ["AttrStmt"]) - stmt = tvm.ir_pass.CoProcSync(stmt) + stmt = tvm.tir.ir_pass.CoProcSync(stmt) return stmt @@ -333,12 +334,12 @@ def inject_dma_intrin(stmt_in): Transformed statement """ env = get_env() - idxd = tvm.indexdiv - idxm = tvm.indexmod + idxd = tvm.tir.indexdiv + idxm = tvm.tir.indexmod def _check_compact(buf): ndim = len(buf.shape) - size = tvm.const(1, buf.shape[0].dtype) + size = tvm.tir.const(1, buf.shape[0].dtype) for i in reversed(range(ndim)): if not util.equal_const_int(size - buf.strides[i], 0): raise RuntimeError( @@ -380,7 +381,7 @@ def inject_dma_intrin(stmt_in): break x_size = x_size * buf.shape[k] next_base = i + 1 - shape.append(tvm.ir_pass.Simplify(x_size)) + shape.append(tvm.tir.ir_pass.Simplify(x_size)) strides.append(x_stride) assert next_base != base base = next_base @@ -491,10 +492,10 @@ def inject_dma_intrin(stmt_in): _check_compact(src) x_size, y_size, x_stride, offset = _get_2d_pattern( dst, elem_width, elem_bytes, data_type, src.scope, allow_fold=True) - irb = tvm.ir_builder.create() + irb = tvm.tir.ir_builder.create() irb.scope_attr(env.dev.vta_axis, "coproc_scope", env.dev.get_task_qid(task_qid)) - 
irb.emit(tvm.call_extern( + irb.emit(tvm.tir.call_extern( "int32", "VTAStoreBuffer2D", env.dev.command_handle, src.access_ptr("r", "int32"), @@ -561,11 +562,11 @@ def inject_dma_intrin(stmt_in): src, elem_width, elem_bytes, data_type, dst.scope, allow_fold=allow_fold) - irb = tvm.ir_builder.create() + irb = tvm.tir.ir_builder.create() irb.scope_attr(env.dev.vta_axis, "coproc_scope", env.dev.get_task_qid(task_qid)) - irb.emit(tvm.call_extern( + irb.emit(tvm.tir.call_extern( "int32", "VTALoadBuffer2D", env.dev.command_handle, src.data, offset, x_size, y_size, x_stride, @@ -577,7 +578,7 @@ def inject_dma_intrin(stmt_in): else: raise RuntimeError("Do not support copy %s->%s" % (src.scope, dst.scope)) - return tvm.ir_pass.InjectCopyIntrin(stmt_in, "dma_copy", _inject_copy) + return tvm.tir.ir_pass.InjectCopyIntrin(stmt_in, "dma_copy", _inject_copy) def _get_gemm_intrin_buffer(): @@ -594,26 +595,26 @@ def _get_gemm_intrin_buffer(): assert out_lanes == env.BATCH * env.BLOCK_OUT out_shape = (env.BATCH, env.BLOCK_OUT) assert out_shape[0] * out_shape[1] == out_lanes - wgt = tvm.placeholder((wgt_shape[0], wgt_shape[1]), - dtype="int%d" % env.WGT_WIDTH, - name=env.wgt_scope) - inp = tvm.placeholder((inp_shape[0], inp_shape[1]), - dtype="int%d" % env.INP_WIDTH, - name=env.inp_scope) - k = tvm.reduce_axis((0, wgt_shape[1]), name="k") + wgt = te.placeholder((wgt_shape[0], wgt_shape[1]), + dtype="int%d" % env.WGT_WIDTH, + name=env.wgt_scope) + inp = te.placeholder((inp_shape[0], inp_shape[1]), + dtype="int%d" % env.INP_WIDTH, + name=env.inp_scope) + k = te.reduce_axis((0, wgt_shape[1]), name="k") out_dtype = "int%d" % env.ACC_WIDTH - out = tvm.compute((out_shape[0], out_shape[1]), - lambda i, j: tvm.sum(inp[i, k].astype(out_dtype) * - wgt[j, k].astype(out_dtype), - axis=[k]), - name="out") - wgt_layout = tvm.decl_buffer( + out = te.compute((out_shape[0], out_shape[1]), + lambda i, j: te.sum(inp[i, k].astype(out_dtype) * + wgt[j, k].astype(out_dtype), + axis=[k]), + name="out") + 
wgt_layout = tvm.tir.decl_buffer( wgt.shape, wgt.dtype, env.wgt_scope, scope=env.wgt_scope, offset_factor=wgt_lanes, data_alignment=wgt_lanes) - inp_layout = tvm.decl_buffer( + inp_layout = tvm.tir.decl_buffer( inp.shape, inp.dtype, env.inp_scope, scope=env.inp_scope, offset_factor=inp_lanes, data_alignment=inp_lanes) - out_layout = tvm.decl_buffer( + out_layout = tvm.tir.decl_buffer( out.shape, out.dtype, env.acc_scope, scope=env.acc_scope, offset_factor=out_lanes, data_alignment=out_lanes) @@ -648,30 +649,30 @@ def inject_conv2d_transpose_skip(stmt_in): def _do_fold(op): if _match_pragma(op, "conv2d_transpose_gemm"): is_init = ".init" in str(op) - tvm.ir_pass.PostOrderVisit(op, _find_basics) + tvm.tir.ir_pass.PostOrderVisit(op, _find_basics) if is_init: # create inner most block - irb = tvm.ir_builder.create() + irb = tvm.tir.ir_builder.create() dev = env.dev irb.scope_attr(dev.vta_axis, "coproc_scope", dev.get_task_qid(dev.QID_COMPUTE)) irb.scope_attr(dev.vta_axis, "coproc_uop_scope", dev.vta_push_uop) - irb.emit(tvm.call_extern("int32", "VTAUopPush", - 0, 1, - dout.access_ptr("rw", "int32"), - 0, 0, - 0, 0, 0)) + irb.emit(tvm.tir.call_extern("int32", "VTAUopPush", + 0, 1, + dout.access_ptr("rw", "int32"), + 0, 0, + 0, 0, 0)) inner = irb.get() # TODO(@tmoreau89): This is only a temporary fix, please take a look. 
body = op.body.body - while isinstance(body, tvm.stmt.IfThenElse): + while isinstance(body, tvm.tir.IfThenElse): body = body.then_case args = body.args res_tensor = body.func.output(0) tpl = (args[0], 1, args[1], 1, args[2], 1, args[3], 1, 0, 1, 0, env.BLOCK_OUT) inner = tvm.tir.AttrStmt( [dout, res_tensor], 'buffer_bind_scope', - tvm.call_intrin('handle', 'tvm_tuple', *tpl), inner) + tvm.tir.call_intrin('handle', 'tvm_tuple', *tpl), inner) return inner else: conv_call, data_call, kernel_call = calls[-3:] @@ -682,20 +683,20 @@ def inject_conv2d_transpose_skip(stmt_in): if selects: condition = selects[0].condition else: - condition = tvm.const(1, 'int') + condition = tvm.tir.const(1, 'int') # create inner most block - irb = tvm.ir_builder.create() + irb = tvm.tir.ir_builder.create() with irb.if_scope(condition): dev = env.dev irb.scope_attr(dev.vta_axis, "coproc_scope", dev.get_task_qid(dev.QID_COMPUTE)) irb.scope_attr(dev.vta_axis, "coproc_uop_scope", dev.vta_push_uop) - irb.emit(tvm.call_extern("int32", "VTAUopPush", - 0, 0, - dout.access_ptr("rw", "int32"), - dinp.access_ptr("r", "int32"), - dwgt.access_ptr("r", "int32"), - 0, 0, 0)) + irb.emit(tvm.tir.call_extern("int32", "VTAUopPush", + 0, 0, + dout.access_ptr("rw", "int32"), + dinp.access_ptr("r", "int32"), + dwgt.access_ptr("r", "int32"), + 0, 0, 0)) inner = irb.get() args = conv_call.args @@ -703,22 +704,22 @@ def inject_conv2d_transpose_skip(stmt_in): 1, 0, 1, 0, env.BLOCK_OUT) inner = tvm.tir.AttrStmt( [dout, res_tensor], 'buffer_bind_scope', - tvm.call_intrin('handle', 'tvm_tuple', *tpl), inner) + tvm.tir.call_intrin('handle', 'tvm_tuple', *tpl), inner) args = kernel_call.args tpl = (args[0], 1, args[1], 1, args[2], 1, args[3], 1, 0, env.BLOCK_OUT, 0, env.BLOCK_IN) inner = tvm.tir.AttrStmt( [dwgt, kernel_tensor], 'buffer_bind_scope', - tvm.call_intrin('handle', 'tvm_tuple', *tpl), inner) + tvm.tir.call_intrin('handle', 'tvm_tuple', *tpl), inner) args = data_call.args tpl = (args[0], 1, args[1], 1, 
args[2], 1, args[3], 1, 0, 1, 0, env.BLOCK_IN) inner = tvm.tir.AttrStmt( [dinp, pad_data_tensor], 'buffer_bind_scope', - tvm.call_intrin('handle', 'tvm_tuple', *tpl), inner) + tvm.tir.call_intrin('handle', 'tvm_tuple', *tpl), inner) return inner return None - ret = tvm.ir_pass.IRTransform( + ret = tvm.tir.ir_pass.IRTransform( stmt_in, _do_fold, None, ["AttrStmt"]) return ret @@ -739,7 +740,7 @@ def annotate_alu_coproc_scope(stmt_in): env = get_env() def _do_fold(stmt): if _match_pragma(stmt, "alu"): - irb = tvm.ir_builder.create() + irb = tvm.tir.ir_builder.create() irb.scope_attr(env.dev.vta_axis, "coproc_scope", env.dev.get_task_qid(env.dev.QID_COMPUTE)) irb.scope_attr(env.dev.vta_axis, "coproc_uop_scope", @@ -750,7 +751,7 @@ def annotate_alu_coproc_scope(stmt_in): return tvm.tir.Evaluate(0) return stmt - stmt_out = tvm.ir_pass.IRTransform( + stmt_out = tvm.tir.ir_pass.IRTransform( stmt_in, None, _do_fold, ["AttrStmt"]) return stmt_out @@ -770,11 +771,11 @@ def inject_alu_intrin(stmt_in): Transformed statement """ env = get_env() - idxm = tvm.indexmod + idxm = tvm.tir.indexmod def _do_fold(stmt): def _equal(x, y): - return tvm.ir_pass.Equal(tvm.ir_pass.Simplify(x - y), 0) + return tvm.tir.ir_pass.Equal(tvm.tir.ir_pass.Simplify(x - y), 0) def _flatten_loop(src_coeff, dst_coeff, extents): src_coeff = list(src_coeff) @@ -793,7 +794,7 @@ def inject_alu_intrin(stmt_in): next_ext = extents.pop() if _equal(next_src, vsrc * vext) and _equal(next_dst, vdst * vext): - vext = tvm.ir_pass.Simplify(vext * next_ext) + vext = tvm.tir.ir_pass.Simplify(vext * next_ext) else: rev_src_coeff.append(vsrc) rev_dst_coeff.append(vdst) @@ -853,7 +854,7 @@ def inject_alu_intrin(stmt_in): if loop_body.value.name == 'shift_left': alu_opcode = env.dev.ALU_OPCODE_SHR lhs = loop_body.value.args[0] - rhs = tvm.ir_pass.Simplify(-loop_body.value.args[1]) + rhs = tvm.tir.ir_pass.Simplify(-loop_body.value.args[1]) elif loop_body.value.name == 'shift_right': alu_opcode = env.dev.ALU_OPCODE_SHR lhs = 
loop_body.value.args[0] @@ -864,7 +865,7 @@ def inject_alu_intrin(stmt_in): elif isinstance(loop_body.value, tvm.tir.Load): alu_opcode = env.dev.ALU_OPCODE_SHR lhs = loop_body.value - rhs = tvm.const(0, "int32") + rhs = tvm.tir.const(0, "int32") else: raise RuntimeError( "Expression not recognized %s, %s, %s" % ( @@ -894,9 +895,9 @@ def inject_alu_intrin(stmt_in): lhs_equal = True rhs_equal = True for i, coef in enumerate(dst_coeff): - if not tvm.ir_pass.Equal(coef, src_lhs_coeff[i]): + if not tvm.tir.ir_pass.Equal(coef, src_lhs_coeff[i]): lhs_equal = False - if not tvm.ir_pass.Equal(coef, src_rhs_coeff[i]): + if not tvm.tir.ir_pass.Equal(coef, src_rhs_coeff[i]): rhs_equal = False # Make sure at least one of the source is identical to the # destination (in-place computation) @@ -915,20 +916,20 @@ def inject_alu_intrin(stmt_in): assert len(src_coeff) > 1 assert len(dst_coeff) > 1 assert len(extents) != 0 - assert tvm.ir_pass.Equal( - tvm.ir_pass.Simplify( + assert tvm.tir.ir_pass.Equal( + tvm.tir.ir_pass.Simplify( idxm(src_coeff[-1], env.BATCH * env.BLOCK_OUT)), 0) - assert tvm.ir_pass.Equal( - tvm.ir_pass.Simplify( + assert tvm.tir.ir_pass.Equal( + tvm.tir.ir_pass.Simplify( idxm(dst_coeff[-1], env.BATCH * env.BLOCK_OUT)), 0) - assert tvm.ir_pass.Equal(src_coeff[-2], 1) - assert tvm.ir_pass.Equal(dst_coeff[-2], 1) + assert tvm.tir.ir_pass.Equal(src_coeff[-2], 1) + assert tvm.tir.ir_pass.Equal(dst_coeff[-2], 1) if env.BATCH > 1: assert len(src_coeff) > 2 assert len(dst_coeff) > 2 assert len(extents) > 1 - assert tvm.ir_pass.Equal(src_coeff[-3], env.BLOCK_OUT) - assert tvm.ir_pass.Equal(dst_coeff[-3], env.BLOCK_OUT) + assert tvm.tir.ir_pass.Equal(src_coeff[-3], env.BLOCK_OUT) + assert tvm.tir.ir_pass.Equal(dst_coeff[-3], env.BLOCK_OUT) # Apply tensorization of the loop coefficients src_offset = src_coeff[-1] @@ -944,22 +945,22 @@ def inject_alu_intrin(stmt_in): src_coeff.append(src_offset) dst_coeff.append(dst_offset) src_coeff = [ - tvm.ir_pass.Simplify(c // 
(env.BATCH * env.BLOCK_OUT)) for c in src_coeff] + tvm.tir.ir_pass.Simplify(c // (env.BATCH * env.BLOCK_OUT)) for c in src_coeff] dst_coeff = [ - tvm.ir_pass.Simplify(c // (env.BATCH * env.BLOCK_OUT)) for c in dst_coeff] + tvm.tir.ir_pass.Simplify(c // (env.BATCH * env.BLOCK_OUT)) for c in dst_coeff] # Flatten the outer loops if extents: src_coeff, dst_coeff, extents = _flatten_loop(src_coeff, dst_coeff, extents) # Insert ALU micro-ops - irb = tvm.ir_builder.create() + irb = tvm.tir.ir_builder.create() for idx, extent in enumerate(extents): - irb.emit(tvm.call_extern( + irb.emit(tvm.tir.call_extern( "int32", "VTAUopLoopBegin", extent, dst_coeff[idx], src_coeff[idx], 0)) use_imm = int(use_imm) - irb.emit(tvm.call_extern( + irb.emit(tvm.tir.call_extern( "int32", "VTAUopPush", 1, 0, dst_coeff[len(dst_coeff)-1], @@ -967,12 +968,12 @@ def inject_alu_intrin(stmt_in): 0, alu_opcode, use_imm, imm_val)) for extent in extents: - irb.emit(tvm.call_extern( + irb.emit(tvm.tir.call_extern( "int32", "VTAUopLoopEnd")) return irb.get() return stmt - stmt_out = tvm.ir_pass.IRTransform( + stmt_out = tvm.tir.ir_pass.IRTransform( stmt_in, None, _do_fold, ["AttrStmt"]) return stmt_out diff --git a/vta/python/vta/pkg_config.py b/vta/python/vta/pkg_config.py index 0720e2f..0516e83 100644 --- a/vta/python/vta/pkg_config.py +++ b/vta/python/vta/pkg_config.py @@ -193,7 +193,7 @@ class PkgConfig(object): self.inp_mem_size = 1 << cfg["LOG_INP_BUFF_SIZE"] # bytes self.inp_mem_banks = (inp_mem_bus_width + \ max_bus_width - 1) // \ - max_bus_width + max_bus_width self.inp_mem_width = min(inp_mem_bus_width, max_bus_width) self.inp_mem_depth = self.inp_mem_size * 8 // inp_mem_bus_width self.inp_mem_axi_ratio = self.inp_mem_width // mem_bus_width @@ -204,7 +204,7 @@ class PkgConfig(object): self.wgt_mem_size = 1 << cfg["LOG_WGT_BUFF_SIZE"] # bytes self.wgt_mem_banks = (wgt_mem_bus_width + \ max_bus_width - 1) // \ - max_bus_width + max_bus_width self.wgt_mem_width = min(wgt_mem_bus_width, 
max_bus_width) self.wgt_mem_depth = self.wgt_mem_size * 8 // wgt_mem_bus_width self.wgt_mem_axi_ratio = self.wgt_mem_width // mem_bus_width @@ -215,7 +215,7 @@ class PkgConfig(object): self.out_mem_size = 1 << cfg["LOG_OUT_BUFF_SIZE"] # bytes self.out_mem_banks = (out_mem_bus_width + \ max_bus_width - 1) // \ - max_bus_width + max_bus_width self.out_mem_width = min(out_mem_bus_width, max_bus_width) self.out_mem_depth = self.out_mem_size * 8 // out_mem_bus_width self.out_mem_axi_ratio = self.out_mem_width // mem_bus_width @@ -235,23 +235,23 @@ class PkgConfig(object): self.macro_defs.append("-DVTA_STORE_ADDR=%s" % (self.store_base_addr)) # IP register offsets self.macro_defs.append("-DVTA_FETCH_INSN_COUNT_OFFSET=%s" % \ - (self.fetch_insn_count_offset)) + (self.fetch_insn_count_offset)) self.macro_defs.append("-DVTA_FETCH_INSN_ADDR_OFFSET=%s" % \ - (self.fetch_insn_addr_offset)) + (self.fetch_insn_addr_offset)) self.macro_defs.append("-DVTA_LOAD_INP_ADDR_OFFSET=%s" % \ - (self.load_inp_addr_offset)) + (self.load_inp_addr_offset)) self.macro_defs.append("-DVTA_LOAD_WGT_ADDR_OFFSET=%s" % \ - (self.load_wgt_addr_offset)) + (self.load_wgt_addr_offset)) self.macro_defs.append("-DVTA_COMPUTE_DONE_WR_OFFSET=%s" % \ - (self.compute_done_wr_offet)) + (self.compute_done_wr_offet)) self.macro_defs.append("-DVTA_COMPUTE_DONE_RD_OFFSET=%s" % \ - (self.compute_done_rd_offet)) + (self.compute_done_rd_offet)) self.macro_defs.append("-DVTA_COMPUTE_UOP_ADDR_OFFSET=%s" % \ - (self.compute_uop_addr_offset)) + (self.compute_uop_addr_offset)) self.macro_defs.append("-DVTA_COMPUTE_BIAS_ADDR_OFFSET=%s" % \ - (self.compute_bias_addr_offset)) + (self.compute_bias_addr_offset)) self.macro_defs.append("-DVTA_STORE_OUT_ADDR_OFFSET=%s" % \ - (self.store_out_addr_offset)) + (self.store_out_addr_offset)) # Coherency if coherent: self.macro_defs.append("-DVTA_COHERENT_ACCESSES=true") diff --git a/vta/python/vta/top/bitpack.py b/vta/python/vta/top/bitpack.py index 6e9d57b..7a07100 100644 --- 
a/vta/python/vta/top/bitpack.py +++ b/vta/python/vta/top/bitpack.py @@ -20,6 +20,7 @@ from __future__ import absolute_import as _abs import tvm +from tvm import te from topi import util from tvm.relay.op.op import register_compute, register_injective_schedule @@ -59,7 +60,7 @@ def bitpack(data, bits, pack_type="int8", name="bitpack"): def _bitpack(*indices): ret = None - mask = tvm.const((1 << bits) - 1, pack_type) + mask = tvm.tir.const((1 << bits) - 1, pack_type) for k in range(lanes): idx = list(indices) idx[-1] = idx[-1] * lanes + k @@ -67,11 +68,11 @@ def bitpack(data, bits, pack_type="int8", name="bitpack"): if k == 0: ret = elem & mask else: - val = (elem & mask) << tvm.const(k * bits, pack_type) + val = (elem & mask) << tvm.tir.const(k * bits, pack_type) ret = ret | val return ret - return tvm.compute( + return te.compute( oshape, _bitpack, name=name, tag='bitpack') diff --git a/vta/python/vta/top/op.py b/vta/python/vta/top/op.py index 04e14b1..2198ed4 100644 --- a/vta/python/vta/top/op.py +++ b/vta/python/vta/top/op.py @@ -19,6 +19,7 @@ from __future__ import absolute_import as _abs import tvm +from tvm import te import topi from tvm.relay.op import op as reg @@ -42,13 +43,13 @@ def compute_clip_vta(attrs, inputs, output_type): x = inputs[0] a_min = attrs.a_min a_max = attrs.a_max - const_min = tvm.const(a_min, x.dtype) - const_max = tvm.const(a_max, x.dtype) - with tvm.tag_scope(topi.tag.ELEMWISE): - x = tvm.compute( - x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA") - x = tvm.compute( - x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB") + const_min = tvm.tir.const(a_min, x.dtype) + const_max = tvm.tir.const(a_max, x.dtype) + with tvm.te.tag_scope(topi.tag.ELEMWISE): + x = te.compute( + x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA") + x = te.compute( + x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB") return [x] def clip_strategy_vta(attrs, inputs, out_type, target): diff --git 
a/vta/python/vta/top/vta_conv2d.py b/vta/python/vta/top/vta_conv2d.py index ba93b05..5b23dde 100644 --- a/vta/python/vta/top/vta_conv2d.py +++ b/vta/python/vta/top/vta_conv2d.py @@ -19,6 +19,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm import topi @@ -44,14 +45,14 @@ def conv2d_packed(cfg, data, kernel, strides, padding, dilation, layout, out_dty ishape = topi.util.get_const_tuple(data.shape) kshape = topi.util.get_const_tuple(kernel.shape) - d_i = tvm.reduce_axis((0, kshape[2]), name='d_i') - d_j = tvm.reduce_axis((0, kshape[3]), name='d_j') - k_o = tvm.reduce_axis((0, ishape[1]), name='k_o') - k_i = tvm.reduce_axis((0, ishape[-1]), name='k_i') + d_i = te.reduce_axis((0, kshape[2]), name='d_i') + d_j = te.reduce_axis((0, kshape[3]), name='d_j') + k_o = te.reduce_axis((0, ishape[1]), name='k_o') + k_i = te.reduce_axis((0, ishape[-1]), name='k_i') hstride, wstride = strides - res = tvm.compute( + res = te.compute( oshape, - lambda b_o, c_o, i, j, b_i, c_i: tvm.sum( + lambda b_o, c_o, i, j, b_i, c_i: te.sum( pad_data[b_o, k_o, i*hstride+d_i, j*wstride+d_j, b_i, k_i].astype(out_dtype) * kernel[c_o, k_o, d_i, d_j, c_i, k_i].astype(out_dtype), axis=[k_o, d_i, d_j, k_i]), @@ -81,7 +82,7 @@ def schedule_conv2d_packed(cfg, outs): else: ewise_ops.append(op) for tensor in op.input_tensors: - if isinstance(tensor.op, tvm.tensor.PlaceholderOp): + if isinstance(tensor.op, tvm.te.PlaceholderOp): ewise_inputs.append((op, tensor)) else: _traverse(tensor.op) @@ -92,7 +93,7 @@ def schedule_conv2d_packed(cfg, outs): _traverse(output.op) assert len(conv2d_res) == 1 conv2d_stage = conv2d_res[0].output(0) - s = tvm.create_schedule(output.op) + s = te.create_schedule(output.op) ##### space definition begin ##### b, c_o, x_i, x_j, _, _ = s[conv2d_stage].op.axis @@ -107,7 +108,7 @@ def schedule_conv2d_packed(cfg, outs): ###### space definition end ###### data, kernel = conv2d_stage.op.input_tensors - if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" 
in data.op.tag: + if isinstance(data.op, tvm.te.ComputeOp) and "pad" in data.op.tag: temp = data.op.input_tensors[0] pad_data = data data = temp @@ -160,13 +161,13 @@ def schedule_conv2d_packed(cfg, outs): if cfg['oc_nthread'].val > 1: _, v_t = s[output].split(x_co0, factor=cfg['oc_nthread'].val) s[output].reorder(v_t, x_bo) - s[output].bind(v_t, tvm.thread_axis("cthread")) + s[output].bind(v_t, te.thread_axis("cthread")) # virtual threading along spatial rows if cfg['h_nthread'].val > 1: _, v_t = s[output].split(x_i0, factor=cfg['h_nthread'].val) s[output].reorder(v_t, x_bo) - s[output].bind(v_t, tvm.thread_axis("cthread")) + s[output].bind(v_t, te.thread_axis("cthread")) x_bo, x_co, x_i, x_j, x_bi, x_ci = s[conv2d_stage].op.axis k_o, d_i, d_j, k_i = s[conv2d_stage].op.reduce_axis diff --git a/vta/python/vta/top/vta_conv2d_transpose.py b/vta/python/vta/top/vta_conv2d_transpose.py index a3fd7ac..4f213f6 100644 --- a/vta/python/vta/top/vta_conv2d_transpose.py +++ b/vta/python/vta/top/vta_conv2d_transpose.py @@ -19,6 +19,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm import topi from topi.util import get_const_tuple @@ -52,14 +53,14 @@ def conv2d_transpose_packed(cfg, data, kernel, strides, padding, out_dtype): out_h = (i_h - 1) * stride_h - fpad_top - fpad_bottom + k_h out_w = (i_w - 1) * stride_w - fpad_left - fpad_right + k_w oshape = (b, c_o, out_h, out_w, t_b, t_co) - d_c = tvm.reduce_axis((0, c_i), name='d_c') - d_h = tvm.reduce_axis((0, k_h), name='d_h') - d_w = tvm.reduce_axis((0, k_w), name='d_w') - d_ci = tvm.reduce_axis((0, t_ci), name='d_ci') + d_c = te.reduce_axis((0, c_i), name='d_c') + d_h = te.reduce_axis((0, k_h), name='d_h') + d_w = te.reduce_axis((0, k_w), name='d_w') + d_ci = te.reduce_axis((0, t_ci), name='d_ci') - out = tvm.compute( + out = te.compute( oshape, - lambda i_n, i_c, i_h, i_w, j_n, j_c: tvm.sum( + lambda i_n, i_c, i_h, i_w, j_n, j_c: te.sum( data_pad(i_n, d_c, i_h + d_h, i_w + d_w, j_n, 
d_ci).astype(out_dtype) * kernel[i_c, d_c, d_h, d_w, j_c, d_ci].astype(out_dtype), axis=[d_c, d_h, d_w, d_ci]), @@ -87,7 +88,7 @@ def schedule_conv2d_transpose_packed(cfg, outs): if not op.same_as(output.op): ewise_ops.append(op) for tensor in op.input_tensors: - if isinstance(tensor.op, tvm.tensor.PlaceholderOp): + if isinstance(tensor.op, tvm.te.PlaceholderOp): ewise_inputs.append((op, tensor)) else: _traverse(tensor.op) @@ -98,7 +99,7 @@ def schedule_conv2d_transpose_packed(cfg, outs): _traverse(output.op) assert len(conv2d_res) == 1 conv2d_stage = conv2d_res[0].output(0) - s = tvm.create_schedule(output.op) + s = te.create_schedule(output.op) ##### space definition begin ##### b, c_o, x_i, x_j, _, c_i = s[conv2d_stage].op.axis @@ -113,7 +114,7 @@ def schedule_conv2d_transpose_packed(cfg, outs): ###### space definition end ###### data, kernel = conv2d_stage.op.input_tensors - if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag: + if isinstance(data.op, tvm.te.ComputeOp) and "pad" in data.op.tag: temp = data.op.input_tensors[0] pad_data = data data = temp @@ -162,13 +163,13 @@ def schedule_conv2d_transpose_packed(cfg, outs): if cfg['oc_nthread'].val > 1: _, v_t = s[output].split(x_co0, factor=cfg['oc_nthread'].val) s[output].reorder(v_t, x_bo) - s[output].bind(v_t, tvm.thread_axis("cthread")) + s[output].bind(v_t, te.thread_axis("cthread")) # virtual threading along spatial rows if cfg['h_nthread'].val > 1: _, v_t = s[output].split(x_i0, factor=cfg['h_nthread'].val) s[output].reorder(v_t, x_bo) - s[output].bind(v_t, tvm.thread_axis("cthread")) + s[output].bind(v_t, te.thread_axis("cthread")) x_bo, x_co, x_i, x_j, x_bi, x_ci = s[conv2d_stage].op.axis k_o, d_i, d_j, k_i = s[conv2d_stage].op.reduce_axis diff --git a/vta/python/vta/top/vta_dense.py b/vta/python/vta/top/vta_dense.py index e239104..912f41f 100644 --- a/vta/python/vta/top/vta_dense.py +++ b/vta/python/vta/top/vta_dense.py @@ -19,6 +19,7 @@ import numpy as np import tvm +from tvm import 
te from tvm import autotvm import topi @@ -48,11 +49,11 @@ def dense_packed(cfg, data, weight, bias=None, out_dtype=None): # Reduction axes (input channel) assert ishape[1] == wshape[1] assert ishape[3] == wshape[3] - k_o = tvm.reduce_axis((0, ishape[1]), name='k_o') - k_i = tvm.reduce_axis((0, ishape[3]), name='k_i') - res = tvm.compute( + k_o = te.reduce_axis((0, ishape[1]), name='k_o') + k_i = te.reduce_axis((0, ishape[3]), name='k_i') + res = te.compute( oshape, - lambda b_o, c_o, b_i, c_i: tvm.sum( + lambda b_o, c_o, b_i, c_i: te.sum( data[b_o, k_o, b_i, k_i].astype(out_dtype) * weight[c_o, k_o, c_i, k_i].astype(out_dtype), axis=[k_o, k_i]), @@ -83,7 +84,7 @@ def schedule_dense_packed(cfg, outs): else: ewise_ops.append(op) for tensor in op.input_tensors: - if isinstance(tensor.op, tvm.tensor.PlaceholderOp): + if isinstance(tensor.op, tvm.te.PlaceholderOp): ewise_inputs.append((op, tensor)) else: _traverse(tensor.op) @@ -94,7 +95,7 @@ def schedule_dense_packed(cfg, outs): _traverse(output.op) assert len(dense_res) == 1 dense_stage = dense_res[0].output(0) - s = tvm.create_schedule(output.op) + s = te.create_schedule(output.op) ##### space definition begin ##### b, c_o, _, _ = s[dense_stage].op.axis @@ -147,7 +148,7 @@ def schedule_dense_packed(cfg, outs): if cfg['oc_nthread'].val > 1: _, v_t = s[output].split(x_co, factor=cfg['oc_nthread'].val) s[output].reorder(v_t, x_bo) - s[output].bind(v_t, tvm.thread_axis("cthread")) + s[output].bind(v_t, te.thread_axis("cthread")) x_bo, x_co, x_bi, _ = s[dense_stage].op.axis k_o, _ = s[dense_stage].op.reduce_axis diff --git a/vta/python/vta/top/vta_group_conv2d.py b/vta/python/vta/top/vta_group_conv2d.py index aa06c61..d470fb7 100644 --- a/vta/python/vta/top/vta_group_conv2d.py +++ b/vta/python/vta/top/vta_group_conv2d.py @@ -19,6 +19,7 @@ import numpy as np import tvm +from tvm import te from tvm import autotvm import topi @@ -54,14 +55,14 @@ def group_conv2d_packed(cfg, kshape = topi.util.get_const_tuple(kernel.shape) 
assert group * kshape[1] == ishape[1] assert kshape[0] % group == 0 - d_i = tvm.reduce_axis((0, kshape[2]), name='d_i') - d_j = tvm.reduce_axis((0, kshape[3]), name='d_j') - k_o = tvm.reduce_axis((0, kshape[1]), name='k_o') - k_i = tvm.reduce_axis((0, kshape[-1]), name='k_i') + d_i = te.reduce_axis((0, kshape[2]), name='d_i') + d_j = te.reduce_axis((0, kshape[3]), name='d_j') + k_o = te.reduce_axis((0, kshape[1]), name='k_o') + k_i = te.reduce_axis((0, kshape[-1]), name='k_i') hstride, wstride = strides - out = tvm.compute( + out = te.compute( oshape, - lambda b_o, c_o, i, j, b_i, c_i: tvm.sum( + lambda b_o, c_o, i, j, b_i, c_i: te.sum( pad_data[b_o, c_o // (kshape[0] // group) * kshape[1] + k_o, i * hstride + d_i, j * wstride + d_j, b_i, k_i].astype(out_dtype) * kernel[c_o, k_o, d_i, d_j, c_i, k_i].astype(out_dtype), @@ -95,7 +96,7 @@ def schedule_group_conv2d_packed(cfg, outs): else: ewise_ops.append(op) for tensor in op.input_tensors: - if isinstance(tensor.op, tvm.tensor.PlaceholderOp): + if isinstance(tensor.op, tvm.te.PlaceholderOp): ewise_inputs.append((op, tensor)) else: _traverse(tensor.op) @@ -106,7 +107,7 @@ def schedule_group_conv2d_packed(cfg, outs): _traverse(output.op) assert len(conv2d_res) == 1 conv2d_stage = conv2d_res[0].output(0) - s = tvm.create_schedule(output.op) + s = te.create_schedule(output.op) ##### space definition begin ##### b, c_o, x_i, x_j, _, _ = s[conv2d_stage].op.axis @@ -121,7 +122,7 @@ def schedule_group_conv2d_packed(cfg, outs): ###### space definition end ###### data, kernel = conv2d_stage.op.input_tensors - if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag: + if isinstance(data.op, tvm.te.ComputeOp) and "pad" in data.op.tag: temp = data.op.input_tensors[0] pad_data = data data = temp @@ -174,13 +175,13 @@ def schedule_group_conv2d_packed(cfg, outs): if cfg['oc_nthread'].val > 1: _, v_t = s[output].split(x_co0, factor=cfg['oc_nthread'].val) s[output].reorder(v_t, x_bo) - s[output].bind(v_t, 
tvm.thread_axis("cthread")) + s[output].bind(v_t, te.thread_axis("cthread")) # virtual threading along spatial rows if cfg['h_nthread'].val > 1: _, v_t = s[output].split(x_i0, factor=cfg['h_nthread'].val) s[output].reorder(v_t, x_bo) - s[output].bind(v_t, tvm.thread_axis("cthread")) + s[output].bind(v_t, te.thread_axis("cthread")) x_bo, x_co, x_i, x_j, x_bi, x_ci = s[conv2d_stage].op.axis k_o, d_i, d_j, k_i = s[conv2d_stage].op.reduce_axis diff --git a/vta/scripts/tune_conv2d.py b/vta/scripts/tune_conv2d.py index 265a639..6d0b5d4 100644 --- a/vta/scripts/tune_conv2d.py +++ b/vta/scripts/tune_conv2d.py @@ -22,6 +22,7 @@ import logging import os import tvm +from tvm import te from tvm import autotvm import topi import vta @@ -48,13 +49,13 @@ resnet_wkls = [ ('resnet-18.C11', Workload(env.BATCH, 7, 7, 512, 512, 3, 3, 1, 1, 1, 1)), ] -@tvm.tag_scope(tag=topi.tag.ELEMWISE) +@tvm.te.tag_scope(tag=topi.tag.ELEMWISE) def my_clip(x, a_min, a_max): """Unlike topi's current clip, put min and max into two stages.""" - const_min = tvm.const(a_min, x.dtype) - const_max = tvm.const(a_max, x.dtype) - x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA") - x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB") + const_min = tvm.tir.const(a_min, x.dtype) + const_max = tvm.tir.const(a_max, x.dtype) + x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA") + x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB") return x def conv2d(N, CI, H, W, CO, KH, KW, strides, padding, dilation): @@ -62,9 +63,9 @@ def conv2d(N, CI, H, W, CO, KH, KW, strides, padding, dilation): kernel_shape = (CO//env.BLOCK_OUT, CI//env.BLOCK_IN, KH, KW, env.BLOCK_OUT, env.BLOCK_IN) bias_shape = (N//env.BATCH, CO//env.BLOCK_OUT, 1, 1, env.BATCH, env.BLOCK_OUT) - data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype) - kernel = tvm.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) - bias = 
tvm.placeholder(bias_shape, name="bias", dtype=env.acc_dtype) + data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype) + kernel = te.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) + bias = te.placeholder(bias_shape, name="bias", dtype=env.acc_dtype) with tvm.target.vta(): res = topi.nn.conv2d( @@ -83,7 +84,7 @@ def conv2d(N, CI, H, W, CO, KH, KW, strides, padding, dilation): if tvm.target.Target.current().device_name == 'vta': s = topi.generic.schedule_conv2d_nchw([res]) else: - s = tvm.create_schedule([res.op]) + s = te.create_schedule([res.op]) return s, [data, kernel, bias, res] diff --git a/vta/scripts/tune_conv2d_transpose.py b/vta/scripts/tune_conv2d_transpose.py index d6475ab..0871367 100644 --- a/vta/scripts/tune_conv2d_transpose.py +++ b/vta/scripts/tune_conv2d_transpose.py @@ -22,6 +22,7 @@ import logging import os import tvm +from tvm import te from tvm import autotvm import topi import vta @@ -41,21 +42,21 @@ dcgan_wkls = [ ('DCGAN.CT3', Workload(env.BATCH, 16, 16, 256, 128, 4, 4, 1, 1, 2, 2)), ] -@tvm.tag_scope(tag=topi.tag.ELEMWISE) +@tvm.te.tag_scope(tag=topi.tag.ELEMWISE) def my_clip(x, a_min, a_max): """Unlike topi's current clip, put min and max into two stages.""" - const_min = tvm.const(a_min, x.dtype) - const_max = tvm.const(a_max, x.dtype) - x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA") - x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB") + const_min = tvm.tir.const(a_min, x.dtype) + const_max = tvm.tir.const(a_max, x.dtype) + x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA") + x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB") return x def conv2d_transpose(N, CI, H, W, CO, KH, KW, strides, padding): data_shape = (N//env.BATCH, CI//env.BLOCK_IN, H, W, env.BATCH, env.BLOCK_IN) kernel_shape = (CO//env.BLOCK_OUT, CI//env.BLOCK_IN, KH, KW, env.BLOCK_OUT, env.BLOCK_IN) - data = tvm.placeholder(data_shape, 
name="data", dtype=env.inp_dtype) - kernel = tvm.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) + data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype) + kernel = te.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) with tvm.target.vta(): res = topi.nn.conv2d_transpose_nchw( @@ -71,7 +72,7 @@ def conv2d_transpose(N, CI, H, W, CO, KH, KW, strides, padding): if tvm.target.Target.current().device_name == 'vta': s = topi.generic.schedule_conv2d_transpose_nchw([res]) else: - s = tvm.create_schedule([res.op]) + s = te.create_schedule([res.op]) return s, [data, kernel, res] diff --git a/vta/scripts/tune_dense.py b/vta/scripts/tune_dense.py index fa49be7..e54de1d 100644 --- a/vta/scripts/tune_dense.py +++ b/vta/scripts/tune_dense.py @@ -22,6 +22,7 @@ import logging import os import tvm +from tvm import te from tvm import autotvm import topi import vta @@ -37,21 +38,21 @@ dense_wkls = [ ('lstm.dense.4', Workload(4, 256, 128)), ] -@tvm.tag_scope(tag=topi.tag.ELEMWISE) +@tvm.te.tag_scope(tag=topi.tag.ELEMWISE) def my_clip(x, a_min, a_max): """Unlike topi's current clip, put min and max into two stages.""" - const_min = tvm.const(a_min, x.dtype) - const_max = tvm.const(a_max, x.dtype) - x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA") - x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB") + const_min = tvm.tir.const(a_min, x.dtype) + const_max = tvm.tir.const(a_max, x.dtype) + x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA") + x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB") return x def dense(N, CI, CO): data_shape = (N//env.BATCH, CI//env.BLOCK_IN, env.BATCH, env.BLOCK_IN) kernel_shape = (CO//env.BLOCK_OUT, CI//env.BLOCK_IN, env.BLOCK_OUT, env.BLOCK_IN) - data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype) - kernel = tvm.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) + data = 
te.placeholder(data_shape, name="data", dtype=env.inp_dtype) + kernel = te.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) with tvm.target.vta(): res = topi.nn.dense(data, kernel, None, 'int32') @@ -62,7 +63,7 @@ def dense(N, CI, CO): if tvm.target.Target.current().device_name == 'vta': s = topi.generic.schedule_dense([res]) else: - s = tvm.create_schedule([res.op]) + s = te.create_schedule([res.op]) return s, [data, kernel, res] diff --git a/vta/scripts/tune_group_conv2d.py b/vta/scripts/tune_group_conv2d.py index 555154d..72f9525 100644 --- a/vta/scripts/tune_group_conv2d.py +++ b/vta/scripts/tune_group_conv2d.py @@ -22,6 +22,7 @@ import logging import os import tvm +from tvm import te from tvm import autotvm import topi import vta @@ -46,13 +47,13 @@ mobilenet_wkls = [ ('mobilenet.D9', Workload(env.BATCH, 7, 7, 1024, 1024, 64, 3, 3, 1, 1, 1, 1)), ] -@tvm.tag_scope(tag=topi.tag.ELEMWISE) +@tvm.te.tag_scope(tag=topi.tag.ELEMWISE) def my_clip(x, a_min, a_max): """Unlike topi's current clip, put min and max into two stages.""" - const_min = tvm.const(a_min, x.dtype) - const_max = tvm.const(a_max, x.dtype) - x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA") - x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB") + const_min = tvm.tir.const(a_min, x.dtype) + const_max = tvm.tir.const(a_max, x.dtype) + x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA") + x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB") return x def group_conv2d(N, CI, H, W, CO, KH, KW, strides, padding, dilation, group): @@ -62,9 +63,9 @@ def group_conv2d(N, CI, H, W, CO, KH, KW, strides, padding, dilation, group): kernel_shape = (CO//env.BLOCK_OUT, CI_G//env.BLOCK_IN, KH, KW, env.BLOCK_OUT, env.BLOCK_IN) bias_shape = (N//env.BATCH, CO//env.BLOCK_OUT, 1, 1, env.BATCH, env.BLOCK_OUT) - data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype) - kernel = 
tvm.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) - bias = tvm.placeholder(bias_shape, name="bias", dtype=env.acc_dtype) + data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype) + kernel = te.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) + bias = te.placeholder(bias_shape, name="bias", dtype=env.acc_dtype) with tvm.target.vta(): res = topi.nn.group_conv2d_nchw( @@ -83,7 +84,7 @@ def group_conv2d(N, CI, H, W, CO, KH, KW, strides, padding, dilation, group): if tvm.target.Target.current().device_name == 'vta': s = topi.generic.schedule_group_conv2d_nchw([res]) else: - s = tvm.create_schedule([res.op]) + s = te.create_schedule([res.op]) return s, [data, kernel, bias, res] diff --git a/vta/scripts/tune_resnet.py b/vta/scripts/tune_resnet.py index cf6f426..1de35c0 100644 --- a/vta/scripts/tune_resnet.py +++ b/vta/scripts/tune_resnet.py @@ -24,6 +24,7 @@ from PIL import Image import topi import tvm +from tvm import te from tvm import rpc, autotvm, relay from tvm.autotvm.measure.measure_methods import request_remote from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner @@ -60,13 +61,13 @@ def parse_arguments(): def register_vta_tuning_tasks(): from tvm.autotvm.task.topi_integration import TaskExtractEnv, deserialize_args - @tvm.tag_scope(tag=topi.tag.ELEMWISE) + @tvm.te.tag_scope(tag=topi.tag.ELEMWISE) def my_clip(x, a_min, a_max): """Unlike topi's current clip, put min and max into two stages.""" - const_min = tvm.const(a_min, x.dtype) - const_max = tvm.const(a_max, x.dtype) - x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA") - x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB") + const_min = tvm.tir.const(a_min, x.dtype) + const_max = tvm.tir.const(a_max, x.dtype) + x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA") + x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB") return x # init autotvm env 
to register VTA operator @@ -87,7 +88,7 @@ def register_vta_tuning_tasks(): if tvm.target.Target.current().device_name == 'vta': s = topi.generic.schedule_conv2d_nchw([res]) else: - s = tvm.create_schedule([res.op]) + s = te.create_schedule([res.op]) return s, [A, W, res] @autotvm.task.register("topi_nn_dense", override=True) @@ -105,7 +106,7 @@ def register_vta_tuning_tasks(): if tvm.target.Target.current().device_name == 'vta': s = topi.generic.schedule_dense([res]) else: - s = tvm.create_schedule([res.op]) + s = te.create_schedule([res.op]) return s, [A, W, res] diff --git a/vta/tests/python/integration/test_benchmark_gemm.py b/vta/tests/python/integration/test_benchmark_gemm.py index d4eed91..e023c20 100644 --- a/vta/tests/python/integration/test_benchmark_gemm.py +++ b/vta/tests/python/integration/test_benchmark_gemm.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import tvm +from tvm import te import numpy as np from tvm.contrib import util import vta.testing @@ -38,37 +39,37 @@ def test_gemm(): # To compute number of ops, use a x2 factor for FMA num_ops = 2 * channel * channel * batch_size - ko = tvm.reduce_axis((0, channel // env.BLOCK_IN), name='ko') - ki = tvm.reduce_axis((0, env.BLOCK_IN), name='ki') + ko = te.reduce_axis((0, channel // env.BLOCK_IN), name='ko') + ki = te.reduce_axis((0, env.BLOCK_IN), name='ki') - data = tvm.placeholder(data_shape, + data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype) - weight = tvm.placeholder(weight_shape, + weight = te.placeholder(weight_shape, name="weight", dtype=env.wgt_dtype) - data_buf = tvm.compute(data_shape, + data_buf = te.compute(data_shape, lambda *i: data(*i), "data_buf") - weight_buf = tvm.compute(weight_shape, + weight_buf = te.compute(weight_shape, lambda *i: weight(*i), "weight_buf") - res_gem = tvm.compute(res_shape, - lambda bo, co, bi, ci: tvm.sum( + res_gem = te.compute(res_shape, + lambda bo, co, bi, ci: te.sum( data_buf[bo, ko, 
bi, ki].astype(env.acc_dtype) * weight_buf[co, ko, ci, ki].astype(env.acc_dtype), axis=[ko, ki]), name="res_gem") - res_shf = tvm.compute(res_shape, + res_shf = te.compute(res_shape, lambda *i: res_gem(*i)>>8, name="res_shf") - res_max = tvm.compute(res_shape, - lambda *i: tvm.max(res_shf(*i), 0), + res_max = te.compute(res_shape, + lambda *i: tvm.te.max(res_shf(*i), 0), "res_max") #relu - res_min = tvm.compute(res_shape, - lambda *i: tvm.min(res_max(*i), (1<<(env.INP_WIDTH-1))-1), + res_min = te.compute(res_shape, + lambda *i: tvm.te.min(res_max(*i), (1<<(env.INP_WIDTH-1))-1), "res_min") #relu - res = tvm.compute(res_shape, + res = te.compute(res_shape, lambda *i: res_min(*i).astype(env.inp_dtype), name="res") @@ -128,7 +129,7 @@ def test_gemm(): store_out, print_ir, check_correctness): - s = tvm.create_schedule(res.op) + s = te.create_schedule(res.op) s[data_buf].set_scope(env.inp_scope) s[weight_buf].set_scope(env.wgt_scope) s[res_gem].set_scope(env.acc_scope) diff --git a/vta/tests/python/integration/test_benchmark_topi_conv2d.py b/vta/tests/python/integration/test_benchmark_topi_conv2d.py index 6935e47..b3c36e8 100644 --- a/vta/tests/python/integration/test_benchmark_topi_conv2d.py +++ b/vta/tests/python/integration/test_benchmark_topi_conv2d.py @@ -25,6 +25,7 @@ import numpy as np from collections import namedtuple import tvm +from tvm import te from tvm import relay from tvm import autotvm from tvm.contrib import util @@ -61,13 +62,13 @@ resnet_wkls = [ ] # FIXME: we need a custom clip operator to circumvent a pattern detection limitation -@tvm.tag_scope(tag=topi.tag.ELEMWISE) +@tvm.te.tag_scope(tag=topi.tag.ELEMWISE) def my_clip(x, a_min, a_max): """Unlike topi's current clip, put min and max into two stages.""" - const_min = tvm.const(a_min, x.dtype) - const_max = tvm.const(a_max, x.dtype) - x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA") - x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB") + const_min 
= tvm.tir.const(a_min, x.dtype) + const_max = tvm.tir.const(a_max, x.dtype) + x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA") + x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB") return x def run_conv2d(env, remote, wl, target, @@ -104,9 +105,9 @@ def run_conv2d(env, remote, wl, target, data_shape = a_shape kernel_shape = w_shape bias_shape = b_shape - data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype) - kernel = tvm.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) - bias = tvm.placeholder(bias_shape, name="bias", dtype=env.acc_dtype) + data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype) + kernel = te.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) + bias = te.placeholder(bias_shape, name="bias", dtype=env.acc_dtype) padding = relay.nn.get_pad_tuple2d((wl.hpad, wl.wpad)) # Define base computation schedule diff --git a/vta/tests/python/integration/test_benchmark_topi_conv2d_transpose.py b/vta/tests/python/integration/test_benchmark_topi_conv2d_transpose.py index 2d96a73..90cc21f 100644 --- a/vta/tests/python/integration/test_benchmark_topi_conv2d_transpose.py +++ b/vta/tests/python/integration/test_benchmark_topi_conv2d_transpose.py @@ -25,6 +25,7 @@ import numpy as np from collections import namedtuple import tvm +from tvm import te from tvm import relay from tvm import autotvm from tvm.contrib import util @@ -53,13 +54,13 @@ dcgan_wklds = [ ] # FIXME: we need a custom clip operator to circumvent a pattern detection limitation -@tvm.tag_scope(tag=topi.tag.ELEMWISE) +@tvm.te.tag_scope(tag=topi.tag.ELEMWISE) def my_clip(x, a_min, a_max): """Unlike topi's current clip, put min and max into two stages.""" - const_min = tvm.const(a_min, x.dtype) - const_max = tvm.const(a_max, x.dtype) - x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA") - x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB") + const_min 
= tvm.tir.const(a_min, x.dtype) + const_max = tvm.tir.const(a_max, x.dtype) + x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA") + x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB") return x # Helper function to get factors @@ -102,8 +103,8 @@ def run_conv2d_transpose(env, remote, wl, target, else: data_shape = a_shape kernel_shape = w_shape - data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype) - kernel = tvm.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) + data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype) + kernel = te.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) padding = relay.nn.get_pad_tuple2d((wl.hpad, wl.wpad)) # Define base computation schedule diff --git a/vta/tests/python/integration/test_benchmark_topi_dense.py b/vta/tests/python/integration/test_benchmark_topi_dense.py index a0acdc3..95c491a 100644 --- a/vta/tests/python/integration/test_benchmark_topi_dense.py +++ b/vta/tests/python/integration/test_benchmark_topi_dense.py @@ -24,6 +24,7 @@ from collections import namedtuple import numpy as np import tvm +from tvm import te from tvm import autotvm from tvm.contrib import util from tvm.contrib.pickle_memoize import memoize @@ -35,13 +36,13 @@ import vta.testing from vta.testing import simulator # FIXME: we need a custom clip operator to circumvent a pattern detection limitation -@tvm.tag_scope(tag=topi.tag.ELEMWISE) +@tvm.te.tag_scope(tag=topi.tag.ELEMWISE) def my_clip(x, a_min, a_max): """Unlike topi's current clip, put min and max into two stages.""" - const_min = tvm.const(a_min, x.dtype) - const_max = tvm.const(a_max, x.dtype) - x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA") - x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB") + const_min = tvm.tir.const(a_min, x.dtype) + const_max = tvm.tir.const(a_max, x.dtype) + x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), 
const_max), name="clipA") + x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB") return x def run_gemm(env, remote, target, @@ -70,8 +71,8 @@ def run_gemm(env, remote, target, kernel_shape = w_shape fcompute = topi.x86.dense_nopack fschedule = topi.x86.schedule_dense_nopack - data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype) - kernel = tvm.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) + data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype) + kernel = te.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) # Define base computation schedule with target: diff --git a/vta/tests/python/integration/test_benchmark_topi_group_conv2d.py b/vta/tests/python/integration/test_benchmark_topi_group_conv2d.py index 31fef49..1d5838c 100644 --- a/vta/tests/python/integration/test_benchmark_topi_group_conv2d.py +++ b/vta/tests/python/integration/test_benchmark_topi_group_conv2d.py @@ -25,6 +25,7 @@ import numpy as np from collections import namedtuple import tvm +from tvm import te from tvm import relay from tvm import autotvm from tvm.contrib import util @@ -57,13 +58,13 @@ mobilenet_wkls = [ ] # FIXME: we need a custom clip operator to circumvent a pattern detection limitation -@tvm.tag_scope(tag=topi.tag.ELEMWISE) +@tvm.te.tag_scope(tag=topi.tag.ELEMWISE) def my_clip(x, a_min, a_max): """Unlike topi's current clip, put min and max into two stages.""" - const_min = tvm.const(a_min, x.dtype) - const_max = tvm.const(a_max, x.dtype) - x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA") - x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB") + const_min = tvm.tir.const(a_min, x.dtype) + const_max = tvm.tir.const(a_max, x.dtype) + x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA") + x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB") return x def run_group_conv2d(env, remote, wl, target, @@ -101,9 
+102,9 @@ def run_group_conv2d(env, remote, wl, target, data_shape = a_shape kernel_shape = w_shape bias_shape = b_shape - data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype) - kernel = tvm.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) - bias = tvm.placeholder(bias_shape, name="bias", dtype=env.acc_dtype) + data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype) + kernel = te.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) + bias = te.placeholder(bias_shape, name="bias", dtype=env.acc_dtype) padding = relay.nn.get_pad_tuple2d((wl.hpad, wl.wpad)) # Define base computation schedule diff --git a/vta/tests/python/pynq/test_program_rpc.py b/vta/tests/python/pynq/test_program_rpc.py index 2d8da5a..fb08735 100644 --- a/vta/tests/python/pynq/test_program_rpc.py +++ b/vta/tests/python/pynq/test_program_rpc.py @@ -16,6 +16,7 @@ # under the License. import os import tvm +from tvm import te from tvm import rpc from vta import get_bitstream_path, download_bitstream, program_fpga, reconfig_runtime diff --git a/vta/tests/python/unittest/test_vta_insn.py b/vta/tests/python/unittest/test_vta_insn.py index ef3c45c..c76636a 100644 --- a/vta/tests/python/unittest/test_vta_insn.py +++ b/vta/tests/python/unittest/test_vta_insn.py @@ -16,6 +16,7 @@ # under the License. 
"""Unit test VTA's instructions """ import tvm +from tvm import te import numpy as np import topi from tvm.contrib import util @@ -30,22 +31,22 @@ def test_save_load_out(): """Test save/store output command""" def _run(env, remote): n = 6 - x = tvm.placeholder( + x = te.placeholder( (n, n, env.BATCH, env.BLOCK_OUT), name="x", dtype=env.acc_dtype) - x_buf = tvm.compute( + x_buf = te.compute( (n, n, env.BATCH, env.BLOCK_OUT), lambda *i: x(*i), "x_buf") # insert no-op that won't be optimized away - y_buf = tvm.compute( + y_buf = te.compute( (n, n, env.BATCH, env.BLOCK_OUT), lambda *i: x_buf(*i)>>0, "y_buf") - y = tvm.compute( + y = te.compute( (n, n, env.BATCH, env.BLOCK_OUT), lambda *i: y_buf(*i).astype(env.inp_dtype), "y") # schedule - s = tvm.create_schedule(y.op) + s = te.create_schedule(y.op) s[x_buf].set_scope(env.acc_scope) s[x_buf].pragma(x_buf.op.axis[0], env.dma_copy) s[y_buf].set_scope(env.acc_scope) @@ -93,22 +94,22 @@ def test_padded_load(): # declare n = 3 m = 5 - x = tvm.placeholder( + x = te.placeholder( (n, m, env.BATCH, env.BLOCK_OUT), name="x", dtype=env.acc_dtype) x_buf = topi.nn.pad(x, pad_before, pad_after, name="y") # insert no-op that won't be optimized away - y_buf = tvm.compute((n + pad_before[0] + pad_after[0], + y_buf = te.compute((n + pad_before[0] + pad_after[0], m + pad_before[1] + pad_after[1], env.BATCH, env.BLOCK_OUT), lambda *i: x_buf(*i)>>0, "y_buf") - y = tvm.compute((n + pad_before[0] + pad_after[0], + y = te.compute((n + pad_before[0] + pad_after[0], m + pad_before[1] + pad_after[1], env.BATCH, env.BLOCK_OUT), lambda *i: y_buf(*i).astype(env.inp_dtype), "y") # schedule - s = tvm.create_schedule(y.op) + s = te.create_schedule(y.op) s[x_buf].set_scope(env.acc_scope) s[x_buf].pragma(x_buf.op.axis[0], env.dma_copy) s[y_buf].set_scope(env.acc_scope) @@ -167,32 +168,32 @@ def test_gemm(): o = 4 n = 1 m = 4 - x = tvm.placeholder((o, n, env.BATCH, env.BLOCK_IN), name="x", dtype=env.inp_dtype) - w = tvm.placeholder((m, n, env.BLOCK_OUT, 
env.BLOCK_IN), name="w", dtype=env.wgt_dtype) - x_buf = tvm.compute((o, n, env.BATCH, env.BLOCK_IN), lambda *i: x(*i), "x_buf") - w_buf = tvm.compute((m, n, env.BLOCK_OUT, env.BLOCK_IN), lambda *i: w(*i), "w_buf") - ko = tvm.reduce_axis((0, n), name="ko") - ki = tvm.reduce_axis((0, env.BLOCK_IN), name="ki") - y_gem = tvm.compute( + x = te.placeholder((o, n, env.BATCH, env.BLOCK_IN), name="x", dtype=env.inp_dtype) + w = te.placeholder((m, n, env.BLOCK_OUT, env.BLOCK_IN), name="w", dtype=env.wgt_dtype) + x_buf = te.compute((o, n, env.BATCH, env.BLOCK_IN), lambda *i: x(*i), "x_buf") + w_buf = te.compute((m, n, env.BLOCK_OUT, env.BLOCK_IN), lambda *i: w(*i), "w_buf") + ko = te.reduce_axis((0, n), name="ko") + ki = te.reduce_axis((0, env.BLOCK_IN), name="ki") + y_gem = te.compute( (o, m, env.BATCH, env.BLOCK_OUT), lambda bo, co, bi, ci: - tvm.sum(x_buf[bo, ko, bi, ki].astype(env.acc_dtype) * + te.sum(x_buf[bo, ko, bi, ki].astype(env.acc_dtype) * w_buf[co, ko, ci, ki].astype(env.acc_dtype), axis=[ko, ki]), name="y_gem") - y_shf = tvm.compute( + y_shf = te.compute( (o, m, env.BATCH, env.BLOCK_OUT), lambda *i: y_gem(*i)>>8, name="y_shf") - y_max = tvm.compute( + y_max = te.compute( (o, m, env.BATCH, env.BLOCK_OUT), - lambda *i: tvm.max(y_shf(*i), 0), + lambda *i: tvm.te.max(y_shf(*i), 0), "y_max") #relu - y_min = tvm.compute( + y_min = te.compute( (o, m, env.BATCH, env.BLOCK_OUT), - lambda *i: tvm.min(y_max(*i), (1<<(env.INP_WIDTH-1))-1), + lambda *i: tvm.te.min(y_max(*i), (1<<(env.INP_WIDTH-1))-1), "y_min") #relu - y = tvm.compute( + y = te.compute( (o, m, env.BATCH, env.BLOCK_OUT), lambda *i: y_min(*i).astype(env.inp_dtype), name="y") @@ -240,7 +241,7 @@ def test_gemm(): def test_schedule1(): # default schedule with no smt - s = tvm.create_schedule(y.op) + s = te.create_schedule(y.op) # set the scope of the SRAM buffers s[x_buf].set_scope(env.inp_scope) s[w_buf].set_scope(env.wgt_scope) @@ -270,7 +271,7 @@ def test_gemm(): def test_smt(): # test smt schedule - s = 
tvm.create_schedule(y.op) + s = te.create_schedule(y.op) s[x_buf].set_scope(env.inp_scope) s[w_buf].set_scope(env.wgt_scope) s[y_gem].set_scope(env.acc_scope) @@ -279,7 +280,7 @@ def test_gemm(): s[y_min].set_scope(env.acc_scope) abo, aco, abi, aci = s[y].op.axis abo1, abo2 = s[y].split(abo, nparts=2) - s[y].bind(abo1, tvm.thread_axis("cthread")) + s[y].bind(abo1, te.thread_axis("cthread")) s[y_gem].compute_at(s[y], abo1) s[y_shf].compute_at(s[y], abo1) s[y_max].compute_at(s[y], abo1) @@ -315,38 +316,38 @@ def test_alu(): n = 8 imm = np.random.randint(1,5) # compute - a = tvm.placeholder( + a = te.placeholder( (m, n, env.BATCH, env.BLOCK_OUT), name="a", dtype=env.acc_dtype) - a_buf = tvm.compute( + a_buf = te.compute( (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: a(*i), "a_buf") #DRAM->SRAM if use_imm: - res_buf = tvm.compute( + res_buf = te.compute( (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: tvm_op(a_buf(*i), imm), "res_buf") #compute else: - b = tvm.placeholder( + b = te.placeholder( (m, n, env.BATCH, env.BLOCK_OUT), name="b", dtype=env.acc_dtype) - b_buf = tvm.compute( + b_buf = te.compute( (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: b(*i), "b_buf") #DRAM->SRAM - res_buf = tvm.compute( + res_buf = te.compute( (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: tvm_op(a_buf(*i), b_buf(*i)), "res_buf") #compute5B - res = tvm.compute( + res = te.compute( (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: res_buf(*i).astype(env.inp_dtype), "res") #SRAM->DRAM # schedule - s = tvm.create_schedule(res.op) + s = te.create_schedule(res.op) s[a_buf].set_scope(env.acc_scope) # SRAM s[a_buf].pragma(a_buf.op.axis[0], env.dma_copy) # DRAM->SRAM s[res_buf].set_scope(env.acc_scope) # SRAM @@ -402,8 +403,8 @@ def test_alu(): print("\t{:<16}: {:>16}".format(k, v)) check_alu(lambda x, y: x << y, np.left_shift, use_imm=True, test_name="SHL") - check_alu(tvm.max, np.maximum, use_imm=True, test_name="MAX") - check_alu(tvm.max, np.maximum, test_name="MAX") + check_alu(tvm.te.max, np.maximum, 
use_imm=True, test_name="MAX") + check_alu(tvm.te.max, np.maximum, test_name="MAX") check_alu(lambda x, y: x + y, use_imm=True, test_name="ADD") check_alu(lambda x, y: x + y, test_name="ADD") check_alu(lambda x, y: x >> y, np.right_shift, use_imm=True, test_name="SHR") @@ -417,28 +418,28 @@ def test_relu(): m = 8 n = 10 # compute - a = tvm.placeholder( + a = te.placeholder( (m, n, env.BATCH, env.BLOCK_OUT), name="a", dtype=env.acc_dtype) - a_buf = tvm.compute( + a_buf = te.compute( (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: a(*i), "a_buf") # DRAM->SRAM - max_buf = tvm.compute( + max_buf = te.compute( (m, n, env.BATCH, env.BLOCK_OUT), - lambda *i: tvm.max(a_buf(*i), 0), + lambda *i: tvm.te.max(a_buf(*i), 0), "res_buf") # relu - min_buf = tvm.compute( + min_buf = te.compute( (m, n, env.BATCH, env.BLOCK_OUT), - lambda *i: tvm.min(max_buf(*i), (1<<(env.INP_WIDTH-1))-1), + lambda *i: tvm.te.min(max_buf(*i), (1<<(env.INP_WIDTH-1))-1), "max_buf") # relu - res = tvm.compute( + res = te.compute( (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: min_buf(*i).astype(env.inp_dtype), "min_buf") # SRAM->DRAM # schedule - s = tvm.create_schedule(res.op) + s = te.create_schedule(res.op) s[a_buf].set_scope(env.acc_scope) # SRAM s[a_buf].pragma(a_buf.op.axis[0], env.dma_copy) # DRAM->SRAM s[max_buf].set_scope(env.acc_scope) # SRAM @@ -488,27 +489,27 @@ def test_shift_and_scale(): imm_shift = np.random.randint(0,8) imm_scale = np.random.randint(1,5) # compute - a = tvm.placeholder( + a = te.placeholder( (m, n, env.BATCH, env.BLOCK_OUT), name="a", dtype=env.acc_dtype) - a_buf = tvm.compute( + a_buf = te.compute( (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: a(*i), "a_buf") # DRAM->SRAM - res_shift = tvm.compute( + res_shift = te.compute( (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: a_buf(*i)+imm_shift, "res_shift") # compute - res_scale = tvm.compute( + res_scale = te.compute( (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: res_shift(*i)>>imm_scale, "res_scale") # compute - res = tvm.compute( 
+ res = te.compute( (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: res_scale(*i).astype(env.inp_dtype), "res") # SRAM->DRAM # schedule - s = tvm.create_schedule(res.op) + s = te.create_schedule(res.op) s[a_buf].set_scope(env.acc_scope) # SRAM s[res_shift].set_scope(env.acc_scope) # SRAM s[res_scale].set_scope(env.acc_scope) # SRAM diff --git a/vta/tutorials/autotvm/tune_relay_vta.py b/vta/tutorials/autotvm/tune_relay_vta.py index a20b8ec..16c8b3e 100644 --- a/vta/tutorials/autotvm/tune_relay_vta.py +++ b/vta/tutorials/autotvm/tune_relay_vta.py @@ -60,6 +60,7 @@ from PIL import Image import topi import tvm +from tvm import te from tvm import rpc, autotvm, relay from tvm.contrib import graph_runtime, util, download from tvm.autotvm.measure.measure_methods import request_remote @@ -297,13 +298,13 @@ def tune_tasks(tasks, def register_vta_tuning_tasks(): from tvm.autotvm.task import TaskExtractEnv - @tvm.tag_scope(tag=topi.tag.ELEMWISE) + @tvm.te.tag_scope(tag=topi.tag.ELEMWISE) def my_clip(x, a_min, a_max): """Unlike topi's current clip, put min and max into two stages.""" - const_min = tvm.const(a_min, x.dtype) - const_max = tvm.const(a_max, x.dtype) - x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA") - x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB") + const_min = tvm.tir.const(a_min, x.dtype) + const_max = tvm.tir.const(a_max, x.dtype) + x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA") + x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB") return x # init autotvm env to register VTA operator @@ -323,7 +324,7 @@ def register_vta_tuning_tasks(): if tvm.target.Target.current().device_name == 'vta': s = vta.top.schedule_conv2d_packed([res]) else: - s = tvm.create_schedule([res.op]) + s = te.create_schedule([res.op]) return s, [A, W, res] diff --git a/vta/tutorials/frontend/deploy_classification.py b/vta/tutorials/frontend/deploy_classification.py index 
df02b48..15cba43 100644 --- a/vta/tutorials/frontend/deploy_classification.py +++ b/vta/tutorials/frontend/deploy_classification.py @@ -50,6 +50,7 @@ import numpy as np from matplotlib import pyplot as plt import tvm +from tvm import te from tvm import rpc, autotvm, relay from tvm.contrib import graph_runtime, util, download from tvm.contrib.debugger import debug_runtime diff --git a/vta/tutorials/matrix_multiply.py b/vta/tutorials/matrix_multiply.py index 3e46b42..4447626 100644 --- a/vta/tutorials/matrix_multiply.py +++ b/vta/tutorials/matrix_multiply.py @@ -36,6 +36,7 @@ from __future__ import absolute_import, print_function import os import tvm +from tvm import te import vta import numpy as np from tvm import rpc @@ -167,13 +168,13 @@ n = 16 # Batch factor o (we use single batch inference) o = 1 # A placeholder tensor in tiled data format -A = tvm.placeholder((o, n, env.BATCH, env.BLOCK_IN), name="A", dtype=env.inp_dtype) +A = te.placeholder((o, n, env.BATCH, env.BLOCK_IN), name="A", dtype=env.inp_dtype) # B placeholder tensor in tiled data format -B = tvm.placeholder((m, n, env.BLOCK_OUT, env.BLOCK_IN), name="B", dtype=env.wgt_dtype) +B = te.placeholder((m, n, env.BLOCK_OUT, env.BLOCK_IN), name="B", dtype=env.wgt_dtype) # A copy buffer -A_buf = tvm.compute((o, n, env.BATCH, env.BLOCK_IN), lambda *i: A(*i), "A_buf") +A_buf = te.compute((o, n, env.BATCH, env.BLOCK_IN), lambda *i: A(*i), "A_buf") # B copy buffer -B_buf = tvm.compute((m, n, env.BLOCK_OUT, env.BLOCK_IN), lambda *i: B(*i), "B_buf") +B_buf = te.compute((m, n, env.BLOCK_OUT, env.BLOCK_IN), lambda *i: B(*i), "B_buf") ###################################################################### # Matrix Multiplication @@ -186,8 +187,8 @@ B_buf = tvm.compute((m, n, env.BLOCK_OUT, env.BLOCK_IN), lambda *i: B(*i), "B_bu # In order to implement matrix multiplication, the lambda function needs to # include a reduction formula over the input channel dimension axes. 
# To create a reduction formula, we can declare a reduction axis using -# :code:`tvm.reduce_axis`, which takes in the range of reductions. -# :code:`tvm.sum` takes in the expression to be reduced as well as +# :code:`te.reduce_axis`, which takes in the range of reductions. +# :code:`te.sum` takes in the expression to be reduced as well as # the reduction axes to compute the sum of value over all k in the declared # ranges. # @@ -198,14 +199,14 @@ B_buf = tvm.compute((m, n, env.BLOCK_OUT, env.BLOCK_IN), lambda *i: B(*i), "B_bu # the computation should be done. # Outer input feature reduction axis -ko = tvm.reduce_axis((0, n), name="ko") +ko = te.reduce_axis((0, n), name="ko") # Inner input feature reduction axis -ki = tvm.reduce_axis((0, env.BLOCK_IN), name="ki") +ki = te.reduce_axis((0, env.BLOCK_IN), name="ki") # Describe the in-VTA matrix multiplication -C_buf = tvm.compute( +C_buf = te.compute( (o, m, env.BATCH, env.BLOCK_OUT), lambda bo, co, bi, ci: - tvm.sum(A_buf[bo, ko, bi, ki].astype(env.acc_dtype) * + te.sum(A_buf[bo, ko, bi, ki].astype(env.acc_dtype) * B_buf[co, ko, ci, ki].astype(env.acc_dtype), axis=[ko, ki]), name="C_buf") @@ -234,7 +235,7 @@ C_buf = tvm.compute( # input activation data format. 
# Cast to output type, and send to main memory -C = tvm.compute( +C = te.compute( (o, m, env.BATCH, env.BLOCK_OUT), lambda *i: C_buf(*i).astype(env.inp_dtype), name="C") @@ -265,7 +266,7 @@ C = tvm.compute( # :code:`C` in the following way: # Let's take a look at the generated schedule -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) print(tvm.lower(s, [A, B, C], simple_mode=True)) ###################################################################### diff --git a/vta/tutorials/optimize/convolution_opt.py b/vta/tutorials/optimize/convolution_opt.py index e5cf8e5..2616fb2 100644 --- a/vta/tutorials/optimize/convolution_opt.py +++ b/vta/tutorials/optimize/convolution_opt.py @@ -39,6 +39,7 @@ from __future__ import absolute_import, print_function import os import tvm +from tvm import te import vta import numpy as np @@ -167,16 +168,16 @@ output_shape = (batch_size // env.BATCH, env.BLOCK_OUT) # Convolution reduction axes -dy = tvm.reduce_axis((0, kernel_h), name='dy') -dx = tvm.reduce_axis((0, kernel_w), name='dx') -ic = tvm.reduce_axis((0, in_channels // env.BLOCK_IN), name='ic') -ic_tns = tvm.reduce_axis((0, env.BLOCK_IN), name='ic_tns') +dy = te.reduce_axis((0, kernel_h), name='dy') +dx = te.reduce_axis((0, kernel_w), name='dx') +ic = te.reduce_axis((0, in_channels // env.BLOCK_IN), name='ic') +ic_tns = te.reduce_axis((0, env.BLOCK_IN), name='ic_tns') # Input placeholder tensors -data = tvm.placeholder(data_shape, +data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype) -kernel = tvm.placeholder(kernel_shape, +kernel = te.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype) @@ -185,33 +186,33 @@ kernel = tvm.placeholder(kernel_shape, data_buf = topi.nn.pad(data, [0, 0, pad_h, pad_w, 0, 0], name="data_buf") -kernel_buf = tvm.compute(kernel_shape, lambda *i: kernel(*i), "kernel_buf") +kernel_buf = te.compute(kernel_shape, lambda *i: kernel(*i), "kernel_buf") # Declare 2D convolution -res_conv = tvm.compute( +res_conv = te.compute( 
output_shape, - lambda bo, co, i, j, bi, ci: tvm.sum( + lambda bo, co, i, j, bi, ci: te.sum( data_buf[bo, ic, i*stride_h+dy, j*stride_w+dx, bi, ic_tns].astype(env.acc_dtype) * kernel_buf[co, ic, dy, dx, ci, ic_tns].astype(env.acc_dtype), axis=[ic, dy, dx, ic_tns]), name="res_conv") # Add shift stage for fix-point normalization -res_shr = tvm.compute(output_shape, +res_shr = te.compute(output_shape, lambda *i: res_conv(*i) >> 8, name="res_shr") # Apply clipping between (0, input max value) inp_max = (1 << (env.INP_WIDTH - 1)) - 1 -res_max = tvm.compute(output_shape, - lambda *i: tvm.max(res_shr(*i), 0), +res_max = te.compute(output_shape, + lambda *i: tvm.te.max(res_shr(*i), 0), "res_max") -res_min = tvm.compute(output_shape, - lambda *i: tvm.min(res_max(*i), inp_max), +res_min = te.compute(output_shape, + lambda *i: tvm.te.min(res_max(*i), inp_max), "res_min") # Result Tensor -res = tvm.compute(output_shape, +res = te.compute(output_shape, lambda *i: res_min(*i).astype(env.inp_dtype), name="res") @@ -228,7 +229,7 @@ res = tvm.compute(output_shape, # - Lowering to VTA hardware intrinsics # Create TVM schedule -s = tvm.create_schedule(res.op) +s = te.create_schedule(res.op) # Let's look at the default TVM schedule print(tvm.lower(s, [data, kernel, res], simple_mode=True)) @@ -306,7 +307,7 @@ v_threads = 2 # Perform virtual thread split along output channel outer axis _, tx = s[res].split(oc_out, factor=v_threads) s[res].reorder(tx, b_out) -s[res].bind(tx, tvm.thread_axis("cthread")) +s[res].bind(tx, te.thread_axis("cthread")) # Let's look at the current TVM schedule after blocking and virtual threading print(tvm.lower(s, [data, kernel, res], simple_mode=True)) diff --git a/vta/tutorials/optimize/matrix_multiply_opt.py b/vta/tutorials/optimize/matrix_multiply_opt.py index 2722af5..597a7e8 100644 --- a/vta/tutorials/optimize/matrix_multiply_opt.py +++ b/vta/tutorials/optimize/matrix_multiply_opt.py @@ -39,6 +39,7 @@ from __future__ import absolute_import, 
print_function import os import tvm +from tvm import te import vta import numpy as np from tvm import rpc @@ -119,45 +120,45 @@ output_shape = (batch_size // env.BATCH, num_ops = in_channels * out_channels * batch_size * 2 # Reduction axes -ic = tvm.reduce_axis((0, in_channels // env.BLOCK_IN), name='ic') -ic_tns = tvm.reduce_axis((0, env.BLOCK_IN), name='ic_tns') +ic = te.reduce_axis((0, in_channels // env.BLOCK_IN), name='ic') +ic_tns = te.reduce_axis((0, env.BLOCK_IN), name='ic_tns') # Input placeholder tensors -data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype) -weight = tvm.placeholder(weight_shape, name="weight", dtype=env.wgt_dtype) +data = te.placeholder(data_shape, name="data", dtype=env.inp_dtype) +weight = te.placeholder(weight_shape, name="weight", dtype=env.wgt_dtype) # Copy buffers -data_buf = tvm.compute(data_shape, +data_buf = te.compute(data_shape, lambda *i: data(*i), "data_buf") -weight_buf = tvm.compute(weight_shape, +weight_buf = te.compute(weight_shape, lambda *i: weight(*i), "weight_buf") # Declare matrix multiply computation -res_gemm = tvm.compute(output_shape, - lambda bo, co, bi, ci: tvm.sum( +res_gemm = te.compute(output_shape, + lambda bo, co, bi, ci: te.sum( data_buf[bo, ic, bi, ic_tns].astype(env.acc_dtype) * weight_buf[co, ic, ci, ic_tns].astype(env.acc_dtype), axis=[ic, ic_tns]), name="res_gem") # Add shift stage for fix-point normalization -res_shr = tvm.compute(output_shape, +res_shr = te.compute(output_shape, lambda *i: res_gemm(*i) >> env.INP_WIDTH, name="res_shr") # Apply clipping between (0, input max value) inp_max = (1<<(env.INP_WIDTH-1))-1 -res_max = tvm.compute(output_shape, - lambda *i: tvm.max(res_shr(*i), 0), +res_max = te.compute(output_shape, + lambda *i: tvm.te.max(res_shr(*i), 0), "res_max") -res_min = tvm.compute(output_shape, - lambda *i: tvm.min(res_max(*i), inp_max), +res_min = te.compute(output_shape, + lambda *i: tvm.te.min(res_max(*i), inp_max), "res_min") # Apply typecast to input data type 
before sending results back -res = tvm.compute(output_shape, +res = te.compute(output_shape, lambda *i: res_min(*i).astype(env.inp_dtype), name="res") @@ -173,7 +174,7 @@ res = tvm.compute(output_shape, # Create TVM schedule -s = tvm.create_schedule(res.op) +s = te.create_schedule(res.op) # Let's look at the default TVM schedule print(tvm.lower(s, [data, weight, res], simple_mode=True)) diff --git a/vta/tutorials/vta_get_started.py b/vta/tutorials/vta_get_started.py index dd30515..3dd1f8c 100644 --- a/vta/tutorials/vta_get_started.py +++ b/vta/tutorials/vta_get_started.py @@ -36,6 +36,7 @@ from __future__ import absolute_import, print_function import os import tvm +from tvm import te import vta import numpy as np @@ -137,9 +138,9 @@ m = 64 # Batch factor o - total 1 x 1 = 1 o = 1 # A placeholder tensor in tiled data format -A = tvm.placeholder((o, m, env.BATCH, env.BLOCK_OUT), name="A", dtype=env.acc_dtype) +A = te.placeholder((o, m, env.BATCH, env.BLOCK_OUT), name="A", dtype=env.acc_dtype) # B placeholder tensor in tiled data format -B = tvm.placeholder((o, m, env.BATCH, env.BLOCK_OUT), name="B", dtype=env.acc_dtype) +B = te.placeholder((o, m, env.BATCH, env.BLOCK_OUT), name="B", dtype=env.acc_dtype) ###################################################################### # Copy Buffers @@ -158,9 +159,9 @@ B = tvm.placeholder((o, m, env.BATCH, env.BLOCK_OUT), name="B", dtype=env.acc_dt # This can later be interpreted by the compiler as a cached read operation. 
# A copy buffer -A_buf = tvm.compute((o, m, env.BATCH, env.BLOCK_OUT), lambda *i: A(*i), "A_buf") +A_buf = te.compute((o, m, env.BATCH, env.BLOCK_OUT), lambda *i: A(*i), "A_buf") # B copy buffer -B_buf = tvm.compute((o, m, env.BATCH, env.BLOCK_OUT), lambda *i: B(*i), "B_buf") +B_buf = te.compute((o, m, env.BATCH, env.BLOCK_OUT), lambda *i: B(*i), "B_buf") ###################################################################### # Vector Addition @@ -174,7 +175,7 @@ B_buf = tvm.compute((o, m, env.BATCH, env.BLOCK_OUT), lambda *i: B(*i), "B_buf") # the computation should be done. # Describe the in-VTA vector addition -C_buf = tvm.compute( +C_buf = te.compute( (o, m, env.BATCH, env.BLOCK_OUT), lambda *i: A_buf(*i).astype(env.acc_dtype) + B_buf(*i).astype(env.acc_dtype), name="C_buf") @@ -199,7 +200,7 @@ C_buf = tvm.compute( # input activation data format. # Cast to output type, and send to main memory -C = tvm.compute( +C = te.compute( (o, m, env.BATCH, env.BLOCK_OUT), lambda *i: C_buf(*i).astype(env.inp_dtype), name="C") @@ -231,7 +232,7 @@ C = tvm.compute( # :code:`C` in the following way: # Let's take a look at the generated schedule -s = tvm.create_schedule(C.op) +s = te.create_schedule(C.op) print(tvm.lower(s, [A, B, C], simple_mode=True)) -- 2.7.4