From 55d819252852d107b9d9a31224666c7032b9cf89 Mon Sep 17 00:00:00 2001
From: Tianqi Chen <tqchen@users.noreply.github.com>
Date: Tue, 21 Jan 2020 11:58:21 -0800
Subject: [PATCH] [REFACTOR] top->te (#4759)

Bring up namespace te -- Tensor expression language DSL.
---
 CMakeLists.txt                                     |   2 +-
 include/tvm/arith/bound.h                          |   4 +-
 include/tvm/driver/driver.h                        |   8 +-
 include/tvm/relay/op_attr_types.h                  |  20 +--
 include/tvm/{top => te}/operation.h                |  19 ++-
 include/tvm/{top => te}/schedule.h                 |  18 ++-
 include/tvm/{top => te}/schedule_pass.h            |  14 +--
 include/tvm/{top => te}/tensor.h                   |  20 ++-
 include/tvm/{top => te}/tensor_intrin.h            |  15 ++-
 include/tvm/tir/ir_pass.h                          |   8 +-
 src/api/api_arith.cc                               |   2 +-
 src/api/api_base.cc                                |   2 +-
 src/api/api_lang.cc                                |  10 +-
 src/api/api_pass.cc                                |   4 +-
 src/api/api_schedule.cc                            |  12 +-
 src/api/api_test.cc                                |   2 +-
 src/arith/domain_touched.cc                        |   8 +-
 src/contrib/hybrid/codegen_hybrid.h                |   4 +-
 src/driver/driver.cc                               |  26 ++--
 src/ir/expr.cc                                     |   6 +-
 src/relay/backend/compile_engine.cc                | 138 ++++++++++----------
 src/relay/backend/compile_engine.h                 |   4 +-
 src/relay/backend/utils.h                          |   2 +-
 src/relay/backend/vm/compiler.cc                   |   2 +-
 src/relay/op/annotation/annotation.cc              |  30 ++---
 src/relay/op/debug.cc                              |   6 +-
 src/relay/op/memory/memory.cc                      |  20 +--
 src/relay/op/nn/nn.cc                              |  28 ++---
 src/relay/op/nn/pad.cc                             |   6 +-
 src/relay/op/nn/pooling.cc                         |  42 +++----
 src/relay/op/tensor/binary.cc                      |   4 +-
 src/relay/op/tensor/reduce.cc                      |  44 +++----
 src/relay/op/tensor/transform.cc                   | 140 ++++++++++-----------
 src/relay/op/tensor/unary.cc                       |  14 +--
 src/relay/op/vision/yolo.cc                        |   4 +-
 src/relay/pass/alter_op_layout.cc                  |   6 +-
 src/relay/pass/convert_layout.cc                   |   6 +-
 src/relay/pass/gradient.cc                         |   2 +-
 src/relay/pass/legalize.cc                         |   2 +-
 src/{top => te}/operation/compute_op.cc            |  12 +-
 src/{top => te}/operation/compute_op.h             |  12 +-
 .../operation/cross_thread_reduction.cc            |   4 +-
 src/{top => te}/operation/extern_op.cc             |   6 +-
 src/{top => te}/operation/hybrid_op.cc             |  16 +--
 src/{top => te}/operation/hybrid_op.h              |  12 +-
 src/{top => te}/operation/op_util.cc               |   8 +-
 src/{top => te}/operation/op_util.h                |  12 +-
 src/{top => te}/operation/placeholder_op.cc        |   6 +-
 src/{top => te}/operation/scan_op.cc               |   6 +-
 src/{top => te}/operation/tensor_compute_op.cc     |  14 +--
 src/{top => te}/operation/tensorize.cc             |  14 +--
 src/{top => te}/schedule/auto_inline_elem_wise.cc  |   8 +-
 src/{top => te}/schedule/bound.cc                  |   8 +-
 src/{top => te}/schedule/graph.cc                  |  14 +--
 src/{top => te}/schedule/graph.h                   |  14 +--
 src/{top => te}/schedule/message_passing.cc        |   4 +-
 src/{top => te}/schedule/message_passing.h         |  14 +--
 .../schedule/schedule_dataflow_rewrite.cc          |  20 +--
 src/{top => te}/schedule/schedule_lang.cc          |  14 +--
 src/{top => te}/schedule/schedule_ops.cc           |   8 +-
 src/{top => te}/tensor.cc                          |  10 +-
 src/tir/pass/inject_prefetch.cc                    |   2 +-
 src/tir/pass/storage_flatten.cc                    |  14 +--
 src/tir/pass/tensor_core.cc                        |  12 +-
 src/tir/pass/verify_compact_buffer.cc              |   2 +-
 tests/cpp/build_module_test.cc                     |   6 +-
 tests/cpp/expr_test.cc                             |   2 +-
 tests/cpp/ir_simplify_test.cc                      |   6 +-
 tests/cpp/relay_build_module_test.cc               |   2 +-
 tests/cpp/relay_pass_type_infer_test.cc            |   2 +-
 tests/cpp/relay_transform_sequential.cc            |   2 +-
 tests/cpp/simple_passes_test.cc                    |   6 +-
 tests/cpp/tensor_test.cc                           |  14 +--
 tests/cpp/topi_ewise_test.cc                       |   2 +-
 tests/cpp/utvm_runtime_standalone_test.cc          |   2 +-
 topi/include/topi/broadcast.h                      |  51 ++++----
 topi/include/topi/contrib/cublas.h                 |   6 +-
 topi/include/topi/contrib/rocblas.h                |   4 +-
 topi/include/topi/cuda/dense.h                     |  34 ++---
 topi/include/topi/cuda/injective.h                 |  14 +--
 topi/include/topi/cuda/normalization.h             |  18 +--
 topi/include/topi/cuda/pooling.h                   |  26 ++--
 topi/include/topi/cuda/reduction.h                 |  20 +--
 topi/include/topi/cuda/softmax.h                   |  22 ++--
 topi/include/topi/detail/array_utils.h             |   4 +-
 topi/include/topi/detail/broadcast.h               |  18 ++-
 topi/include/topi/detail/constant_utils.h          |   2 +-
 topi/include/topi/detail/extern.h                  |   4 +-
 topi/include/topi/detail/fuse.h                    |   4 +-
 topi/include/topi/detail/pad_utils.h               |   2 +-
 topi/include/topi/detail/ravel_unravel.h           |   7 +-
 topi/include/topi/detail/tensor_utils.h            |   2 +-
 topi/include/topi/elemwise.h                       |   2 +-
 topi/include/topi/generic/default.h                |  14 +--
 topi/include/topi/generic/extern.h                 |  16 +--
 topi/include/topi/generic/injective.h              |  14 +--
 topi/include/topi/image/resize.h                   |  15 ++-
 topi/include/topi/nn.h                             |  94 +++++++-------
 topi/include/topi/nn/batch_matmul.h                |  16 +--
 topi/include/topi/nn/bias_add.h                    |  15 ++-
 topi/include/topi/nn/bnn.h                         |  26 ++--
 topi/include/topi/nn/dense.h                       |  20 +--
 topi/include/topi/nn/dilate.h                      |  12 +-
 topi/include/topi/nn/flatten.h                     |  14 +--
 topi/include/topi/nn/l2_normalize.h                |  10 +-
 topi/include/topi/nn/local_response_norm.h         |  16 +--
 topi/include/topi/nn/mapping.h                     |  12 +-
 topi/include/topi/nn/pooling.h                     |  64 +++++-----
 topi/include/topi/nn/softmax.h                     |  33 +++--
 topi/include/topi/nn/upsampling.h                  |   2 +-
 topi/include/topi/reduction.h                      |  28 ++---
 topi/include/topi/rocm/dense.h                     |  18 +--
 topi/include/topi/rocm/injective.h                 |  10 +-
 topi/include/topi/rocm/normalization.h             |   8 +-
 topi/include/topi/rocm/pooling.h                   |  15 ++-
 topi/include/topi/rocm/reduction.h                 |  10 +-
 topi/include/topi/rocm/softmax.h                   |  10 +-
 topi/include/topi/transform.h                      |  29 +++--
 topi/include/topi/vision/reorg.h                   |  17 ++-
 topi/include/topi/x86/bnn.h                        |  10 +-
 topi/include/topi/x86/default.h                    |  12 +-
 topi/include/topi/x86/injective.h                  |  12 +-
 topi/src/topi.cc                                   |  36 +++---
 123 files changed, 923 insertions(+), 939 deletions(-)
 rename include/tvm/{top => te}/operation.h (98%)
 rename include/tvm/{top => te}/schedule.h (99%)
 rename include/tvm/{top => te}/schedule_pass.h (92%)
 rename include/tvm/{top => te}/tensor.h (96%)
 rename include/tvm/{top => te}/tensor_intrin.h (96%)
 rename src/{top => te}/operation/compute_op.cc (98%)
 rename src/{top => te}/operation/compute_op.h (95%)
 rename src/{top => te}/operation/cross_thread_reduction.cc (99%)
 rename src/{top => te}/operation/extern_op.cc (98%)
 rename src/{top => te}/operation/hybrid_op.cc (98%)
 rename src/{top => te}/operation/hybrid_op.h (94%)
 rename src/{top => te}/operation/op_util.cc (98%)
 rename src/{top => te}/operation/op_util.h (95%)
 rename src/{top => te}/operation/placeholder_op.cc (97%)
 rename src/{top => te}/operation/scan_op.cc (99%)
 rename src/{top => te}/operation/tensor_compute_op.cc (97%)
 rename src/{top => te}/operation/tensorize.cc (98%)
 rename src/{top => te}/schedule/auto_inline_elem_wise.cc (96%)
 rename src/{top => te}/schedule/bound.cc (98%)
 rename src/{top => te}/schedule/graph.cc (98%)
 rename src/{top => te}/schedule/graph.h (95%)
 rename src/{top => te}/schedule/message_passing.cc (99%)
 rename src/{top => te}/schedule/message_passing.h (95%)
 rename src/{top => te}/schedule/schedule_dataflow_rewrite.cc (98%)
 rename src/{top => te}/schedule/schedule_lang.cc (99%)
 rename src/{top => te}/schedule/schedule_ops.cc (99%)
 rename src/{top => te}/tensor.cc (97%)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a9d9fc3..8540a66 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -128,7 +128,7 @@ file(GLOB_RECURSE COMPILER_SRCS
     src/node/*.cc
     src/ir/*.cc
     src/arith/*.cc
-    src/top/*.cc
+    src/te/*.cc
     src/autotvm/*.cc
     src/tir/*.cc
     src/driver/*.cc
diff --git a/include/tvm/arith/bound.h b/include/tvm/arith/bound.h
index 4d77e3a..6165a2a 100644
--- a/include/tvm/arith/bound.h
+++ b/include/tvm/arith/bound.h
@@ -33,7 +33,7 @@
 
 namespace tvm {
 // forward delcare Tensor
-namespace top {
+namespace te {
 class Tensor;
 }
 namespace arith {
@@ -84,7 +84,7 @@ IntSet DeduceBound(PrimExpr v, PrimExpr cond,
  * \return The domain that covers all the calls or provides within the given statement.
  */
 Domain DomainTouched(Stmt body,
-                     const top::Tensor &tensor,
+                     const te::Tensor &tensor,
                      bool consider_calls,
                      bool consider_provides);
 
diff --git a/include/tvm/driver/driver.h b/include/tvm/driver/driver.h
index 7541b11..e495256 100644
--- a/include/tvm/driver/driver.h
+++ b/include/tvm/driver/driver.h
@@ -32,7 +32,7 @@
 #include <tvm/runtime/packed_func.h>
 #include <tvm/target/target.h>
 #include <tvm/support/with.h>
-#include <tvm/top/schedule_pass.h>
+#include <tvm/te/schedule_pass.h>
 #include <tvm/tir/lowered_func.h>
 
 #include <string>
@@ -52,10 +52,10 @@ namespace tvm {
 * \return The lowered function.
 */
 TVM_DLL Array<tir::LoweredFunc> lower(
-    top::Schedule sch,
-    const Array<top::Tensor>& args,
+    te::Schedule sch,
+    const Array<te::Tensor>& args,
     const std::string& name,
-    const std::unordered_map<top::Tensor, tir::Buffer>& binds,
+    const std::unordered_map<te::Tensor, tir::Buffer>& binds,
     const BuildConfig& config);
 /*!
 * \brief Split host/device function and running necessary pass before build
diff --git a/include/tvm/relay/op_attr_types.h b/include/tvm/relay/op_attr_types.h
index c480fcd..88e948f 100644
--- a/include/tvm/relay/op_attr_types.h
+++ b/include/tvm/relay/op_attr_types.h
@@ -24,8 +24,8 @@
 #ifndef TVM_RELAY_OP_ATTR_TYPES_H_
 #define TVM_RELAY_OP_ATTR_TYPES_H_
 
-#include <tvm/top/tensor.h>
-#include <tvm/top/schedule.h>
+#include <tvm/te/tensor.h>
+#include <tvm/te/schedule.h>
 #include <tvm/relay/type.h>
 #include <tvm/relay/expr.h>
 #include <tvm/target/target.h>
@@ -104,8 +104,8 @@ using TShapeDataDependant = bool;
  * \return The output compute description of the operator.
  */
 using FTVMCompute = runtime::TypedPackedFunc<
-  Array<top::Tensor>(const Attrs& attrs,
-                     const Array<top::Tensor>& inputs,
+  Array<te::Tensor>(const Attrs& attrs,
+                     const Array<te::Tensor>& inputs,
                      const Type& out_type,
                      const Target& target)>;
 
@@ -119,8 +119,8 @@ using FTVMCompute = runtime::TypedPackedFunc<
  * \return schedule The computation schedule.
  */
 using FTVMSchedule = runtime::TypedPackedFunc<
-  top::Schedule(const Attrs& attrs,
-                const Array<top::Tensor>& outs,
+  te::Schedule(const Attrs& attrs,
+                const Array<te::Tensor>& outs,
                 const Target& target)>;
 
 /*!
@@ -136,7 +136,7 @@ using FTVMSchedule = runtime::TypedPackedFunc<
 using FTVMAlterOpLayout = runtime::TypedPackedFunc<
   Expr(const Attrs& attrs,
        const Array<Expr>& args,
-       const Array<top::Tensor>& tinfos)>;
+       const Array<te::Tensor>& tinfos)>;
 
 /*!
  * \brief Convert the layout of operators or replace the
@@ -152,7 +152,7 @@ using FTVMAlterOpLayout = runtime::TypedPackedFunc<
 using FTVMConvertOpLayout = runtime::TypedPackedFunc<
   Expr(const Attrs& attrs,
        const Array<Expr>& args,
-       const Array<top::Tensor>& tinfos,
+       const Array<te::Tensor>& tinfos,
        const std::string& desired_layout)>;
 /*!
  * \brief Legalizes an expression with another expression. This function will be
@@ -211,8 +211,8 @@ enum AnyCodegenStrategy {
 using Shape = Array<IndexExpr>;
 
 using FShapeFunc = runtime::TypedPackedFunc<
-  Array<top::Tensor>(const Attrs& attrs,
-                     const Array<top::Tensor>& inputs,
+  Array<te::Tensor>(const Attrs& attrs,
+                     const Array<te::Tensor>& inputs,
                      const Array<IndexExpr>& out_ndims)>;
 
 }  // namespace relay
diff --git a/include/tvm/top/operation.h b/include/tvm/te/operation.h
similarity index 98%
rename from include/tvm/top/operation.h
rename to include/tvm/te/operation.h
index 1b138d0..2055899 100644
--- a/include/tvm/top/operation.h
+++ b/include/tvm/te/operation.h
@@ -18,15 +18,15 @@
  */
 
 /*!
- * \file tvm/top/operation.h
+ * \file tvm/te/operation.h
  * \brief Operation node can generate one or multiple Tensors
  */
-#ifndef TVM_TOP_OPERATION_H_
-#define TVM_TOP_OPERATION_H_
+#ifndef TVM_TE_OPERATION_H_
+#define TVM_TE_OPERATION_H_
 
 #include <tvm/arith/analyzer.h>
-#include <tvm/top/tensor.h>
-#include <tvm/top/schedule.h>
+#include <tvm/te/tensor.h>
+#include <tvm/te/schedule.h>
 
 #include <tvm/tir/expr.h>
 #include <tvm/tir/op.h>
@@ -36,10 +36,9 @@
 #include <vector>
 #include <unordered_map>
 
-
-
 namespace tvm {
-namespace top {
+/*! \brief Tensor expression language DSL. */
+namespace te {
 
 /*!
  * \brief Temporary data structure to store union
@@ -679,6 +678,6 @@ inline Tensor compute(Array<PrimExpr> shape,
 inline const OperationNode* Operation::operator->() const {
   return static_cast<const OperationNode*>(get());
 }
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
-#endif  // TVM_TOP_OPERATION_H_
+#endif  // TVM_TE_OPERATION_H_
diff --git a/include/tvm/top/schedule.h b/include/tvm/te/schedule.h
similarity index 99%
rename from include/tvm/top/schedule.h
rename to include/tvm/te/schedule.h
index 5eaa02d..e99b54a 100644
--- a/include/tvm/top/schedule.h
+++ b/include/tvm/te/schedule.h
@@ -18,24 +18,22 @@
  */
 
 /*!
- * \file tvm/top/schedule.h
+ * \file tvm/te/schedule.h
  * \brief Define a schedule.
  */
 // Acknowledgement: Many schedule primitives originate from Halide and Loopy.
-#ifndef TVM_TOP_SCHEDULE_H_
-#define TVM_TOP_SCHEDULE_H_
+#ifndef TVM_TE_SCHEDULE_H_
+#define TVM_TE_SCHEDULE_H_
 
 #include <tvm/tir/expr.h>
-#include <tvm/top/tensor.h>
-#include <tvm/top/tensor_intrin.h>
-
+#include <tvm/te/tensor.h>
+#include <tvm/te/tensor_intrin.h>
 
 #include <string>
 #include <unordered_map>
 
-
 namespace tvm {
-namespace top {
+namespace te {
 // Node container for Stage
 class StageNode;
 // Node container for Schedule
@@ -767,6 +765,6 @@ inline const IterVarRelationNode* IterVarRelation::operator->() const {
 inline const IterVarAttrNode* IterVarAttr::operator->() const {
   return static_cast<const IterVarAttrNode*>(get());
 }
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
-#endif  // TVM_TOP_SCHEDULE_H_
+#endif  // TVM_TE_SCHEDULE_H_
diff --git a/include/tvm/top/schedule_pass.h b/include/tvm/te/schedule_pass.h
similarity index 92%
rename from include/tvm/top/schedule_pass.h
rename to include/tvm/te/schedule_pass.h
index eacc9cd..b3ecbf8 100644
--- a/include/tvm/top/schedule_pass.h
+++ b/include/tvm/te/schedule_pass.h
@@ -18,20 +18,20 @@
  */
 
 /*!
- * \file tvm/top/schedule_pass.h
+ * \file tvm/te/schedule_pass.h
  * \brief  Collection of Schedule pass functions.
  *
  *  These passes works on the schedule hyper-graph
  *  and infers information such as bounds, check conditions
  *  read/write dependencies between the IterVar
  */
-#ifndef TVM_TOP_SCHEDULE_PASS_H_
-#define TVM_TOP_SCHEDULE_PASS_H_
+#ifndef TVM_TE_SCHEDULE_PASS_H_
+#define TVM_TE_SCHEDULE_PASS_H_
 
-#include <tvm/top/schedule.h>
+#include <tvm/te/schedule.h>
 
 namespace tvm {
-namespace top {
+namespace te {
 
 /*!
  * \brief Infer the bound of all iteration variables relates to the schedule.
@@ -71,6 +71,6 @@ void AutoInlineElemWise(Schedule sch);
  */
 TVM_DLL void AutoInlineInjective(Schedule sch);
 
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
-#endif  // TVM_TOP_SCHEDULE_PASS_H_
+#endif  // TVM_TE_SCHEDULE_PASS_H_
diff --git a/include/tvm/top/tensor.h b/include/tvm/te/tensor.h
similarity index 96%
rename from include/tvm/top/tensor.h
rename to include/tvm/te/tensor.h
index 722ed50..c247dca 100644
--- a/include/tvm/top/tensor.h
+++ b/include/tvm/te/tensor.h
@@ -18,11 +18,11 @@
  */
 
 /*!
- * \file tvm/top/tensor.h
+ * \file tvm/te/tensor.h
  * \brief Dataflow tensor object
  */
-#ifndef TVM_TOP_TENSOR_H_
-#define TVM_TOP_TENSOR_H_
+#ifndef TVM_TE_TENSOR_H_
+#define TVM_TE_TENSOR_H_
 
 #include <tvm/node/container.h>
 #include <tvm/arith/bound.h>
@@ -34,10 +34,8 @@
 #include <utility>
 #include <type_traits>
 
-
-
 namespace tvm {
-namespace top {
+namespace te {
 
 using arith::IntSet;
 using namespace tvm::tir;
@@ -251,17 +249,17 @@ DEFINE_OVERLOAD_SLICE_BINARY_OP(<<);
 DEFINE_OVERLOAD_SLICE_BINARY_OP(>);  // NOLINT(*)
 DEFINE_OVERLOAD_SLICE_BINARY_OP(<);  // NOLINT(*)
 
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
 
 namespace std {
 template <>
-struct hash<::tvm::top::Operation> : public ::tvm::ObjectHash {
+struct hash<::tvm::te::Operation> : public ::tvm::ObjectHash {
 };
 
 template <>
-struct hash<::tvm::top::Tensor> {
-  std::size_t operator()(const ::tvm::top::Tensor& k) const {
+struct hash<::tvm::te::Tensor> {
+  std::size_t operator()(const ::tvm::te::Tensor& k) const {
     ::tvm::ObjectHash hasher;
     if (k.defined() && k->op.defined()) {
       return hasher(k->op);
@@ -271,4 +269,4 @@ struct hash<::tvm::top::Tensor> {
   }
 };
 }  // namespace std
-#endif  // TVM_TOP_TENSOR_H_
+#endif  // TVM_TE_TENSOR_H_
diff --git a/include/tvm/top/tensor_intrin.h b/include/tvm/te/tensor_intrin.h
similarity index 96%
rename from include/tvm/top/tensor_intrin.h
rename to include/tvm/te/tensor_intrin.h
index d216ecc..c964d3e 100644
--- a/include/tvm/top/tensor_intrin.h
+++ b/include/tvm/te/tensor_intrin.h
@@ -18,20 +18,19 @@
  */
 
 /*!
- * \file tvm/top/tensor_intrin.h
+ * \file tvm/te/tensor_intrin.h
  * \brief Tensor intrinsic operations.
  */
-#ifndef TVM_TOP_TENSOR_INTRIN_H_
-#define TVM_TOP_TENSOR_INTRIN_H_
+#ifndef TVM_TE_TENSOR_INTRIN_H_
+#define TVM_TE_TENSOR_INTRIN_H_
 
-#include <tvm/top/tensor.h>
+#include <tvm/te/tensor.h>
 #include <tvm/tir/buffer.h>
 
 #include <string>
 
-
 namespace tvm {
-namespace top {
+namespace te {
 
 // Internal node container of tensor intrinsics.
 class TensorIntrinNode;
@@ -176,6 +175,6 @@ inline const TensorIntrinCallNode* TensorIntrinCall::operator->() const {
   return static_cast<const TensorIntrinCallNode*>(get());
 }
 
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
-#endif  // TVM_TOP_TENSOR_INTRIN_H_
+#endif  // TVM_TE_TENSOR_INTRIN_H_
diff --git a/include/tvm/tir/ir_pass.h b/include/tvm/tir/ir_pass.h
index ae1f35c..3a8d62c 100644
--- a/include/tvm/tir/ir_pass.h
+++ b/include/tvm/tir/ir_pass.h
@@ -27,7 +27,7 @@
 #ifndef TVM_TIR_IR_PASS_H_
 #define TVM_TIR_IR_PASS_H_
 
-#include <tvm/top/schedule.h>
+#include <tvm/te/schedule.h>
 #include <tvm/tir/expr.h>
 #include <tvm/tir/buffer.h>
 #include <tvm/tir/lowered_func.h>
@@ -205,7 +205,7 @@ Stmt Inline(Stmt stmt,
  * \return Transformed stmt.
  */
 Stmt StorageFlatten(Stmt stmt,
-                    Map<top::Tensor, Buffer> extern_buffer,
+                    Map<te::Tensor, Buffer> extern_buffer,
                     int cache_line_size,
                     bool create_bound_attribute = false);
 
@@ -219,8 +219,8 @@ Stmt StorageFlatten(Stmt stmt,
  * \return Transformed stmt.
  */
 Stmt RewriteForTensorCore(Stmt stmt,
-                          top::Schedule schedule,
-                          Map<top::Tensor, Buffer> extern_buffer);
+                          te::Schedule schedule,
+                          Map<te::Tensor, Buffer> extern_buffer);
 
 /*!
  * \brief Verify if there is any argument bound to compact buffer.
diff --git a/src/api/api_arith.cc b/src/api/api_arith.cc
index f5232d8..6ac12b1 100644
--- a/src/api/api_arith.cc
+++ b/src/api/api_arith.cc
@@ -30,7 +30,7 @@
 #include <tvm/tir/expr.h>
 #include <tvm/runtime/registry.h>
 
-#include <tvm/top/tensor.h>
+#include <tvm/te/tensor.h>
 
 namespace tvm {
 namespace arith {
diff --git a/src/api/api_base.cc b/src/api/api_base.cc
index d1d3fb0..48245fa 100644
--- a/src/api/api_base.cc
+++ b/src/api/api_base.cc
@@ -23,7 +23,7 @@
  */
 #include <dmlc/memory_io.h>
 #include <tvm/tir/expr.h>
-#include <tvm/top/tensor.h>
+#include <tvm/te/tensor.h>
 #include <tvm/runtime/registry.h>
 #include <tvm/node/serialization.h>
 
diff --git a/src/api/api_lang.cc b/src/api/api_lang.cc
index 5baa61c..4bab969 100644
--- a/src/api/api_lang.cc
+++ b/src/api/api_lang.cc
@@ -23,10 +23,10 @@
  */
 #include <tvm/tir/expr.h>
 #include <tvm/tir/expr.h>
-#include <tvm/top/tensor.h>
-#include <tvm/top/operation.h>
+#include <tvm/te/tensor.h>
+#include <tvm/te/operation.h>
 #include <tvm/tir/buffer.h>
-#include <tvm/top/schedule.h>
+#include <tvm/te/schedule.h>
 #include <tvm/runtime/registry.h>
 
 #include <tvm/driver/driver.h>
@@ -276,7 +276,7 @@ TVM_REGISTER_GLOBAL("_BijectiveLayoutBackwardShape")
 .set_body_method(&BijectiveLayout::BackwardShape);
 }  // namespace tir
 
-namespace top {
+namespace te {
 TVM_REGISTER_GLOBAL("_Tensor")
 .set_body_typed(TensorNode::make);
 
@@ -444,7 +444,7 @@ TVM_REGISTER_GLOBAL("_ScheduleCacheWrite")
 
 TVM_REGISTER_GLOBAL("_ScheduleRFactor")
 .set_body_method(&Schedule::rfactor);
-}  // namespace top
+}  // namespace te
 
 TVM_REGISTER_GLOBAL("_CommReducerCombine")
 .set_body_method<tir::CommReducer>(&tir::CommReducerNode::operator());
diff --git a/src/api/api_pass.cc b/src/api/api_pass.cc
index 2fca435..75d5439 100644
--- a/src/api/api_pass.cc
+++ b/src/api/api_pass.cc
@@ -96,8 +96,8 @@ TVM_REGISTER_GLOBAL("ir_pass.StorageFlatten")
 TVM_REGISTER_GLOBAL("ir_pass.RewriteForTensorCore")
 .set_body_typed
   ([](const Stmt& stmt,
-      const top::Schedule& schedule,
-      const Map<top::Tensor, Buffer>& extern_buffer) {
+      const te::Schedule& schedule,
+      const Map<te::Tensor, Buffer>& extern_buffer) {
       return RewriteForTensorCore(stmt, schedule, extern_buffer);
   });
 
diff --git a/src/api/api_schedule.cc b/src/api/api_schedule.cc
index 19a8414..4a57376 100644
--- a/src/api/api_schedule.cc
+++ b/src/api/api_schedule.cc
@@ -22,15 +22,15 @@
  * \file api_schedule.cc
  */
 #include <tvm/tir/expr.h>
-#include <tvm/top/tensor.h>
-#include <tvm/top/schedule.h>
-#include <tvm/top/schedule_pass.h>
+#include <tvm/te/tensor.h>
+#include <tvm/te/schedule.h>
+#include <tvm/te/schedule_pass.h>
 #include <tvm/runtime/registry.h>
 
-#include "../top/schedule/graph.h"
+#include "../te/schedule/graph.h"
 
 namespace tvm {
-namespace top {
+namespace te {
 
 TVM_REGISTER_GLOBAL("schedule.AutoInlineElemWise")
 .set_body_typed(AutoInlineElemWise);
@@ -59,5 +59,5 @@ REGISTER_SCHEDULE_PASS(CreateAttachPath);
 REGISTER_SCHEDULE_PASS(ScanGetBody);
 REGISTER_SCHEDULE_PASS(ScanFixPointAnalysis);
 
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
diff --git a/src/api/api_test.cc b/src/api/api_test.cc
index 24934db..9fbe04e 100644
--- a/src/api/api_test.cc
+++ b/src/api/api_test.cc
@@ -22,7 +22,7 @@
  * \file api_test.cc
  */
 #include <tvm/tir/expr.h>
-#include <tvm/top/tensor.h>
+#include <tvm/te/tensor.h>
 #include <tvm/ir/attrs.h>
 #include <tvm/runtime/registry.h>
 #include <tvm/ir/env_func.h>
diff --git a/src/arith/domain_touched.cc b/src/arith/domain_touched.cc
index 71a92d8..aa1ba4e 100644
--- a/src/arith/domain_touched.cc
+++ b/src/arith/domain_touched.cc
@@ -24,7 +24,7 @@
 #include <tvm/tir/expr.h>
 #include <tvm/tir/ir_pass.h>
 #include <tvm/tir/stmt_functor.h>
-#include <tvm/top/tensor.h>
+#include <tvm/te/tensor.h>
 #include <tvm/runtime/registry.h>
 
 #include <unordered_set>
@@ -38,7 +38,7 @@ using namespace tir;
 // Find Read region of the tensor in the stmt.
 class FuncTouchedDomain final : public StmtExprVisitor {
  public:
-  FuncTouchedDomain(const top::Tensor &tensor, bool consider_calls, bool consider_provides)
+  FuncTouchedDomain(const te::Tensor &tensor, bool consider_calls, bool consider_provides)
     : tensor_(tensor), consider_calls_(consider_calls), consider_provides_(consider_provides)  {}
 
   Domain Find(const Stmt& stmt) {
@@ -106,14 +106,14 @@ class FuncTouchedDomain final : public StmtExprVisitor {
     }
   }
 
-  const top::Tensor &tensor_;
+  const te::Tensor &tensor_;
   bool consider_calls_, consider_provides_;
   std::vector<std::vector<IntSet> > bounds_;
   std::unordered_map<const VarNode*, IntSet> dom_map_;
 };
 
 Domain DomainTouched(Stmt stmt,
-                     const top::Tensor &tensor,
+                     const te::Tensor &tensor,
                      bool consider_calls,
                      bool consider_provides) {
   return FuncTouchedDomain(tensor, consider_calls, consider_provides).Find(stmt);
diff --git a/src/contrib/hybrid/codegen_hybrid.h b/src/contrib/hybrid/codegen_hybrid.h
index fe84099..6491491 100644
--- a/src/contrib/hybrid/codegen_hybrid.h
+++ b/src/contrib/hybrid/codegen_hybrid.h
@@ -28,7 +28,7 @@
 #include <tvm/tir/stmt_functor.h>
 #include <tvm/target/codegen.h>
 #include <tvm/tir/lowered_func.h>
-#include <tvm/top/schedule.h>
+#include <tvm/te/schedule.h>
 #include <map>
 #include <string>
 #include <unordered_map>
@@ -38,7 +38,7 @@
 namespace tvm {
 namespace contrib {
 
-using namespace top;
+using namespace te;
 using namespace tir;
 /*!
  * \brief A base class to generate Hybrid Script.
diff --git a/src/driver/driver.cc b/src/driver/driver.cc
index c1facfa..ae017a3 100644
--- a/src/driver/driver.cc
+++ b/src/driver/driver.cc
@@ -23,7 +23,7 @@
  */
 #include <dmlc/thread_local.h>
 #include <tvm/driver/driver.h>
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 #include <tvm/tir/ir_pass.h>
 #include <tvm/target/codegen.h>
 #include <tvm/runtime/registry.h>
@@ -86,10 +86,10 @@ tir::Buffer BufferWithOffsetAlignment(Array<PrimExpr> shape,
     data_alignment, offset_factor, buffer_type);
 }
 
-void GetBinds(const Array<top::Tensor>& args,
+void GetBinds(const Array<te::Tensor>& args,
               bool compact,
-              const std::unordered_map<top::Tensor, tir::Buffer>& binds,
-              Map<top::Tensor, tir::Buffer>* out_binds,
+              const std::unordered_map<te::Tensor, tir::Buffer>& binds,
+              Map<te::Tensor, tir::Buffer>* out_binds,
               Array<ObjectRef>* out_arg_list,
               const BuildConfig& config) {
   *out_binds = binds;
@@ -116,21 +116,21 @@ void GetBinds(const Array<top::Tensor>& args,
 * \param config The build configuration.
 * \return The built Stmt.
 */
-tir::Stmt BuildStmt(top::Schedule sch,
-                    const Array<top::Tensor>& args,
-                    const std::unordered_map<top::Tensor, tir::Buffer>& binds,
+tir::Stmt BuildStmt(te::Schedule sch,
+                    const Array<te::Tensor>& args,
+                    const std::unordered_map<te::Tensor, tir::Buffer>& binds,
                     bool loop_partition,
                     Array<ObjectRef> *out_arg_list,
                     const BuildConfig& config) {
   sch = sch.normalize();
 
   // Phase 0
-  auto bounds = top::InferBound(sch);
-  auto stmt = top::ScheduleOps(sch, bounds, false);
+  auto bounds = te::InferBound(sch);
+  auto stmt = te::ScheduleOps(sch, bounds, false);
   stmt = tir::InjectPrefetch(stmt);
 
   bool compact = tir::VerifyCompactBuffer(stmt);
-  Map<top::Tensor, tir::Buffer> out_binds;
+  Map<te::Tensor, tir::Buffer> out_binds;
   GetBinds(args, compact, binds, &out_binds, out_arg_list, config);
 
   // Phase 1
@@ -164,10 +164,10 @@ tir::Stmt BuildStmt(top::Schedule sch,
   return stmt;
 }
 
-Array<LoweredFunc> lower(top::Schedule sch,
-                         const Array<top::Tensor>& args,
+Array<LoweredFunc> lower(te::Schedule sch,
+                         const Array<te::Tensor>& args,
                          const std::string& name,
-                         const std::unordered_map<top::Tensor, tir::Buffer>& binds,
+                         const std::unordered_map<te::Tensor, tir::Buffer>& binds,
                          const BuildConfig& config) {
   Array<ObjectRef> out_arg_list;
   auto stmt = BuildStmt(sch, args, binds, true, &out_arg_list, config);
diff --git a/src/ir/expr.cc b/src/ir/expr.cc
index f194a38..c061587 100644
--- a/src/ir/expr.cc
+++ b/src/ir/expr.cc
@@ -28,7 +28,7 @@
 // and are only used in minimum cases where they are clearly marked.
 //
 // Rationale: convert from IterVar and top::Tensor
-#include <tvm/top/tensor.h>
+#include <tvm/te/tensor.h>
 #include <tvm/tir/expr.h>
 
 namespace tvm {
@@ -47,8 +47,8 @@ PrimExpr PrimExpr::FromObject_(ObjectPtr<Object> ptr) {
   if (ptr->IsInstance<tir::IterVarNode>()) {
     return tir::IterVar(ptr)->var;
   }
-  if (ptr->IsInstance<top::TensorNode>()) {
-    return top::Tensor(ptr)();
+  if (ptr->IsInstance<te::TensorNode>()) {
+    return te::Tensor(ptr)();
   }
   CHECK(ObjectTypeChecker<PrimExpr>::Check(ptr.get()))
       << "Expect type " << ObjectTypeChecker<PrimExpr>::TypeName()
diff --git a/src/relay/backend/compile_engine.cc b/src/relay/backend/compile_engine.cc
index 8ba6eb4..96e9a69 100644
--- a/src/relay/backend/compile_engine.cc
+++ b/src/relay/backend/compile_engine.cc
@@ -22,9 +22,9 @@
  * \brief Internal compialtion engine.
  */
 #include <tvm/ir/type_functor.h>
-#include <tvm/top/schedule.h>
-#include <tvm/top/operation.h>
-#include <tvm/top/schedule_pass.h>
+#include <tvm/te/schedule.h>
+#include <tvm/te/operation.h>
+#include <tvm/te/schedule_pass.h>
 #include <tvm/runtime/registry.h>
 #include <tvm/relay/attrs/device_copy.h>
 #include <tvm/relay/analysis.h>
@@ -103,20 +103,20 @@ Array<IndexExpr> GetShape(const Array<IndexExpr>& shape) {
 // The getter to get schedule from compile engine.
 // Get schedule from functor.
 class ScheduleGetter :
-      public ExprFunctor<Array<top::Tensor>(const Expr&)> {
+      public ExprFunctor<Array<te::Tensor>(const Expr&)> {
  public:
   explicit ScheduleGetter(Target target)
       : target_(target), device_copy_op_(Op::Get("device_copy")) {}
 
-  std::pair<top::Schedule, CachedFunc> Create(const Function& prim_func) {
+  std::pair<te::Schedule, CachedFunc> Create(const Function& prim_func) {
     static auto fschedule =
         Op::GetAttr<FTVMSchedule>("FTVMSchedule");
     auto cache_node = make_object<CachedFuncNode>();
     cache_node->target = target_;
     for (Var param : prim_func->params) {
-      Array<tvm::top::Tensor> inputs;
+      Array<tvm::te::Tensor> inputs;
       if (const auto* ttype = param->checked_type().as<TensorTypeNode>()) {
-        tvm::top::Tensor tensor = tvm::top::placeholder(
+        tvm::te::Tensor tensor = tvm::te::placeholder(
             GetShape(ttype->shape), ttype->dtype);
         cache_node->inputs.push_back(tensor);
         inputs.push_back(tensor);
@@ -127,7 +127,7 @@ class ScheduleGetter :
           const auto* ttype = field.as<TensorTypeNode>();
           // TODO(@icemelon): Allow recursive tuple
           CHECK(ttype != nullptr);
-          tvm::top::Tensor tensor = tvm::top::placeholder(
+          tvm::te::Tensor tensor = tvm::te::placeholder(
               GetShape(ttype->shape), ttype->dtype);
           cache_node->inputs.push_back(tensor);
           inputs.push_back(tensor);
@@ -152,13 +152,13 @@ class ScheduleGetter :
     // Fusion over tupled results may leave identity relationships
     // between inputs and outputs, and those should not be scheduled.
     // Hence schedule only non PlaceholderOp outputs.
-    tvm::Array<top::Tensor> tensor_outs;
+    tvm::Array<te::Tensor> tensor_outs;
     for (const auto& tensor : cache_node->outputs) {
-      if (!tensor->op.as<top::PlaceholderOpNode>()) {
+      if (!tensor->op.as<te::PlaceholderOpNode>()) {
         tensor_outs.push_back(tensor);
       }
     }
-    top::Schedule schedule;
+    te::Schedule schedule;
     // No need to register schedule for device copy op.
     if (master_attrs_.as<DeviceCopyAttrs>() == nullptr) {
       schedule =
@@ -172,28 +172,28 @@ class ScheduleGetter :
     return std::make_pair(schedule, cfunc);
   }
 
-  Array<top::Tensor> VisitExpr(const Expr& expr) {
+  Array<te::Tensor> VisitExpr(const Expr& expr) {
     auto it = memo_.find(expr);
     if (it != memo_.end()) {
       return it->second;
     } else {
-      Array<top::Tensor> res = ExprFunctor::VisitExpr(expr);
+      Array<te::Tensor> res = ExprFunctor::VisitExpr(expr);
       memo_[expr] = res;
       return res;
     }
   }
 
-  Array<top::Tensor> VisitExpr_(const VarNode* op) final {
+  Array<te::Tensor> VisitExpr_(const VarNode* op) final {
     LOG(FATAL) << "Free variable " << op->name_hint();
     return {};
   }
 
-  Array<top::Tensor> VisitExpr_(const ConstantNode* op) final {
+  Array<te::Tensor> VisitExpr_(const ConstantNode* op) final {
     using tir::make_const;
     CHECK(op->is_scalar());
     void* data = op->data->data;
     DataType dtype = DataType(op->data->dtype);
-    auto value = top::compute({}, [&](const Array<tvm::tir::Var>&) {
+    auto value = te::compute({}, [&](const Array<tvm::tir::Var>&) {
         if (dtype == DataType::Int(32)) {
           return make_const(dtype, static_cast<const int32_t*>(data)[0]);
         } else if (dtype == DataType::Int(64)) {
@@ -213,19 +213,19 @@ class ScheduleGetter :
     return {value};
   }
 
-  Array<top::Tensor> VisitExpr_(const CallNode* call_node) final {
+  Array<te::Tensor> VisitExpr_(const CallNode* call_node) final {
     static auto fcompute =
         Op::GetAttr<FTVMCompute>("FTVMCompute");
     static auto fpattern =
         Op::GetAttr<TOpPattern>("TOpPattern");
 
-    Array<top::Tensor> inputs;
+    Array<te::Tensor> inputs;
     int count_tuple = 0;
     for (Expr arg : call_node->args) {
       if (arg->checked_type().as<TupleTypeNode>()) {
         ++count_tuple;
       }
-      for (top::Tensor tensor : VisitExpr(arg)) {
+      for (te::Tensor tensor : VisitExpr(arg)) {
         inputs.push_back(tensor);
       }
     }
@@ -255,12 +255,12 @@ class ScheduleGetter :
     CHECK(call_node->op.as<OpNode>())
         << "Primitive function only allows call into primitive ops";
     Op op = Downcast<Op>(call_node->op);
-    Array<top::Tensor> outputs;
+    Array<te::Tensor> outputs;
     // Skip fcompute for device copy operators as it is not registered.
     if (op == device_copy_op_) {
       const auto* copy_input = inputs[0].operator->();
-      outputs.push_back(top::TensorNode::make(copy_input->shape, copy_input->dtype,
-                                         top::Operation(), 0));
+      outputs.push_back(te::TensorNode::make(copy_input->shape, copy_input->dtype,
+                                         te::Operation(), 0));
     } else {
       outputs = fcompute[op](call_node->attrs, inputs,
                              call_node_type, target_);
@@ -294,33 +294,33 @@ class ScheduleGetter :
     return outputs;
   }
 
-  Array<top::Tensor> VisitExpr_(const FunctionNode* op) final {
+  Array<te::Tensor> VisitExpr_(const FunctionNode* op) final {
     LOG(FATAL) << "Do not support sub function";
-    return Array<top::Tensor>();
+    return Array<te::Tensor>();
   }
 
-  Array<top::Tensor> VisitExpr_(const LetNode* op) final {
-    Array<top::Tensor> val = VisitExpr(op->value);
+  Array<te::Tensor> VisitExpr_(const LetNode* op) final {
+    Array<te::Tensor> val = VisitExpr(op->value);
     CHECK(!memo_.count(op->var));
     memo_[op->var] = val;
     return VisitExpr(op->body);
   }
 
-  Array<top::Tensor> VisitExpr_(const TupleNode* op) final {
-    Array<top::Tensor> fields;
+  Array<te::Tensor> VisitExpr_(const TupleNode* op) final {
+    Array<te::Tensor> fields;
     for (Expr field : op->fields) {
       CHECK(field->checked_type().as<TensorTypeNode>())
           << "Only allow Tuple of Tensor";
-      Array<top::Tensor> res = VisitExpr(field);
+      Array<te::Tensor> res = VisitExpr(field);
       CHECK_EQ(res.size(), 1);
       fields.push_back(res[0]);
     }
     return fields;
   }
 
-  Array<top::Tensor> VisitExpr_(const TupleGetItemNode* op) final {
+  Array<te::Tensor> VisitExpr_(const TupleGetItemNode* op) final {
     const auto* tuple_type = op->tuple->type_as<TupleTypeNode>();
-    Array<top::Tensor> tuple = VisitExpr(op->tuple);
+    Array<te::Tensor> tuple = VisitExpr(op->tuple);
     CHECK_EQ(tuple_type->fields.size(), tuple.size());
     CHECK_GE(op->index, 0);
     CHECK_LT(static_cast<size_t>(op->index), tuple.size());
@@ -333,28 +333,28 @@ class ScheduleGetter :
   Attrs master_attrs_;
   int master_op_pattern_{0};
   std::ostringstream readable_name_stream_;
-  std::unordered_map<Expr, Array<top::Tensor>, ObjectHash, ObjectEqual> memo_;
-  Array<top::Operation> scalars_;
+  std::unordered_map<Expr, Array<te::Tensor>, ObjectHash, ObjectEqual> memo_;
+  Array<te::Operation> scalars_;
   // Cache device copy op for equivalence checking to reduce registry lookup
   // overhead for each invocation of call node when retrieving schedules.
   const Op& device_copy_op_;
 };
 
 // Creates shape function from functor.
-class MakeShapeFunc : public ExprFunctor<Array<top::Tensor>(const Expr&)> {
+class MakeShapeFunc : public ExprFunctor<Array<te::Tensor>(const Expr&)> {
  public:
   MakeShapeFunc() {}
 
-  std::pair<top::Schedule, CachedFunc> Create(const Function& prim_func) {
+  std::pair<te::Schedule, CachedFunc> Create(const Function& prim_func) {
     for (auto param : prim_func->params) {
       param_states_[param] = kNoNeed;
-      Array<tvm::top::Tensor> data_inputs;
-      Array<tvm::top::Tensor> shape_inputs;
+      Array<tvm::te::Tensor> data_inputs;
+      Array<tvm::te::Tensor> shape_inputs;
 
       auto add_placeholder = [&data_inputs, &shape_inputs](const TensorTypeNode* ttype) {
         // Add data placeholder
         Shape shape = GetShape(ttype->shape);
-        tvm::top::Tensor data_tensor = tvm::top::placeholder(shape, ttype->dtype);
+        tvm::te::Tensor data_tensor = tvm::te::placeholder(shape, ttype->dtype);
         data_inputs.push_back(data_tensor);
         // Add shape placeholder
         int64_t ndim = shape.size();
@@ -362,7 +362,7 @@ class MakeShapeFunc : public ExprFunctor<Array<top::Tensor>(const Expr&)> {
         if (ndim > 0) {
           sshape.push_back(tvm::Integer(ndim));
         }
-        tvm::top::Tensor shape_tensor = tvm::top::placeholder(sshape, DataType::Int(64));
+        tvm::te::Tensor shape_tensor = tvm::te::placeholder(sshape, DataType::Int(64));
         shape_inputs.push_back(shape_tensor);
       };
 
@@ -413,12 +413,12 @@ class MakeShapeFunc : public ExprFunctor<Array<top::Tensor>(const Expr&)> {
 
     CachedFunc cfunc(cache_node);
     // generate schedule for shape func
-    Array<top::Operation> out_ops;
+    Array<te::Operation> out_ops;
     for (auto t : cache_node->outputs) {
       out_ops.push_back(t->op);
     }
-    auto schedule = top::create_schedule(out_ops);
-    tvm::top::AutoInlineInjective(schedule);
+    auto schedule = te::create_schedule(out_ops);
+    tvm::te::AutoInlineInjective(schedule);
     for (const auto& scalar : scalars_) {
       auto scalar_op = scalar->op;
       if (schedule->Contain(scalar_op)) {
@@ -428,12 +428,12 @@ class MakeShapeFunc : public ExprFunctor<Array<top::Tensor>(const Expr&)> {
     return std::make_pair(schedule, cfunc);
   }
 
-  Array<top::Tensor> VisitExpr(const Expr& expr) {
+  Array<te::Tensor> VisitExpr(const Expr& expr) {
     auto it = memo_.find(expr);
     if (it != memo_.end()) {
       return it->second;
     } else {
-      Array<top::Tensor> res = ExprFunctor::VisitExpr(expr);
+      Array<te::Tensor> res = ExprFunctor::VisitExpr(expr);
       if (expr.as<VarNode>() == nullptr) {
         // Do not memoize vars because shape functions could use either the data
         // or the shape of a var each time.
@@ -443,7 +443,7 @@ class MakeShapeFunc : public ExprFunctor<Array<top::Tensor>(const Expr&)> {
     }
   }
 
-  Array<top::Tensor> VisitExpr_(const VarNode* var_node) final {
+  Array<te::Tensor> VisitExpr_(const VarNode* var_node) final {
     auto var = GetRef<Var>(var_node);
     auto it = param_states_.find(var);
     if (it == param_states_.end()) {
@@ -462,7 +462,7 @@ class MakeShapeFunc : public ExprFunctor<Array<top::Tensor>(const Expr&)> {
     }
   }
 
-  Array<top::Tensor> VisitExpr_(const ConstantNode* op) final {
+  Array<te::Tensor> VisitExpr_(const ConstantNode* op) final {
     using tir::make_const;
     CHECK(data_dependants_.size());
     CHECK(op->is_scalar());
@@ -470,7 +470,7 @@ class MakeShapeFunc : public ExprFunctor<Array<top::Tensor>(const Expr&)> {
     if (data_dependant) {
       void* data = op->data->data;
       DataType dtype = DataType(op->data->dtype);
-      auto value = tvm::top::compute({}, [&](const Array<tvm::tir::Var>&) {
+      auto value = tvm::te::compute({}, [&](const Array<tvm::tir::Var>&) {
           if (dtype == DataType::Int(32)) {
             return make_const(dtype, static_cast<const int32_t*>(data)[0]);
           } else if (dtype == DataType::Int(64)) {
@@ -489,7 +489,7 @@ class MakeShapeFunc : public ExprFunctor<Array<top::Tensor>(const Expr&)> {
       scalars_.push_back(value);
       return {value};
     } else {
-      auto value = tvm::top::compute({}, [&](const Array<tvm::tir::Var>&) {
+      auto value = tvm::te::compute({}, [&](const Array<tvm::tir::Var>&) {
           return tir::make_const(DataType::Int(64), 0);
       }, "shape_const", topi::kBroadcast);
       scalars_.push_back(value);
@@ -497,7 +497,7 @@ class MakeShapeFunc : public ExprFunctor<Array<top::Tensor>(const Expr&)> {
     }
   }
 
-  Array<top::Tensor> VisitExpr_(const CallNode* call_node) final {
+  Array<te::Tensor> VisitExpr_(const CallNode* call_node) final {
     static auto fshape_func = Op::GetAttr<FShapeFunc>("FShapeFunc");
     static auto tshape_data_dependant = Op::GetAttr<TShapeDataDependant>(
         "TShapeDataDependant");
@@ -514,13 +514,13 @@ class MakeShapeFunc : public ExprFunctor<Array<top::Tensor>(const Expr&)> {
 
     data_dependants_.push_back(tshape_data_dependant[op]);
     // Visit all inputs
-    Array<top::Tensor> inputs;
+    Array<te::Tensor> inputs;
     int count_tuple = 0;
     for (Expr arg : call_node->args) {
       if (arg->checked_type().as<TupleTypeNode>()) {
         ++count_tuple;
       }
-      for (top::Tensor tensor : VisitExpr(arg)) {
+      for (te::Tensor tensor : VisitExpr(arg)) {
         inputs.push_back(tensor);
       }
     }
@@ -550,24 +550,24 @@ class MakeShapeFunc : public ExprFunctor<Array<top::Tensor>(const Expr&)> {
     return outputs;
   }
 
-  Array<top::Tensor> VisitExpr_(const FunctionNode* op) final {
+  Array<te::Tensor> VisitExpr_(const FunctionNode* op) final {
     LOG(FATAL) << "Do not support sub function";
-    return Array<top::Tensor>();
+    return Array<te::Tensor>();
   }
 
-  Array<top::Tensor> VisitExpr_(const LetNode* op) final {
-    Array<top::Tensor> val = VisitExpr(op->value);
+  Array<te::Tensor> VisitExpr_(const LetNode* op) final {
+    Array<te::Tensor> val = VisitExpr(op->value);
     CHECK(!memo_.count(op->var));
     memo_[op->var] = val;
     return VisitExpr(op->body);
   }
 
-  Array<top::Tensor> VisitExpr_(const TupleNode* op) final {
-    Array<top::Tensor> fields;
+  Array<te::Tensor> VisitExpr_(const TupleNode* op) final {
+    Array<te::Tensor> fields;
     for (Expr field : op->fields) {
       CHECK(field->checked_type().as<TensorTypeNode>())
         << "Only allow Tuple of Tensor";
-      Array<top::Tensor> res = VisitExpr(field);
+      Array<te::Tensor> res = VisitExpr(field);
       CHECK_EQ(res.size(), 1);
       fields.push_back(res[0]);
     }
@@ -580,15 +580,15 @@ class MakeShapeFunc : public ExprFunctor<Array<top::Tensor>(const Expr&)> {
   /*! \brief Map from parameter to its shape function usage state */
   std::unordered_map<Expr, int, ObjectHash, ObjectEqual> param_states_;
   /*! \brief Map from parameter to list of data placeholder */
-  std::unordered_map<Expr, Array<top::Tensor>, ObjectHash, ObjectEqual> param_data_;
+  std::unordered_map<Expr, Array<te::Tensor>, ObjectHash, ObjectEqual> param_data_;
   /*! \brief Map from parameter to list of shape placeholder */
-  std::unordered_map<Expr, Array<top::Tensor>, ObjectHash, ObjectEqual> param_shapes_;
+  std::unordered_map<Expr, Array<te::Tensor>, ObjectHash, ObjectEqual> param_shapes_;
   /*! \brief Memoized visit result */
-  std::unordered_map<Expr, Array<top::Tensor>, ObjectHash, ObjectEqual> memo_;
+  std::unordered_map<Expr, Array<te::Tensor>, ObjectHash, ObjectEqual> memo_;
   /*! \brief Stack of data dependencies for shape function */
   std::vector<bool> data_dependants_;
   /*! \brief Scalars used in the shape function */
-  Array<top::Tensor> scalars_;
+  Array<te::Tensor> scalars_;
 };
 
 class CompileEngineImpl : public CompileEngineNode {
@@ -677,7 +677,7 @@ class CompileEngineImpl : public CompileEngineNode {
    * \return Pair of schedule and cache.
    *  The funcs field in cache is not yet populated.
    */
-  std::pair<top::Schedule, CachedFunc> CreateSchedule(
+  std::pair<te::Schedule, CachedFunc> CreateSchedule(
       const Function& source_func, const Target& target) {
     return ScheduleGetter(target).Create(source_func);
   }
@@ -728,8 +728,8 @@ class CompileEngineImpl : public CompileEngineNode {
 
     cache_node->func_name = GetUniqueName(cache_node->func_name);
     // NOTE: array will copy on write.
-    Array<top::Tensor> all_args = cache_node->inputs;
-    for (top::Tensor arg : cache_node->outputs) {
+    Array<te::Tensor> all_args = cache_node->inputs;
+    for (te::Tensor arg : cache_node->outputs) {
       all_args.push_back(arg);
     }
     // lower the function
@@ -738,7 +738,7 @@ class CompileEngineImpl : public CompileEngineNode {
           spair.first, all_args, cache_node->func_name, key->source_func);
     } else {
       tvm::BuildConfig bcfg = BuildConfig::Create();
-      std::unordered_map<top::Tensor, tir::Buffer> binds;
+      std::unordered_map<te::Tensor, tir::Buffer> binds;
       cache_node->funcs = tvm::lower(spair.first, all_args, cache_node->func_name, binds, bcfg);
     }
     value->cached_func = CachedFunc(cache_node);
@@ -768,12 +768,12 @@ class CompileEngineImpl : public CompileEngineNode {
     cache_node->func_name = GetUniqueName(cache_node->func_name);
     cache_node->target = key->target;
 
-    Array<top::Tensor> all_args = cache_node->inputs;
-    for (top::Tensor arg : cache_node->outputs) {
+    Array<te::Tensor> all_args = cache_node->inputs;
+    for (te::Tensor arg : cache_node->outputs) {
       all_args.push_back(arg);
     }
     tvm::BuildConfig bcfg = BuildConfig::Create();
-    std::unordered_map<top::Tensor, tir::Buffer> binds;
+    std::unordered_map<te::Tensor, tir::Buffer> binds;
     cache_node->funcs = tvm::lower(spair.first, all_args, cache_node->func_name, binds, bcfg);
     value->cached_func = CachedFunc(cache_node);
     return value;
diff --git a/src/relay/backend/compile_engine.h b/src/relay/backend/compile_engine.h
index 4ea4976..42142f1 100644
--- a/src/relay/backend/compile_engine.h
+++ b/src/relay/backend/compile_engine.h
@@ -51,9 +51,9 @@ struct CachedFuncNode : public Object {
   /*! \brief Function name */
   std::string func_name;
   /* \brief The inputs to the function */
-  tvm::Array<top::Tensor> inputs;
+  tvm::Array<te::Tensor> inputs;
   /* \brief The outputs to the function */
-  tvm::Array<top::Tensor> outputs;
+  tvm::Array<te::Tensor> outputs;
   /*! \brief The lowered functions to support the function. */
   tvm::Array<tir::LoweredFunc> funcs;
   /*! \brief Parameter usage states in the shape function. */
diff --git a/src/relay/backend/utils.h b/src/relay/backend/utils.h
index f6cd8e0..4b28121 100644
--- a/src/relay/backend/utils.h
+++ b/src/relay/backend/utils.h
@@ -30,7 +30,7 @@
 #include <tvm/driver/driver.h>
 #include <tvm/target/codegen.h>
 #include <tvm/tir/ir_pass.h>
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 
 #include <typeinfo>
 #include <string>
diff --git a/src/relay/backend/vm/compiler.cc b/src/relay/backend/vm/compiler.cc
index 79245ab..bbb5ac4 100644
--- a/src/relay/backend/vm/compiler.cc
+++ b/src/relay/backend/vm/compiler.cc
@@ -22,7 +22,7 @@
  * \brief A compiler from relay::Module to the VM byte code.
  */
 
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 #include <tvm/ir/error.h>
 #include <tvm/relay/expr_functor.h>
 #include <tvm/relay/interpreter.h>
diff --git a/src/relay/op/annotation/annotation.cc b/src/relay/op/annotation/annotation.cc
index 50a55f5..6106b07 100644
--- a/src/relay/op/annotation/annotation.cc
+++ b/src/relay/op/annotation/annotation.cc
@@ -78,8 +78,8 @@ TVM_ADD_FILELINE)
 .set_attr<TOpIsStateful>("TOpIsStateful", false)
 .set_attr<FInferCorrectLayout>("FInferCorrectLayout", ElemwiseArbitraryLayout)
 .set_attr<FTVMCompute>("FTVMCompute",
-                       [](const Attrs& attrs, const Array<top::Tensor>& inputs,
-                          const Type& out_dtype, const Target& target) -> Array<top::Tensor> {
+                       [](const Attrs& attrs, const Array<te::Tensor>& inputs,
+                          const Type& out_dtype, const Target& target) -> Array<te::Tensor> {
                          return {topi::identity(inputs[0])};
                        });
 
@@ -104,8 +104,8 @@ TVM_ADD_FILELINE)
 .set_attr<TOpIsStateful>("TOpIsStateful", false)
 .set_attr<FInferCorrectLayout>("FInferCorrectLayout", ElemwiseArbitraryLayout)
 .set_attr<FTVMCompute>("FTVMCompute",
-                       [](const Attrs& attrs, const Array<top::Tensor>& inputs,
-                          const Type& out_dtype, const Target& target) -> Array<top::Tensor> {
+                       [](const Attrs& attrs, const Array<te::Tensor>& inputs,
+                          const Type& out_dtype, const Target& target) -> Array<te::Tensor> {
                          return {topi::identity(inputs[0])};
                        });
 
@@ -122,8 +122,8 @@ Mark the start of bitpacking.
 .set_attr<FInferCorrectLayout>("FInferCorrectLayout",
                                ElemwiseArbitraryLayout)
 .set_attr<FTVMCompute>("FTVMCompute",
-                       [](const Attrs& attrs, const Array<top::Tensor>& inputs,
-                          const Type& out_dtype, const Target& target) -> Array<top::Tensor> {
+                       [](const Attrs& attrs, const Array<te::Tensor>& inputs,
+                          const Type& out_dtype, const Target& target) -> Array<te::Tensor> {
                          return {topi::identity(inputs[0])};
                        });
 
@@ -139,8 +139,8 @@ Mark the end of bitpacking.
 .set_attr<FInferCorrectLayout>("FInferCorrectLayout",
                                ElemwiseArbitraryLayout)
 .set_attr<FTVMCompute>("FTVMCompute",
-                       [](const Attrs& attrs, const Array<top::Tensor>& inputs,
-                          const Type& out_dtype, const Target& target) -> Array<top::Tensor> {
+                       [](const Attrs& attrs, const Array<te::Tensor>& inputs,
+                          const Type& out_dtype, const Target& target) -> Array<te::Tensor> {
                          return {topi::identity(inputs[0])};
                        });
 
@@ -162,9 +162,9 @@ Mark a checkpoint for checkpointing memory optimization.
 .set_attr<FInferCorrectLayout>("FInferCorrectLayout",
                                ElemwiseArbitraryLayout)
 .set_attr<FTVMCompute>("FTVMCompute",
-                       [](const Attrs& attrs, const Array<top::Tensor>& inputs,
-                          const Type& out_dtype, const Target& target) -> Array<top::Tensor> {
-                         Array<top::Tensor> outputs;
+                       [](const Attrs& attrs, const Array<te::Tensor>& inputs,
+                          const Type& out_dtype, const Target& target) -> Array<te::Tensor> {
+                         Array<te::Tensor> outputs;
                          for (size_t i = 0; i < inputs.size(); ++i) {
                            outputs.push_back(topi::identity(inputs[i]));
                          }
@@ -183,8 +183,8 @@ Beginning of a region that is handled by a given compiler.
 .set_attr<FInferCorrectLayout>("FInferCorrectLayout",
                                ElemwiseArbitraryLayout)
 .set_attr<FTVMCompute>("FTVMCompute",
-                       [](const Attrs& attrs, const Array<top::Tensor>& inputs,
-                          const Type& out_dtype, const Target& target) -> Array<top::Tensor> {
+                       [](const Attrs& attrs, const Array<te::Tensor>& inputs,
+                          const Type& out_dtype, const Target& target) -> Array<te::Tensor> {
                          return {topi::identity(inputs[0])};
                        });
 
@@ -208,8 +208,8 @@ End of a region that is handled by a given compiler.
 .set_attr<FInferCorrectLayout>("FInferCorrectLayout",
                                ElemwiseArbitraryLayout)
 .set_attr<FTVMCompute>("FTVMCompute",
-                       [](const Attrs& attrs, const Array<top::Tensor>& inputs,
-                          const Type& out_dtype, const Target& target) -> Array<top::Tensor> {
+                       [](const Attrs& attrs, const Array<te::Tensor>& inputs,
+                          const Type& out_dtype, const Target& target) -> Array<te::Tensor> {
                          return {topi::identity(inputs[0])};
                        });
 
diff --git a/src/relay/op/debug.cc b/src/relay/op/debug.cc
index 9b4647d..14c0a01 100644
--- a/src/relay/op/debug.cc
+++ b/src/relay/op/debug.cc
@@ -35,11 +35,11 @@ namespace relay {
 
 TVM_REGISTER_NODE_TYPE(DebugAttrs);
 
-Array<top::Tensor> DebugCompute(const Attrs& attrs,
-                           const Array<top::Tensor>& inputs,
+Array<te::Tensor> DebugCompute(const Attrs& attrs,
+                           const Array<te::Tensor>& inputs,
                            const Type& out_type,
                            const Target& target) {
-  return Array<top::Tensor>{ topi::identity(inputs[0]) };
+  return Array<te::Tensor>{ topi::identity(inputs[0]) };
 }
 
 RELAY_REGISTER_OP("debug")
diff --git a/src/relay/op/memory/memory.cc b/src/relay/op/memory/memory.cc
index aa0ba2d..076e3fc 100644
--- a/src/relay/op/memory/memory.cc
+++ b/src/relay/op/memory/memory.cc
@@ -82,8 +82,8 @@ RELAY_REGISTER_OP("memory.alloc_storage")
     .set_attr<TNonComputational>("TNonComputational", true)
     .set_attr<FInferCorrectLayout>("FInferCorrectLayout", ElemwiseArbitraryLayout)
     .set_attr<FTVMCompute>("FTVMCompute",
-                           [](const Attrs& attrs, const Array<top::Tensor>& inputs,
-                              const Type& out_dtype, const Target& target) -> Array<top::Tensor> {
+                           [](const Attrs& attrs, const Array<te::Tensor>& inputs,
+                              const Type& out_dtype, const Target& target) -> Array<te::Tensor> {
                              return {topi::identity(inputs[0])};
                            });
 
@@ -178,8 +178,8 @@ RELAY_REGISTER_OP("memory.alloc_tensor")
     .set_attr<TNonComputational>("TNonComputational", true)
     .set_attr<FInferCorrectLayout>("FInferCorrectLayout", ElemwiseArbitraryLayout)
     .set_attr<FTVMCompute>("FTVMCompute",
-                           [](const Attrs& attrs, const Array<top::Tensor>& inputs,
-                              const Type& out_dtype, const Target& target) -> Array<top::Tensor> {
+                           [](const Attrs& attrs, const Array<te::Tensor>& inputs,
+                              const Type& out_dtype, const Target& target) -> Array<te::Tensor> {
                              return {topi::identity(inputs[0])};
                            });
 
@@ -227,8 +227,8 @@ RELAY_REGISTER_OP("memory.invoke_tvm_op")
     .set_attr<TNonComputational>("TNonComputational", true)
     .set_attr<FInferCorrectLayout>("FInferCorrectLayout", ElemwiseArbitraryLayout)
     .set_attr<FTVMCompute>("FTVMCompute",
-                           [](const Attrs& attrs, const Array<top::Tensor>& inputs,
-                              const Type& out_dtype, const Target& target) -> Array<top::Tensor> {
+                           [](const Attrs& attrs, const Array<te::Tensor>& inputs,
+                              const Type& out_dtype, const Target& target) -> Array<te::Tensor> {
                              return {topi::identity(inputs[0])};
                            });
 
@@ -251,8 +251,8 @@ RELAY_REGISTER_OP("memory.kill")
     .set_attr<TNonComputational>("TNonComputational", true)
     .set_attr<FInferCorrectLayout>("FInferCorrectLayout", ElemwiseArbitraryLayout)
     .set_attr<FTVMCompute>("FTVMCompute",
-                           [](const Attrs& attrs, const Array<top::Tensor>& inputs,
-                              const Type& out_dtype, const Target& target) -> Array<top::Tensor> {
+                           [](const Attrs& attrs, const Array<te::Tensor>& inputs,
+                              const Type& out_dtype, const Target& target) -> Array<te::Tensor> {
                              return {topi::identity(inputs[0])};
                            });
 
@@ -339,8 +339,8 @@ RELAY_REGISTER_OP("memory.shape_func")
     .set_attr<TNonComputational>("TNonComputational", true)
     .set_attr<FInferCorrectLayout>("FInferCorrectLayout", ElemwiseArbitraryLayout)
     .set_attr<FTVMCompute>("FTVMCompute",
-                           [](const Attrs& attrs, const Array<top::Tensor>& inputs,
-                              const Type& out_dtype, const Target& target) -> Array<top::Tensor> {
+                           [](const Attrs& attrs, const Array<te::Tensor>& inputs,
+                              const Type& out_dtype, const Target& target) -> Array<te::Tensor> {
                              return {topi::identity(inputs[0])};
                            });
 
diff --git a/src/relay/op/nn/nn.cc b/src/relay/op/nn/nn.cc
index 1f6ad8f..ee4471a 100644
--- a/src/relay/op/nn/nn.cc
+++ b/src/relay/op/nn/nn.cc
@@ -93,10 +93,10 @@ RELAY_REGISTER_OP("nn.bias_add")
 .add_argument("bias", "1D Tensor", "Bias.")
 .set_support_level(1)
 .add_type_rel("BiasAdd", BiasAddRel)
-.set_attr<FTVMCompute>("FTVMCompute", [](const Attrs& attrs, const Array<top::Tensor>& inputs,
+.set_attr<FTVMCompute>("FTVMCompute", [](const Attrs& attrs, const Array<te::Tensor>& inputs,
                                         const Type& out_type, const Target& target) {
     const auto* param = attrs.as<BiasAddAttrs>();
-    return tvm::Array<tvm::top::Tensor>{topi::nn::bias_add(inputs[0], inputs[1], param->axis)};
+    return tvm::Array<tvm::te::Tensor>{topi::nn::bias_add(inputs[0], inputs[1], param->axis)};
 });
 
 
@@ -233,11 +233,11 @@ RELAY_REGISTER_OP("nn.leaky_relu")
 .set_attr<FInferCorrectLayout>("FInferCorrectLayout", ElemwiseArbitraryLayout)
 .set_attr<FTVMCompute>(
   "FTVMCompute", [](const Attrs& attrs,
-                    const Array<top::Tensor>& inputs,
+                    const Array<te::Tensor>& inputs,
                     const Type& out_type,
                     const Target& target) {
     const auto* param = attrs.as<LeakyReluAttrs>();
-    return Array<top::Tensor>{ topi::leaky_relu(inputs[0], param->alpha) };
+    return Array<te::Tensor>{ topi::leaky_relu(inputs[0], param->alpha) };
 });
 
 
@@ -314,11 +314,11 @@ where :math:`*` is an channelwise multiplication for each sample in the batch.
 .set_attr<FInferCorrectLayout>("FInferCorrectLayout", PReluInferCorrectLayout<PReluAttrs>)
 .set_attr<FTVMCompute>(
   "FTVMCompute", [](const Attrs& attrs,
-                    const Array<top::Tensor>& inputs,
+                    const Array<te::Tensor>& inputs,
                     const Type& out_type,
                     const Target& target) {
     const auto* param = attrs.as<PReluAttrs>();
-    return Array<top::Tensor>{ topi::prelu(inputs[0], inputs[1], param->axis)};
+    return Array<te::Tensor>{ topi::prelu(inputs[0], inputs[1], param->axis)};
 });
 
 
@@ -350,12 +350,12 @@ RELAY_REGISTER_OP("nn.softmax")
 .set_support_level(1)
 .add_type_rel("Identity", IdentityRel)
 .set_attr<FTVMCompute>("FTVMCompute", [](const Attrs& attrs,
-                                         const Array<top::Tensor>& inputs,
+                                         const Array<te::Tensor>& inputs,
                                          const Type& out_type,
                                          const Target& target) {
   const auto* param = attrs.as<SoftmaxAttrs>();
   CHECK(param != nullptr);
-  return Array<top::Tensor>{ topi::nn::softmax(inputs[0], param->axis) };
+  return Array<te::Tensor>{ topi::nn::softmax(inputs[0], param->axis) };
 });
 
 
@@ -384,14 +384,14 @@ RELAY_REGISTER_OP("nn.log_softmax")
 .set_support_level(1)
 .add_type_rel("Identity", IdentityRel)
 .set_attr<FTVMCompute>("FTVMCompute", [](const Attrs& attrs,
-                                         const Array<top::Tensor>& inputs,
+                                         const Array<te::Tensor>& inputs,
                                          const Type& out_type,
                                          const Target& target) {
   const auto* param = attrs.as<SoftmaxAttrs>();
   CHECK(param != nullptr);
   CHECK(param->axis == -1 || param->axis == static_cast<int32_t>(inputs[0].ndim()) - 1)
       << "log_softmax currently only works on last dimension";
-  return Array<top::Tensor>{ topi::nn::log_softmax(inputs[0]) };
+  return Array<te::Tensor>{ topi::nn::log_softmax(inputs[0]) };
 });
 
 
@@ -461,10 +461,10 @@ Example::
 .add_type_rel("BatchFlatten", BatchFlattenRel)
 .set_attr<FTVMCompute>(
   "FTVMCompute", [](const Attrs& attrs,
-                    const Array<top::Tensor>& inputs,
+                    const Array<te::Tensor>& inputs,
                     const Type& out_type,
                     const Target& target) {
-    return Array<top::Tensor>{ topi::nn::flatten(inputs[0]) };
+    return Array<te::Tensor>{ topi::nn::flatten(inputs[0]) };
 });
 
 
@@ -488,10 +488,10 @@ RELAY_REGISTER_OP("nn.relu")
 .add_type_rel("Identity", IdentityRel)
 .set_attr<FInferCorrectLayout>("FInferCorrectLayout", ElemwiseArbitraryLayout)
 .set_attr<FTVMCompute>("FTVMCompute", [](const Attrs& attrs,
-                                         const Array<top::Tensor>& inputs,
+                                         const Array<te::Tensor>& inputs,
                                          const Type& out_type,
                                          const Target& target) {
-  return Array<top::Tensor>{ topi::relu(inputs[0], 0.0f) };
+  return Array<te::Tensor>{ topi::relu(inputs[0], 0.0f) };
 });
 
 
diff --git a/src/relay/op/nn/pad.cc b/src/relay/op/nn/pad.cc
index 1656158..94602ec 100644
--- a/src/relay/op/nn/pad.cc
+++ b/src/relay/op/nn/pad.cc
@@ -160,8 +160,8 @@ bool PadRel(const Array<Type>& types,
   return true;
 }
 
-Array<top::Tensor> PadCompute(const Attrs& attrs,
-                         const Array<top::Tensor>& inputs,
+Array<te::Tensor> PadCompute(const Attrs& attrs,
+                         const Array<te::Tensor>& inputs,
                          const Type& out_type,
                          const Target& target) {
   const auto* param = attrs.as<PadAttrs>();
@@ -180,7 +180,7 @@ Array<top::Tensor> PadCompute(const Attrs& attrs,
     pad_after.push_back(pad_width[i][1]);
   }
   const auto* out_ttype = out_type.as<TensorTypeNode>();
-  return Array<top::Tensor>{ topi::pad(inputs[0], pad_before, pad_after,
+  return Array<te::Tensor>{ topi::pad(inputs[0], pad_before, pad_after,
                                   tvm::tir::make_const(out_ttype->dtype, param->pad_value),
                                   "T_pad",
                                   topi::kElementWise,
diff --git a/src/relay/op/nn/pooling.cc b/src/relay/op/nn/pooling.cc
index 7b6deff..dfda088 100644
--- a/src/relay/op/nn/pooling.cc
+++ b/src/relay/op/nn/pooling.cc
@@ -166,8 +166,8 @@ bool Pool2DRel(const Array<Type>& types,
 }
 
 template<typename AttrType, topi::nn::PoolType mode>
-Array<top::Tensor> Pool2DCompute(const Attrs& attrs,
-                            const Array<top::Tensor>& inputs,
+Array<te::Tensor> Pool2DCompute(const Attrs& attrs,
+                            const Array<te::Tensor>& inputs,
                             const Type& out_type,
                             const Target& target) {
   static const Layout kNCHW("NCHW");
@@ -203,11 +203,11 @@ Array<top::Tensor> Pool2DCompute(const Attrs& attrs,
   }
   if (mode == topi::nn::kAvgPool) {
     bool count_include_pad = reinterpret_cast<const AvgPool2DAttrs*>(param)->count_include_pad;
-    return Array<top::Tensor>{
+    return Array<te::Tensor>{
       topi::nn::pool(inputs[0], pool_size, strides, padding,
                      mode, ceil_mode, layout.name(), count_include_pad)};
   } else {
-    return Array<top::Tensor>{
+    return Array<te::Tensor>{
       topi::nn::pool(inputs[0], pool_size, strides, padding,
                      mode, ceil_mode, layout.name())};
   }
@@ -333,8 +333,8 @@ bool GlobalPool2DRel(const Array<Type>& types,
 
 
 template<topi::nn::PoolType mode>
-Array<top::Tensor> GlobalPool2DCompute(const Attrs& attrs,
-                                  const Array<top::Tensor>& inputs,
+Array<te::Tensor> GlobalPool2DCompute(const Attrs& attrs,
+                                  const Array<te::Tensor>& inputs,
                                   const Type& out_type,
                                   const Target& target) {
   static const Layout kNCHW("NCHW");
@@ -351,7 +351,7 @@ Array<top::Tensor> GlobalPool2DCompute(const Attrs& attrs,
   CHECK(inputs[0].ndim() == 4U || inputs[0].ndim() == 5U)
     << "Pool2D only support 4-D input (e.g., NCHW)"
     << " or 5-D input (last dimension is a split of channel)";
-  return Array<top::Tensor>{
+  return Array<te::Tensor>{
     topi::nn::global_pool(inputs[0], mode, layout.name()) };
 }
 
@@ -467,8 +467,8 @@ bool AdaptivePool2DRel(const Array<Type>& types,
 }
 
 template<topi::nn::PoolType mode>
-Array<top::Tensor> AdaptivePool2DCompute(const Attrs& attrs,
-                                    const Array<top::Tensor>& inputs,
+Array<te::Tensor> AdaptivePool2DCompute(const Attrs& attrs,
+                                    const Array<te::Tensor>& inputs,
                                     const Type& out_type,
                                     const Target& target) {
   static const Layout kNCHW("NCHW");
@@ -500,7 +500,7 @@ Array<top::Tensor> AdaptivePool2DCompute(const Attrs& attrs,
     output_height = output_size[0];
     output_width = output_size[1];
   }
-  return Array<top::Tensor>{
+  return Array<te::Tensor>{
     topi::nn::adaptive_pool(inputs[0], Array<IndexExpr>{ output_height, output_width },
                             mode, layout.name()) };
 }
@@ -596,7 +596,7 @@ bool Pool2DGradRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
 }
 
 template <typename AttrType, topi::nn::PoolType mode>
-Array<top::Tensor> Pool2DGradCompute(const Attrs& attrs, const Array<top::Tensor>& inputs,
+Array<te::Tensor> Pool2DGradCompute(const Attrs& attrs, const Array<te::Tensor>& inputs,
                                 const Type& out_type, const Target& target) {
   static const Layout kNCHW("NCHW");
   const auto* param = attrs.as<AttrType>();
@@ -633,10 +633,10 @@ Array<top::Tensor> Pool2DGradCompute(const Attrs& attrs, const Array<top::Tensor
   }
   if (mode == topi::nn::kAvgPool) {
     bool count_include_pad = reinterpret_cast<const AvgPool2DAttrs*>(param)->count_include_pad;
-    return Array<top::Tensor>{topi::nn::pool_grad(inputs[0], inputs[1], pool_size, strides, padding,
+    return Array<te::Tensor>{topi::nn::pool_grad(inputs[0], inputs[1], pool_size, strides, padding,
         mode, ceil_mode, layout.name(), count_include_pad)};
   } else {
-    return Array<top::Tensor>{topi::nn::pool_grad(inputs[0], inputs[1], pool_size, strides, padding,
+    return Array<te::Tensor>{topi::nn::pool_grad(inputs[0], inputs[1], pool_size, strides, padding,
         mode, ceil_mode, layout.name())};
   }
 }
@@ -798,8 +798,8 @@ bool Pool1DRel(const Array<Type>& types,
 
 
 template<typename AttrType, topi::nn::PoolType mode>
-Array<top::Tensor> Pool1DCompute(const Attrs& attrs,
-                            const Array<top::Tensor>& inputs,
+Array<te::Tensor> Pool1DCompute(const Attrs& attrs,
+                            const Array<te::Tensor>& inputs,
                             const Type& out_type,
                             const Target& target) {
   static const Layout kNCW("NCW");
@@ -825,11 +825,11 @@ Array<top::Tensor> Pool1DCompute(const Attrs& attrs,
 
   if (mode == topi::nn::kAvgPool) {
     bool count_include_pad = reinterpret_cast<const AvgPool1DAttrs*>(param)->count_include_pad;
-    return Array<top::Tensor>{
+    return Array<te::Tensor>{
       topi::nn::pool1d(inputs[0], pool_size, strides, padding,
                        mode, ceil_mode, layout.name(), count_include_pad)};
   } else {
-    return Array<top::Tensor>{
+    return Array<te::Tensor>{
       topi::nn::pool1d(inputs[0], pool_size, strides, padding,
                        mode, ceil_mode, layout.name())};
   }
@@ -993,8 +993,8 @@ bool Pool3DRel(const Array<Type>& types,
 
 
 template<typename AttrType, topi::nn::PoolType mode>
-Array<top::Tensor> Pool3DCompute(const Attrs& attrs,
-                            const Array<top::Tensor>& inputs,
+Array<te::Tensor> Pool3DCompute(const Attrs& attrs,
+                            const Array<te::Tensor>& inputs,
                             const Type& out_type,
                             const Target& target) {
   static const Layout kNCDHW("NCDHW");
@@ -1033,11 +1033,11 @@ Array<top::Tensor> Pool3DCompute(const Attrs& attrs,
   }
   if (mode == topi::nn::kAvgPool) {
     bool count_include_pad = reinterpret_cast<const AvgPool3DAttrs*>(param)->count_include_pad;
-    return Array<top::Tensor>{
+    return Array<te::Tensor>{
       topi::nn::pool3d(inputs[0], pool_size, strides, padding,
                        mode, ceil_mode, layout.name(), count_include_pad)};
   } else {
-    return Array<top::Tensor>{
+    return Array<te::Tensor>{
       topi::nn::pool3d(inputs[0], pool_size, strides, padding,
                        mode, ceil_mode, layout.name())};
   }
diff --git a/src/relay/op/tensor/binary.cc b/src/relay/op/tensor/binary.cc
index 00ebddb..6561dd1 100644
--- a/src/relay/op/tensor/binary.cc
+++ b/src/relay/op/tensor/binary.cc
@@ -32,9 +32,9 @@ namespace relay {
 
 #define RELAY_BINARY_COMPUTE(FTOPI)                        \
   [] (const Attrs& attrs,                                  \
-      const Array<top::Tensor>& inputs,                         \
+      const Array<te::Tensor>& inputs,                         \
       const Type& out_type,                                \
-      const Target& target) -> Array<top::Tensor> {             \
+      const Target& target) -> Array<te::Tensor> {             \
     CHECK_EQ(inputs.size(), 2U);                           \
     return {FTOPI(inputs[0], inputs[1])};                  \
   }                                                        \
diff --git a/src/relay/op/tensor/reduce.cc b/src/relay/op/tensor/reduce.cc
index 880a337..acbde0d 100644
--- a/src/relay/op/tensor/reduce.cc
+++ b/src/relay/op/tensor/reduce.cc
@@ -173,8 +173,8 @@ Array<Array<Layout>> ReduceInferCorrectLayout(const Attrs& attrs,
 }
 
 template<typename F>
-Array<top::Tensor> ReduceCompute(const Attrs& attrs,
-                            const Array<top::Tensor>& inputs,
+Array<te::Tensor> ReduceCompute(const Attrs& attrs,
+                            const Array<te::Tensor>& inputs,
                             const Type& out_type,
                             const Target& target,
                             F f) {
@@ -320,8 +320,8 @@ bool ReduceRel(const Array<Type>& types,
   .add_argument("data", "Tensor", "The input tensor.")
 
 
-Array<top::Tensor> ArgMaxCompute(const Attrs& attrs,
-                            const Array<top::Tensor>& inputs,
+Array<te::Tensor> ArgMaxCompute(const Attrs& attrs,
+                            const Array<te::Tensor>& inputs,
                             const Type& out_type,
                             const Target& target) {
   return ReduceCompute(attrs, inputs, out_type, target, topi::argmax);
@@ -340,8 +340,8 @@ values over a given axis.
 .set_attr<TOpPattern>("TOpPattern", kCommReduce);
 
 
-Array<top::Tensor> ArgMinCompute(const Attrs& attrs,
-                            const Array<top::Tensor>& inputs,
+Array<te::Tensor> ArgMinCompute(const Attrs& attrs,
+                            const Array<te::Tensor>& inputs,
                             const Type& out_type,
                             const Target& target) {
   return ReduceCompute(attrs, inputs, out_type, target, topi::argmin);
@@ -358,8 +358,8 @@ values over a given axis.
 .set_attr<FTVMCompute>("FTVMCompute", ArgMinCompute)
 .set_attr<TOpPattern>("TOpPattern", kCommReduce);
 
-Array<top::Tensor> SumCompute(const Attrs& attrs,
-                         const Array<top::Tensor>& inputs,
+Array<te::Tensor> SumCompute(const Attrs& attrs,
+                         const Array<te::Tensor>& inputs,
                          const Type& out_type,
                          const Target& target) {
   return ReduceCompute(attrs, inputs, out_type, target, topi::sum);
@@ -392,8 +392,8 @@ Example::
 .set_attr<TOpPattern>("TOpPattern", kCommReduce);
 
 
-Array<top::Tensor> AllCompute(const Attrs& attrs,
-                         const Array<top::Tensor>& inputs,
+Array<te::Tensor> AllCompute(const Attrs& attrs,
+                         const Array<te::Tensor>& inputs,
                          const Type& out_type,
                          const Target& target) {
   return ReduceCompute(attrs, inputs, out_type, target, topi::all);
@@ -429,8 +429,8 @@ Example::
 .set_attr<TOpPattern>("TOpPattern", kCommReduce);
 
 
-Array<top::Tensor> AnyCompute(const Attrs& attrs,
-                         const Array<top::Tensor>& inputs,
+Array<te::Tensor> AnyCompute(const Attrs& attrs,
+                         const Array<te::Tensor>& inputs,
                          const Type& out_type,
                          const Target& target) {
   return ReduceCompute(attrs, inputs, out_type, target, topi::any);
@@ -466,8 +466,8 @@ Example::
 .set_attr<TOpPattern>("TOpPattern", kCommReduce);
 
 
-Array<top::Tensor> MaxCompute(const Attrs& attrs,
-                         const Array<top::Tensor>& inputs,
+Array<te::Tensor> MaxCompute(const Attrs& attrs,
+                         const Array<te::Tensor>& inputs,
                          const Type& out_type,
                          const Target& target) {
   return ReduceCompute(attrs, inputs, out_type, target, topi::max);
@@ -484,8 +484,8 @@ RELAY_REGISTER_REDUCE_OP("max")
 .set_attr<TOpPattern>("TOpPattern", kCommReduce);
 
 
-Array<top::Tensor> MinCompute(const Attrs& attrs,
-                         const Array<top::Tensor>& inputs,
+Array<te::Tensor> MinCompute(const Attrs& attrs,
+                         const Array<te::Tensor>& inputs,
                          const Type& out_type,
                          const Target& target) {
   return ReduceCompute(attrs, inputs, out_type, target, topi::min);
@@ -503,8 +503,8 @@ RELAY_REGISTER_REDUCE_OP("min")
 .set_attr<TOpPattern>("TOpPattern", kCommReduce);
 
 
-Array<top::Tensor> ProdCompute(const Attrs& attrs,
-                          const Array<top::Tensor>& inputs,
+Array<te::Tensor> ProdCompute(const Attrs& attrs,
+                          const Array<te::Tensor>& inputs,
                           const Type& out_type,
                           const Target& target) {
   return ReduceCompute(attrs, inputs, out_type, target, topi::prod);
@@ -533,8 +533,8 @@ Example::
 .set_attr<TOpPattern>("TOpPattern", kCommReduce);
 
 
-Array<top::Tensor> MeanCompute(const Attrs& attrs,
-                          const Array<top::Tensor>& inputs,
+Array<te::Tensor> MeanCompute(const Attrs& attrs,
+                          const Array<te::Tensor>& inputs,
                           const Type& out_type,
                           const Target& target) {
   IndexExpr count = tir::make_const(inputs[0]->dtype, 1);
@@ -598,8 +598,8 @@ bool VarianceRel(const Array<Type>& types,
   return true;
 }
 
-Array<top::Tensor> VarianceCompute(const Attrs& attrs,
-                              const Array<top::Tensor>& inputs,
+Array<te::Tensor> VarianceCompute(const Attrs& attrs,
+                              const Array<te::Tensor>& inputs,
                               const Type& out_type,
                               const Target& target) {
   IndexExpr count = tir::make_const(inputs[0]->dtype, 1);
diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc
index b958755..9c4138c 100644
--- a/src/relay/op/tensor/transform.cc
+++ b/src/relay/op/tensor/transform.cc
@@ -65,8 +65,8 @@ bool CastRel(const Array<Type>& types,
   return true;
 }
 
-Array<top::Tensor> CastCompute(const Attrs& attrs,
-                          const Array<top::Tensor>& inputs,
+Array<te::Tensor> CastCompute(const Attrs& attrs,
+                          const Array<te::Tensor>& inputs,
                           const Type& out_type,
                           const Target& target) {
   const CastAttrs *param = attrs.as<CastAttrs>();
@@ -125,8 +125,8 @@ bool CastLikeRel(const Array<Type>& types,
 }
 
 
-Array<top::Tensor> CastLikeCompute(const Attrs& attrs,
-                              const Array<top::Tensor>& inputs,
+Array<te::Tensor> CastLikeCompute(const Attrs& attrs,
+                              const Array<te::Tensor>& inputs,
                               const Type& out_type,
                               const Target& target) {
   return { topi::cast(inputs[0], inputs[1]->dtype) };
@@ -156,7 +156,7 @@ RELAY_REGISTER_OP("cast_like")
 .set_attr<FInferCorrectLayout>("FInferCorrectLayout", ElemwiseArbitraryLayout);
 
 
-Array<top::Tensor> ReinterpretCompute(const Attrs& attrs, const Array<top::Tensor>& inputs,
+Array<te::Tensor> ReinterpretCompute(const Attrs& attrs, const Array<te::Tensor>& inputs,
                                  const Type& out_type, const Target& target) {
   const CastAttrs* param = attrs.as<CastAttrs>();
   CHECK(param != nullptr);
@@ -230,8 +230,8 @@ bool ExpandDimsRel(const Array<Type>& types,
   return true;
 }
 
-Array<top::Tensor> ExpandDimsCompute(const Attrs& attrs,
-                                const Array<top::Tensor>& inputs,
+Array<te::Tensor> ExpandDimsCompute(const Attrs& attrs,
+                                const Array<te::Tensor>& inputs,
                                 const Type& out_type,
                                 const Target& target) {
   const ExpandDimsAttrs *param = attrs.as<ExpandDimsAttrs>();
@@ -269,8 +269,8 @@ RELAY_REGISTER_OP("expand_dims")
 // relay.concatenate
 TVM_REGISTER_NODE_TYPE(ConcatenateAttrs);
 
-Array<top::Tensor> ConcatenateCompute(const Attrs& attrs,
-                          const Array<top::Tensor>& inputs,
+Array<te::Tensor> ConcatenateCompute(const Attrs& attrs,
+                          const Array<te::Tensor>& inputs,
                           const Type& out_type,
                           const Target& target) {
   const ConcatenateAttrs *param = attrs.as<ConcatenateAttrs>();
@@ -412,8 +412,8 @@ bool StackRel(const Array<Type>& types,
   return true;
 }
 
-Array<top::Tensor> StackCompute(const Attrs& attrs,
-                           const Array<top::Tensor>& inputs,
+Array<te::Tensor> StackCompute(const Attrs& attrs,
+                           const Array<te::Tensor>& inputs,
                            const Type& out_type,
                            const Target& target) {
   const StackAttrs *param = attrs.as<StackAttrs>();
@@ -504,13 +504,13 @@ bool TransposeRel(const Array<Type>& types,
   return true;
 }
 
-Array<top::Tensor> TransposeCompute(const Attrs& attrs,
-                               const Array<top::Tensor>& inputs,
+Array<te::Tensor> TransposeCompute(const Attrs& attrs,
+                               const Array<te::Tensor>& inputs,
                                const Type& out_type,
                                const Target& target) {
   const auto* param = attrs.as<TransposeAttrs>();
   CHECK(param != nullptr);
-  return Array<top::Tensor>{ topi::transpose(inputs[0], param->axes) };
+  return Array<te::Tensor>{ topi::transpose(inputs[0], param->axes) };
 }
 
 Expr MakeTranspose(Expr data,
@@ -687,8 +687,8 @@ bool ReshapeRel(const Array<Type>& types,
   return true;
 }
 
-Array<top::Tensor> ReshapeCompute(const Attrs& attrs,
-                             const Array<top::Tensor>& inputs,
+Array<te::Tensor> ReshapeCompute(const Attrs& attrs,
+                             const Array<te::Tensor>& inputs,
                              const Type& out_type,
                              const Target& target) {
   const auto* out_ttype = out_type.as<TensorTypeNode>();
@@ -922,16 +922,16 @@ bool TakeRel(const Array<Type>& types,
   return true;
 }
 
-Array<top::Tensor> TakeCompute(const Attrs& attrs,
-                          const Array<top::Tensor>& inputs,
+Array<te::Tensor> TakeCompute(const Attrs& attrs,
+                          const Array<te::Tensor>& inputs,
                           const Type& out_type,
                           const Target& target) {
   const auto* param = attrs.as<TakeAttrs>();
   CHECK(param != nullptr);
   if (!param->axis.defined()) {
-    return Array<top::Tensor>{ topi::take(inputs[0], inputs[1], param->mode) };
+    return Array<te::Tensor>{ topi::take(inputs[0], inputs[1], param->mode) };
   } else {
-    return Array<top::Tensor>{ topi::take(inputs[0], inputs[1], param->axis, param->mode) };
+    return Array<te::Tensor>{ topi::take(inputs[0], inputs[1], param->axis, param->mode) };
   }
 }
 
@@ -1009,8 +1009,8 @@ bool FullRel(const Array<Type>& types,
   return true;
 }
 
-Array<top::Tensor> FullCompute(const Attrs& attrs,
-                          const Array<top::Tensor>& inputs,
+Array<te::Tensor> FullCompute(const Attrs& attrs,
+                          const Array<te::Tensor>& inputs,
                           const Type& out_type,
                           const Target& target) {
   const auto* out_ttype = out_type.as<TensorTypeNode>();
@@ -1117,8 +1117,8 @@ bool FullLikeRel(const Array<Type>& types,
   return true;
 }
 
-Array<top::Tensor> FullLikeCompute(const Attrs& attrs,
-                              const Array<top::Tensor>& inputs,
+Array<te::Tensor> FullLikeCompute(const Attrs& attrs,
+                              const Array<te::Tensor>& inputs,
                               const Type& out_type,
                               const Target& target) {
   return { topi::full_like(inputs[0], inputs[1]()) };
@@ -1217,26 +1217,26 @@ bool ArangeRel(const Array<Type>& types,
   }
 }
 
-inline top::Tensor DynamicArange(const top::Tensor& start,
-                                 const top::Tensor& stop,
-                                 const top::Tensor& step,
+inline te::Tensor DynamicArange(const te::Tensor& start,
+                                 const te::Tensor& stop,
+                                 const te::Tensor& step,
                                  tvm::DataType dtype,
                                  std::string name = "tensor",
                                  std::string tag = topi::kInjective) {
   tvm::PrimExpr num_elem = tvm::tir::Var("num_elem");
-  return top::compute({num_elem}, [&](const Array<tvm::tir::Var>& indices) {
+  return te::compute({num_elem}, [&](const Array<tvm::tir::Var>& indices) {
     return tvm::cast(dtype, start[0] + step[0] * indices[0]);
   }, name, tag);
 }
 
-Array<top::Tensor> ArangeCompute(const Attrs& attrs,
-                                 const Array<top::Tensor>& inputs,
+Array<te::Tensor> ArangeCompute(const Attrs& attrs,
+                                 const Array<te::Tensor>& inputs,
                                  const Type& out_type,
                                  const Target& target) {
   const ArangeAttrs* param = attrs.as<ArangeAttrs>();
-  top::Tensor start = inputs[0];
-  top::Tensor stop =  inputs[1];
-  top::Tensor step = inputs[2];
+  te::Tensor start = inputs[0];
+  te::Tensor stop =  inputs[1];
+  te::Tensor step = inputs[2];
   Array<tvm::PrimExpr> empty = {0};
   return { DynamicArange(start, stop, step, param->dtype) };
 }
@@ -1324,8 +1324,8 @@ bool RepeatRel(const Array<Type>& types,
   return true;
 }
 
-Array<top::Tensor> RepeatCompute(const Attrs& attrs,
-                            const Array<top::Tensor>& inputs,
+Array<te::Tensor> RepeatCompute(const Attrs& attrs,
+                            const Array<te::Tensor>& inputs,
                             const Type& out_type,
                             const Target& target) {
   const RepeatAttrs *param = attrs.as<RepeatAttrs>();
@@ -1435,8 +1435,8 @@ bool TileRel(const Array<Type>& types,
   return true;
 }
 
-Array<top::Tensor> TileCompute(const Attrs& attrs,
-                          const Array<top::Tensor>& inputs,
+Array<te::Tensor> TileCompute(const Attrs& attrs,
+                          const Array<te::Tensor>& inputs,
                           const Type& out_type,
                           const Target& target) {
   const TileAttrs *param = attrs.as<TileAttrs>();
@@ -1496,8 +1496,8 @@ bool ReverseRel(const Array<Type>& types,
   return true;
 }
 
-Array<top::Tensor> ReverseCompute(const Attrs& attrs,
-                             const Array<top::Tensor>& inputs,
+Array<te::Tensor> ReverseCompute(const Attrs& attrs,
+                             const Array<te::Tensor>& inputs,
                              const Type& out_type,
                              const Target& target) {
   const ReverseAttrs *param = attrs.as<ReverseAttrs>();
@@ -1570,8 +1570,8 @@ Expr MakeWhere(const Expr& condition, const Expr& x, const Expr& y) {
   return CallNode::make(op, {condition, x, y});
 }
 
-Array<top::Tensor> WhereCompute(const Attrs& attrs,
-                           const Array<top::Tensor>& inputs,
+Array<te::Tensor> WhereCompute(const Attrs& attrs,
+                           const Array<te::Tensor>& inputs,
                            const Type& out_type,
                            const Target& target) {
   return { topi::where(inputs[0], inputs[1], inputs[2]) };
@@ -1687,8 +1687,8 @@ bool SqueezeRel(const Array<Type>& types,
   return true;
 }
 
-Array<top::Tensor> SqueezeCompute(const Attrs& attrs,
-                             const Array<top::Tensor>& inputs,
+Array<te::Tensor> SqueezeCompute(const Attrs& attrs,
+                             const Array<te::Tensor>& inputs,
                              const Type& out_type,
                              const Target& target) {
   const SqueezeAttrs *param = attrs.as<SqueezeAttrs>();
@@ -1728,8 +1728,8 @@ Expr MakeCollapseSumLike(Expr data,
   return CallNode::make(op, {data, collapse_type}, Attrs(), {});
 }
 
-Array<top::Tensor> CollapseSumLikeCompute(const Attrs& attrs,
-                                     const Array<top::Tensor>& inputs,
+Array<te::Tensor> CollapseSumLikeCompute(const Attrs& attrs,
+                                     const Array<te::Tensor>& inputs,
                                      const Type& out_type,
                                      const Target& target) {
   const auto* out_ttype = out_type.as<TensorTypeNode>();
@@ -1773,8 +1773,8 @@ Expr MakeBroadCastTo(Expr data, Array<IndexExpr> shape) {
   return CallNode::make(op, {data}, Attrs(attrs), {});
 }
 
-Array<top::Tensor> BroadCastToCompute(const Attrs& attrs,
-                                 const Array<top::Tensor>& inputs,
+Array<te::Tensor> BroadCastToCompute(const Attrs& attrs,
+                                 const Array<te::Tensor>& inputs,
                                  const Type& out_type,
                                  const Target& target) {
   auto ioattrs = attrs.as<InitOpAttrs>();
@@ -1811,8 +1811,8 @@ Expr MakeBroadCastToLike(Expr data,
   return CallNode::make(op, {data, broadcast_type}, Attrs(), {});
 }
 
-Array<top::Tensor> BroadCastToLikeCompute(const Attrs& attrs,
-                                     const Array<top::Tensor>& inputs,
+Array<te::Tensor> BroadCastToLikeCompute(const Attrs& attrs,
+                                     const Array<te::Tensor>& inputs,
                                      const Type& out_type,
                                      const Target& target) {
   const auto* out_ttype = out_type.as<TensorTypeNode>();
@@ -2018,13 +2018,13 @@ Expr MakeStridedSlice(Expr data,
   return CallNode::make(op, {data}, Attrs(attrs), {});
 }
 
-Array<top::Tensor> StridedSliceCompute(const Attrs& attrs,
-                                  const Array<top::Tensor>& inputs,
+Array<te::Tensor> StridedSliceCompute(const Attrs& attrs,
+                                  const Array<te::Tensor>& inputs,
                                   const Type& out_type,
                                   const Target& target) {
   const StridedSliceAttrs *param = attrs.as<StridedSliceAttrs>();
   CHECK(param != nullptr);
-  return Array<top::Tensor>{
+  return Array<te::Tensor>{
     topi::strided_slice(inputs[0], param->begin, param->end, param->strides)
   };
 }
@@ -2175,8 +2175,8 @@ bool SplitRel(const Array<Type>& types,
   return true;
 }
 
-Array<top::Tensor> SplitCompute(const Attrs& attrs,
-                           const Array<top::Tensor>& inputs,
+Array<te::Tensor> SplitCompute(const Attrs& attrs,
+                           const Array<te::Tensor>& inputs,
                            const Type& out_type,
                            const Target& target) {
   const auto param = attrs.as<SplitAttrs>();
@@ -2184,11 +2184,11 @@ Array<top::Tensor> SplitCompute(const Attrs& attrs,
 
   if (const IntImmNode* sections = param->indices_or_sections.as<IntImmNode>()) {
     int64_t num_sections = sections->value;
-    return Array<top::Tensor>{
+    return Array<te::Tensor>{
       topi::split_sections(inputs[0], num_sections, param->axis) };
   } else {
     auto indices = Downcast<Array<Integer> >(param->indices_or_sections);
-    return Array<top::Tensor>{ topi::split(inputs[0], indices, param->axis) };
+    return Array<te::Tensor>{ topi::split(inputs[0], indices, param->axis) };
   }
 }
 
@@ -2304,8 +2304,8 @@ Expr MakeSliceLike(Expr data,
   return CallNode::make(op, {data, shape_like}, Attrs(attrs), {});
 }
 
-Array<top::Tensor> SliceLikeCompute(const Attrs& attrs,
-                               const Array<top::Tensor>& inputs,
+Array<te::Tensor> SliceLikeCompute(const Attrs& attrs,
+                               const Array<te::Tensor>& inputs,
                                const Type& out_type,
                                const Target& target) {
   const auto* param = attrs.as<SliceLikeAttrs>();
@@ -2342,7 +2342,7 @@ Array<top::Tensor> SliceLikeCompute(const Attrs& attrs,
         << topi::GetConstInt(src_shape[axis]);
     }
   }
-  return Array<top::Tensor>{
+  return Array<te::Tensor>{
     topi::strided_slice(inputs[0],
                         GetIntArray(begin_idx),
                         GetIntArray(end_idx),
@@ -2370,13 +2370,13 @@ RELAY_REGISTER_OP("slice_like")
 // relay.layout_transform
 TVM_REGISTER_NODE_TYPE(LayoutTransformAttrs);
 
-Array<top::Tensor> LayoutTransformCompute(const Attrs& attrs,
-                                     const Array<top::Tensor>& inputs,
+Array<te::Tensor> LayoutTransformCompute(const Attrs& attrs,
+                                     const Array<te::Tensor>& inputs,
                                      const Type& out_type,
                                      const Target& target) {
   const auto* param = attrs.as<LayoutTransformAttrs>();
   CHECK(param != nullptr);
-  return Array<top::Tensor>{
+  return Array<te::Tensor>{
     topi::layout_transform(inputs[0], param->src_layout, param->dst_layout)
   };
 }
@@ -2503,8 +2503,8 @@ bool GatherNDRel(const Array<Type>& types,
   return true;
 }
 
-Array<top::Tensor> GatherNDCompute(const Attrs& attrs,
-                              const Array<top::Tensor>& inputs,
+Array<te::Tensor> GatherNDCompute(const Attrs& attrs,
+                              const Array<te::Tensor>& inputs,
                               const Type& out_type,
                               const Target& target) {
   return { topi::gather_nd(inputs[0], inputs[1]) };
@@ -2557,13 +2557,13 @@ bool SequenceMaskRel(const Array<Type>& types,
   return true;
 }
 
-Array<top::Tensor> SequenceMaskCompute(const Attrs& attrs,
-                                  const Array<top::Tensor>& inputs,
+Array<te::Tensor> SequenceMaskCompute(const Attrs& attrs,
+                                  const Array<te::Tensor>& inputs,
                                   const Type& out_type,
                                   const Target& target) {
   const auto* param = attrs.as<SequenceMaskAttrs>();
   CHECK(param != nullptr);
-  return Array<top::Tensor>{
+  return Array<te::Tensor>{
     topi::sequence_mask(inputs[0], inputs[1], param->mask_value, param->axis) };
 }
 
@@ -2670,13 +2670,13 @@ bool OneHotRel(const Array<Type>& types,
   return true;
 }
 
-Array<top::Tensor> OneHotCompute(const Attrs& attrs,
-                            const Array<top::Tensor>& inputs,
+Array<te::Tensor> OneHotCompute(const Attrs& attrs,
+                            const Array<te::Tensor>& inputs,
                             const Type& out_type,
                             const Target& target) {
   const auto* param = attrs.as<OneHotAttrs>();
   CHECK(param != nullptr);
-  return Array<top::Tensor> {
+  return Array<te::Tensor> {
     topi::one_hot(inputs[0],
                   inputs[1](),
                   inputs[2](),
diff --git a/src/relay/op/tensor/unary.cc b/src/relay/op/tensor/unary.cc
index 98ff099..d85d316 100644
--- a/src/relay/op/tensor/unary.cc
+++ b/src/relay/op/tensor/unary.cc
@@ -34,9 +34,9 @@ namespace relay {
 
 #define RELAY_UNARY_COMPUTE(FTOPI)                      \
   [] (const Attrs& attrs,                               \
-      const Array<top::Tensor>& inputs,                      \
+      const Array<te::Tensor>& inputs,                      \
       const Type& out_type,                             \
-      const Target& target) -> Array<top::Tensor> {          \
+      const Target& target) -> Array<te::Tensor> {          \
     return {FTOPI(inputs[0])};                          \
   }                                                     \
 
@@ -290,8 +290,8 @@ bool ShapeOfRel(const Array<Type>& types,
   return true;
 }
 
-Array<top::Tensor> ShapeOfCompute(const Attrs& attrs,
-                             const Array<top::Tensor>& inputs,
+Array<te::Tensor> ShapeOfCompute(const Attrs& attrs,
+                             const Array<te::Tensor>& inputs,
                              const Type& out_type,
                              const Target& target) {
   CHECK_EQ(inputs.size(), 1);
@@ -341,14 +341,14 @@ bool NdarraySizeRel(const Array<Type>& types,
   return true;
 }
 
-Array<top::Tensor> NdarraySizeCompute(const Attrs& attrs,
-                          const Array<top::Tensor>& inputs,
+Array<te::Tensor> NdarraySizeCompute(const Attrs& attrs,
+                          const Array<te::Tensor>& inputs,
                           const Type& out_type,
                           const Target& target) {
   CHECK_EQ(inputs.size(), 1);
   const auto* param = attrs.as<NdarraySizeAttrs>();
   CHECK(param != nullptr);
-  return Array<top::Tensor>{topi::ndarray_size(inputs[0], param->dtype)};
+  return Array<te::Tensor>{topi::ndarray_size(inputs[0], param->dtype)};
 }
 
 TVM_REGISTER_GLOBAL("relay.op.contrib._make.ndarray_size")
diff --git a/src/relay/op/vision/yolo.cc b/src/relay/op/vision/yolo.cc
index 5a59a74..9c4a285 100644
--- a/src/relay/op/vision/yolo.cc
+++ b/src/relay/op/vision/yolo.cc
@@ -82,12 +82,12 @@ Its function is mostly shape transform.")doc" TVM_ADD_FILELINE)
 .set_attrs_type<YoloReorgAttrs>()
 .add_type_rel("YoloReorg", YoloReorgRel)
 .set_attr<FTVMCompute>("FTVMCompute", [](const Attrs& attrs,
-                                         const Array<top::Tensor>& inputs,
+                                         const Array<te::Tensor>& inputs,
                                          const Type& out_type,
                                          const Target& target) {
   const auto* params = attrs.as<YoloReorgAttrs>();
   CHECK(params != nullptr);
-  return Array<top::Tensor>{ topi::vision::reorg(inputs[0], params->stride) };
+  return Array<te::Tensor>{ topi::vision::reorg(inputs[0], params->stride) };
 });
 
 }  // namespace relay
diff --git a/src/relay/pass/alter_op_layout.cc b/src/relay/pass/alter_op_layout.cc
index 8e819bc..0cc3ff0 100644
--- a/src/relay/pass/alter_op_layout.cc
+++ b/src/relay/pass/alter_op_layout.cc
@@ -28,7 +28,7 @@
 #include <tvm/relay/op_attr_types.h>
 #include <tvm/relay/attrs/transform.h>
 #include <tvm/relay/transform.h>
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 #include <tuple>
 #include <vector>
 #include <functional>
@@ -78,10 +78,10 @@ class AlterTransformMemorizer : public TransformMemorizer {
     Expr new_e;
     bool modified = false;
     if (falter_layout.count(op)) {
-      tvm::Array<tvm::top::Tensor> tinfos;
+      tvm::Array<tvm::te::Tensor> tinfos;
       for (auto expr : ref_call->args) {
         auto ttype = expr->type_as<TensorTypeNode>();
-        tinfos.push_back(tvm::top::placeholder(ttype->shape, ttype->dtype));
+        tinfos.push_back(tvm::te::placeholder(ttype->shape, ttype->dtype));
       }
       Expr altered_value = falter_layout[op](ref_call->attrs, new_args, tinfos);
       if (altered_value.defined()) {
diff --git a/src/relay/pass/convert_layout.cc b/src/relay/pass/convert_layout.cc
index 5a90651..be44db0 100644
--- a/src/relay/pass/convert_layout.cc
+++ b/src/relay/pass/convert_layout.cc
@@ -28,7 +28,7 @@
 #include <tvm/relay/op_attr_types.h>
 #include <tvm/relay/attrs/transform.h>
 #include <tvm/relay/transform.h>
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 #include <tuple>
 #include <vector>
 #include <functional>
@@ -86,10 +86,10 @@ class ConvertTransformMemorizer : public TransformMemorizer {
     Expr new_e;
     bool modified = false;
     if (fconvert_layout.count(op)) {
-      tvm::Array<tvm::top::Tensor> tinfos;
+      tvm::Array<tvm::te::Tensor> tinfos;
       for (auto expr : ref_call->args) {
         auto ttype = expr->type_as<TensorTypeNode>();
-        tinfos.push_back(tvm::top::placeholder(ttype->shape, ttype->dtype));
+        tinfos.push_back(tvm::te::placeholder(ttype->shape, ttype->dtype));
       }
       Expr altered_value =
           fconvert_layout[op](ref_call->attrs, new_args, tinfos, operator->()->desired_layout_);
diff --git a/src/relay/pass/gradient.cc b/src/relay/pass/gradient.cc
index 7d94d4e..9a03fde 100644
--- a/src/relay/pass/gradient.cc
+++ b/src/relay/pass/gradient.cc
@@ -23,7 +23,7 @@
  */
 #include <tvm/ir/type_functor.h>
 #include <tvm/tir/lowered_func.h>
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 #include <tvm/relay/expr_functor.h>
 #include <tvm/relay/analysis.h>
 #include <tvm/relay/transform.h>
diff --git a/src/relay/pass/legalize.cc b/src/relay/pass/legalize.cc
index 4480861..250dd69 100644
--- a/src/relay/pass/legalize.cc
+++ b/src/relay/pass/legalize.cc
@@ -23,7 +23,7 @@
  * shape, dtype or layout to another op or a sequence of ops.
  */
 
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 #include <tvm/relay/expr_functor.h>
 #include <tvm/relay/op_attr_types.h>
 #include <tvm/relay/transform.h>
diff --git a/src/top/operation/compute_op.cc b/src/te/operation/compute_op.cc
similarity index 98%
rename from src/top/operation/compute_op.cc
rename to src/te/operation/compute_op.cc
index 598a0f7..1886d97 100644
--- a/src/top/operation/compute_op.cc
+++ b/src/te/operation/compute_op.cc
@@ -21,7 +21,7 @@
  * \brief Compute Op.
  * \file compute_op.cc
  */
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 #include <tvm/arith/analyzer.h>
 #include <tvm/tir/expr.h>
 #include <tvm/tir/ir_pass.h>
@@ -36,7 +36,7 @@
 #include "../../arith/interval_set.h"
 
 namespace tvm {
-namespace top {
+namespace te {
 using namespace tir;
 
 TVM_STATIC_IR_FUNCTOR(ReprPrinter, vtable)
@@ -184,7 +184,7 @@ Operation ComputeOpNode::ReplaceInputs(
   if (this->body[0]->IsInstance<tir::ReduceNode>()) {
     // Specially handle reduce so the replaced op
     // still share all the components
-    PrimExpr new_reduce = top::ReplaceTensor(this->body[0], rmap);
+    PrimExpr new_reduce = te::ReplaceTensor(this->body[0], rmap);
     if (!new_reduce.same_as(this->body[0])) {
       const tir::ReduceNode* r = new_reduce.as<tir::ReduceNode>();
       for (size_t k = 0; k < this->body.size(); ++k) {
@@ -198,7 +198,7 @@ Operation ComputeOpNode::ReplaceInputs(
     }
   } else {
     arr = UpdateArray(this->body, [&rmap] (const PrimExpr& e) {
-        return top::ReplaceTensor(e, rmap);
+        return te::ReplaceTensor(e, rmap);
       });
   }
   if (!arr.same_as(this->body)) {
@@ -495,7 +495,7 @@ ComputeLoopNest ComputeLoopNest::make(
       update_state[self->axis[i]] = 1;
     }
     // find which iter var is related to reduction and which is related to axis.
-    top::PassDownBitMaskOr(stage, &update_state);
+    te::PassDownBitMaskOr(stage, &update_state);
     auto leaf_iter_vars = stage->leaf_iter_vars;
     // first first loop that is related to reduction.
     size_t begin_loop = leaf_iter_vars.size();
@@ -638,5 +638,5 @@ Stmt TransformUpdate(const Stage& stage,
                           update, body);
 }
 
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
diff --git a/src/top/operation/compute_op.h b/src/te/operation/compute_op.h
similarity index 95%
rename from src/top/operation/compute_op.h
rename to src/te/operation/compute_op.h
index bbe9731..3e07532 100644
--- a/src/top/operation/compute_op.h
+++ b/src/te/operation/compute_op.h
@@ -21,17 +21,17 @@
  * \brief Helper utilities to implement compute_op.
  * \file compute_op.h
  */
-#ifndef TVM_TOP_OPERATION_COMPUTE_OP_H_
-#define TVM_TOP_OPERATION_COMPUTE_OP_H_
+#ifndef TVM_TE_OPERATION_COMPUTE_OP_H_
+#define TVM_TE_OPERATION_COMPUTE_OP_H_
 
 #include <tvm/tir/expr.h>
 #include <tvm/tir/expr.h>
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 #include <vector>
 #include <unordered_map>
 
 namespace tvm {
-namespace top {
+namespace te {
 // loop nest structure for general compute
 // This the loop nest structured used in compute.
 // Does not include the loop body.
@@ -107,7 +107,7 @@ Stmt TransformUpdate(const Stage& stage,
                      const ComputeLoopNest& n,
                      Stmt body,
                      Stmt update);
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
 
-#endif  // TVM_TOP_OPERATION_COMPUTE_OP_H_
+#endif  // TVM_TE_OPERATION_COMPUTE_OP_H_
diff --git a/src/top/operation/cross_thread_reduction.cc b/src/te/operation/cross_thread_reduction.cc
similarity index 99%
rename from src/top/operation/cross_thread_reduction.cc
rename to src/te/operation/cross_thread_reduction.cc
index 30ee7b8..180ee12 100644
--- a/src/top/operation/cross_thread_reduction.cc
+++ b/src/te/operation/cross_thread_reduction.cc
@@ -26,7 +26,7 @@
 #include "op_util.h"
 
 namespace tvm {
-namespace top {
+namespace te {
 using namespace tir;
 
 Stmt MakeCrossThreadReduction(
@@ -114,5 +114,5 @@ Stmt MakeCrossThreadReduction(
   body = Substitute(body, value_map);
   return MergeNest(nest, body);
 }
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
diff --git a/src/top/operation/extern_op.cc b/src/te/operation/extern_op.cc
similarity index 98%
rename from src/top/operation/extern_op.cc
rename to src/te/operation/extern_op.cc
index 8bc812e..c1e5504 100644
--- a/src/top/operation/extern_op.cc
+++ b/src/te/operation/extern_op.cc
@@ -21,14 +21,14 @@
  * \brief External computation rule.
  * \file extern_op.cc
  */
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 #include <tvm/arith/analyzer.h>
 #include <tvm/tir/expr.h>
 #include <unordered_set>
 #include "op_util.h"
 
 namespace tvm {
-namespace top {
+namespace te {
 using namespace tir;
 // ExternOpNode
 TVM_STATIC_IR_FUNCTOR(ReprPrinter, vtable)
@@ -182,5 +182,5 @@ Stmt ExternOpNode::BuildProvide(
   }
   return ret;
 }
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
diff --git a/src/top/operation/hybrid_op.cc b/src/te/operation/hybrid_op.cc
similarity index 98%
rename from src/top/operation/hybrid_op.cc
rename to src/te/operation/hybrid_op.cc
index 4c1ab70..bb883ae 100644
--- a/src/top/operation/hybrid_op.cc
+++ b/src/te/operation/hybrid_op.cc
@@ -21,7 +21,7 @@
  * \brief Hybrid computation rule.
  * \file hybrid_op.cc
  */
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 #include <tvm/arith/analyzer.h>
 #include <tvm/tir/expr.h>
 #include <tvm/tir/stmt_functor.h>
@@ -34,7 +34,7 @@
 #include "hybrid_op.h"
 
 namespace tvm {
-namespace top {
+namespace te {
 using namespace tir;
 // HybridOpNode
 TVM_STATIC_IR_FUNCTOR(ReprPrinter, vtable)
@@ -77,7 +77,7 @@ Operation HybridOpNode::make(std::string name,
   n->attrs = std::move(attrs);
   n->inputs = std::move(inputs);
   n->outputs = std::move(outputs);
-  n->axis = top::GatherLoopVars(body);
+  n->axis = te::GatherLoopVars(body);
   n->body = std::move(body);
   Operation res = Operation(n);
   return res;
@@ -110,7 +110,7 @@ Operation HybridOpNode::ReplaceInputs(
     const std::unordered_map<Tensor, Tensor> &rmap) const {
   CHECK_EQ(self.operator->(), this);
   auto n = make_object<HybridOpNode>(*this);
-  n->body = top::ReplaceTensor(this->body, rmap);
+  n->body = te::ReplaceTensor(this->body, rmap);
   for (size_t i = 0; i < n->inputs.size(); ++i) {
     Tensor t = n->inputs[i];
     if (rmap.count(t)) {
@@ -210,10 +210,10 @@ Stmt HybridOpNode::BuildProvide(
    * This is a major difference that HybridOpNode is NOT the same as
    * ExternOpNode.
    * */
-  ret = top::ReplaceTensor(ret, rmap);
-  ret = top::ReplaceProvideTensor(ret, rmap);
+  ret = te::ReplaceTensor(ret, rmap);
+  ret = te::ReplaceProvideTensor(ret, rmap);
 
-  ret = top::ApplySchedule(stage, dom_map, ret);
+  ret = te::ApplySchedule(stage, dom_map, ret);
   return ret;
 }
 
@@ -506,5 +506,5 @@ Stmt ReplaceProvideTensor(Stmt stmt,
   Stmt ret = repl(stmt);
   return repl.found ? ret : stmt;
 }
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
diff --git a/src/top/operation/hybrid_op.h b/src/te/operation/hybrid_op.h
similarity index 94%
rename from src/top/operation/hybrid_op.h
rename to src/te/operation/hybrid_op.h
index d0de706..a7b2cb1 100644
--- a/src/top/operation/hybrid_op.h
+++ b/src/te/operation/hybrid_op.h
@@ -21,11 +21,11 @@
  * \brief Helper utilities to implement hybrid_op.
  * \file hybrid_op.h
  */
-#ifndef TVM_TOP_OPERATION_HYBRID_OP_H_
-#define TVM_TOP_OPERATION_HYBRID_OP_H_
+#ifndef TVM_TE_OPERATION_HYBRID_OP_H_
+#define TVM_TE_OPERATION_HYBRID_OP_H_
 
 #include <tvm/tir/expr.h>
-#include <tvm/top/schedule.h>
+#include <tvm/te/schedule.h>
 
 #include <unordered_map>
 #include <unordered_set>
@@ -36,7 +36,7 @@
 #include "../../tir/pass/arg_binder.h"
 
 namespace tvm {
-namespace top {
+namespace te {
 
 /*!
  * \brief Find all the iteration variables in the given statement body.
@@ -91,7 +91,7 @@ Stmt ApplyLoopOrder(const Stage &stage,
                     const std::unordered_map<IterVar, Range> &dom_map,
                     const std::unordered_map<IterVar, IterVar> &rebased, Stmt stmt);
 
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
 
-#endif  // TVM_TOP_OPERATION_HYBRID_OP_H_
+#endif  // TVM_TE_OPERATION_HYBRID_OP_H_
diff --git a/src/top/operation/op_util.cc b/src/te/operation/op_util.cc
similarity index 98%
rename from src/top/operation/op_util.cc
rename to src/te/operation/op_util.cc
index 47ad82f..8bc35e3 100644
--- a/src/top/operation/op_util.cc
+++ b/src/te/operation/op_util.cc
@@ -24,14 +24,14 @@
 #include <tvm/tir/expr.h>
 #include <tvm/tir/ir_pass.h>
 #include <tvm/tir/stmt_functor.h>
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 #include <string>
 #include "op_util.h"
 #include "../schedule/message_passing.h"
 #include "../../arith/compute_expr.h"
 
 namespace tvm {
-namespace top {
+namespace te {
 
 using namespace arith;
 using namespace tir;
@@ -172,7 +172,7 @@ MakeLoopNest(const Stage& stage,
     }
   }
   // message passing to get offset of root iter vars.
-  top::PassUpIndex(stage, dom_map, &value_map);
+  te::PassUpIndex(stage, dom_map, &value_map);
   return nest;
 }
 
@@ -266,5 +266,5 @@ tir::ForType IterVarTypeToForType(IterVarType iter_type) {
   }
 }
 
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
diff --git a/src/top/operation/op_util.h b/src/te/operation/op_util.h
similarity index 95%
rename from src/top/operation/op_util.h
rename to src/te/operation/op_util.h
index bc6e49e..5e16b8e 100644
--- a/src/top/operation/op_util.h
+++ b/src/te/operation/op_util.h
@@ -21,11 +21,11 @@
  * \file op_util.h
  * \brief Common utility used in operator construction.
  */
-#ifndef TVM_TOP_OPERATION_OP_UTIL_H_
-#define TVM_TOP_OPERATION_OP_UTIL_H_
+#ifndef TVM_TE_OPERATION_OP_UTIL_H_
+#define TVM_TE_OPERATION_OP_UTIL_H_
 
 #include <tvm/tir/expr.h>
-#include <tvm/top/schedule.h>
+#include <tvm/te/schedule.h>
 #include <unordered_map>
 #include <unordered_set>
 #include <vector>
@@ -34,7 +34,7 @@
 #include "../schedule/message_passing.h"
 
 namespace tvm {
-namespace top {
+namespace te {
 
 using tir::MergeNest;
 
@@ -102,6 +102,6 @@ IterVarType ForTypeToIterVarType(tir::ForType for_type);
  */
 tir::ForType IterVarTypeToForType(IterVarType iter_type);
 
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
-#endif  // TVM_TOP_OPERATION_OP_UTIL_H_
+#endif  // TVM_TE_OPERATION_OP_UTIL_H_
diff --git a/src/top/operation/placeholder_op.cc b/src/te/operation/placeholder_op.cc
similarity index 97%
rename from src/top/operation/placeholder_op.cc
rename to src/te/operation/placeholder_op.cc
index 13311a8..866ef94 100644
--- a/src/top/operation/placeholder_op.cc
+++ b/src/te/operation/placeholder_op.cc
@@ -21,10 +21,10 @@
  * \brief Placeholder op.
  * \file placeholder_op.cc
  */
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 
 namespace tvm {
-namespace top {
+namespace te {
 
 // PlaceholderOpNode
 TVM_STATIC_IR_FUNCTOR(ReprPrinter, vtable)
@@ -103,5 +103,5 @@ Stmt PlaceholderOpNode::BuildProvide(
     bool debug_keep_trivial_loop) const {
   return Stmt();
 }
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
diff --git a/src/top/operation/scan_op.cc b/src/te/operation/scan_op.cc
similarity index 99%
rename from src/top/operation/scan_op.cc
rename to src/te/operation/scan_op.cc
index 62ddecb..cacfd8c 100644
--- a/src/top/operation/scan_op.cc
+++ b/src/te/operation/scan_op.cc
@@ -21,14 +21,14 @@
  * \brief Scan Operator.
  * \file scan_op.cc
  */
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 #include <tvm/tir/expr.h>
 #include <tvm/tir/ir_pass.h>
 #include "op_util.h"
 #include "../schedule/graph.h"
 
 namespace tvm {
-namespace top {
+namespace te {
 using namespace tir;
 
 TVM_STATIC_IR_FUNCTOR(ReprPrinter, vtable)
@@ -304,5 +304,5 @@ Stmt ScanOpNode::BuildProvide(
           MakeBoundCheck(stage, dom_map, vmap, false, empty)));
   return MergeNest(nest, provide);
 }
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
diff --git a/src/top/operation/tensor_compute_op.cc b/src/te/operation/tensor_compute_op.cc
similarity index 97%
rename from src/top/operation/tensor_compute_op.cc
rename to src/te/operation/tensor_compute_op.cc
index 5011c16..8ce621c 100644
--- a/src/top/operation/tensor_compute_op.cc
+++ b/src/te/operation/tensor_compute_op.cc
@@ -21,7 +21,7 @@
  * \brief Tensor Compute Op.
  * \file tensor_compute_op.cc
  */
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 #include <tvm/arith/analyzer.h>
 #include <tvm/tir/expr.h>
 #include <tvm/tir/ir_pass.h>
@@ -31,7 +31,7 @@
 #include "../../arith/compute_expr.h"
 
 namespace tvm {
-namespace top {
+namespace te {
 using namespace tir;
 // TensorComputeOpNode
 TVM_STATIC_IR_FUNCTOR(ReprPrinter, vtable)
@@ -217,7 +217,7 @@ Stmt TensorComputeOpNode::BuildProvide(
     body = MergeNest(input_bind_nest, body);
     body = tir::Substitute(body, vmap);
     body = MergeNest(binder.asserts(), body);
-    body = top::Substitute(body, n.main_vmap);
+    body = te::Substitute(body, n.main_vmap);
     Stmt ret =  MergeNest(nest, body);
     return ret;
   } else {
@@ -238,14 +238,14 @@ Stmt TensorComputeOpNode::BuildProvide(
           n.init_nest.begin(), n.init_nest.begin() + tloc + 1);
       init_nest.emplace_back(MakeIfNest(n.init_predicates));
       Stmt init = MergeNest(output_bind_nest, this->intrin->reduce_init);
-      init = top::Substitute(init, n.init_vmap);
+      init = te::Substitute(init, n.init_vmap);
       init = MergeNest(init_nest, init);
       // The update
       Stmt update = MergeNest(output_bind_nest, this->intrin->reduce_update);
       update = MergeNest(input_bind_nest, update);
       update = tir::Substitute(update, vmap);
       update = MergeNest(binder.asserts(), update);
-      update = top::Substitute(update, n.main_vmap);
+      update = te::Substitute(update, n.main_vmap);
       update = MergeNest(update_nest, update);
       return MergeNest(common, SeqStmt::Flatten(init, update));
     } else {
@@ -259,11 +259,11 @@ Stmt TensorComputeOpNode::BuildProvide(
       update = MergeNest(input_bind_nest, update);
       update = tir::Substitute(update, vmap);
       update = MergeNest(binder.asserts(), update);
-      update = top::Substitute(update, n.main_vmap);
+      update = te::Substitute(update, n.main_vmap);
       update = MergeNest(update_nest, update);
       return MergeNest(common, update);
     }
   }
 }
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
diff --git a/src/top/operation/tensorize.cc b/src/te/operation/tensorize.cc
similarity index 98%
rename from src/top/operation/tensorize.cc
rename to src/te/operation/tensorize.cc
index b096ee6..ba84a90 100644
--- a/src/top/operation/tensorize.cc
+++ b/src/te/operation/tensorize.cc
@@ -31,7 +31,7 @@
 #include "../schedule/message_passing.h"
 
 namespace tvm {
-namespace top {
+namespace te {
 
 using namespace tir;
 
@@ -81,7 +81,7 @@ size_t InferTensorizeRegion(
   }
   CHECK(found_point);
   // Get domain of the tensorized scope.
-  top::PassUpDomain(stage, dom_map, &up_state);
+  te::PassUpDomain(stage, dom_map, &up_state);
   // Get domains if inputs
   std::unordered_map<Tensor, TensorDom> in_dom;
   std::unordered_map<const VarNode*, IntSet> temp_dmap;
@@ -452,7 +452,7 @@ Stmt MakeTensorize(const ComputeOpNode* self,
     body = MergeNest(input_bind_nest, body);
     body = tir::Substitute(body, vmap);
     body = MergeNest(binder.asserts(), body);
-    body = top::Substitute(body, n.main_vmap);
+    body = te::Substitute(body, n.main_vmap);
     return MergeNest(nest, body);
   } else {
     // Need to split reduction
@@ -472,14 +472,14 @@ Stmt MakeTensorize(const ComputeOpNode* self,
           n.init_nest.begin(), n.init_nest.begin() + tloc + 1);
       init_nest.emplace_back(MakeIfNest(n.init_predicates));
       Stmt init = MergeNest(output_bind_nest, intrin->reduce_init);
-      init = top::Substitute(init, n.init_vmap);
+      init = te::Substitute(init, n.init_vmap);
       init = MergeNest(init_nest, init);
       // The update
       Stmt update = MergeNest(output_bind_nest, intrin->reduce_update);
       update = MergeNest(input_bind_nest, update);
       update = tir::Substitute(update, vmap);
       update = MergeNest(binder.asserts(), update);
-      update = top::Substitute(update, n.main_vmap);
+      update = te::Substitute(update, n.main_vmap);
       update = MergeNest(update_nest, update);
       return MergeNest(common, SeqStmt::Flatten(init, update));
     } else {
@@ -493,7 +493,7 @@ Stmt MakeTensorize(const ComputeOpNode* self,
       update = MergeNest(input_bind_nest, update);
       update = tir::Substitute(update, vmap);
       update = MergeNest(binder.asserts(), update);
-      update = top::Substitute(update, n.main_vmap);
+      update = te::Substitute(update, n.main_vmap);
       update = MergeNest(update_nest, update);
       return MergeNest(common, update);
     }
@@ -532,5 +532,5 @@ TVM_REGISTER_GLOBAL("test.op.MatchTensorizeBody")
                               intrin,
                               &vrange);
   });
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
diff --git a/src/top/schedule/auto_inline_elem_wise.cc b/src/te/schedule/auto_inline_elem_wise.cc
similarity index 96%
rename from src/top/schedule/auto_inline_elem_wise.cc
rename to src/te/schedule/auto_inline_elem_wise.cc
index d889a17..3a22267 100644
--- a/src/top/schedule/auto_inline_elem_wise.cc
+++ b/src/te/schedule/auto_inline_elem_wise.cc
@@ -20,12 +20,12 @@
 /*!
  * \file auto_inline_elem_wise.cc
  */
-#include <tvm/top/schedule_pass.h>
-#include <tvm/top/operation.h>
+#include <tvm/te/schedule_pass.h>
+#include <tvm/te/operation.h>
 #include <tvm/tir/expr_functor.h>
 
 namespace tvm {
-namespace top {
+namespace te {
 
 using namespace tir;
 
@@ -111,5 +111,5 @@ void AutoInlineInjective(Schedule sch) {
   }
 }
 
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
diff --git a/src/top/schedule/bound.cc b/src/te/schedule/bound.cc
similarity index 98%
rename from src/top/schedule/bound.cc
rename to src/te/schedule/bound.cc
index 97b9930..27896e6 100644
--- a/src/top/schedule/bound.cc
+++ b/src/te/schedule/bound.cc
@@ -21,8 +21,8 @@
  * \file bound.cc
  * \brief The bound inference logic.
  */
-#include <tvm/top/schedule_pass.h>
-#include <tvm/top/operation.h>
+#include <tvm/te/schedule_pass.h>
+#include <tvm/te/operation.h>
 #include <tvm/tir/ir_pass.h>
 #include <unordered_map>
 #include <unordered_set>
@@ -31,7 +31,7 @@
 #include "../../runtime/thread_storage_scope.h"
 
 namespace tvm {
-namespace top {
+namespace te {
 
 using runtime::StorageRank;
 using runtime::StorageScope;
@@ -259,5 +259,5 @@ Map<IterVar, Range> InferBound(const Schedule& sch) {
   return Map<IterVar, Range>(ret.begin(), ret.end());
 }
 
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
diff --git a/src/top/schedule/graph.cc b/src/te/schedule/graph.cc
similarity index 98%
rename from src/top/schedule/graph.cc
rename to src/te/schedule/graph.cc
index 1e44fac..eff0a25 100644
--- a/src/top/schedule/graph.cc
+++ b/src/te/schedule/graph.cc
@@ -23,14 +23,14 @@
  */
 #include <tvm/tir/expr.h>
 #include <tvm/tir/stmt_functor.h>
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 #include <utility>
 #include <unordered_set>
 #include <unordered_map>
 #include "graph.h"
 
 namespace tvm {
-namespace top {
+namespace te {
 // key to specific tensor dimension.
 struct TensorDimKey {
   tir::FunctionRef f;
@@ -55,13 +55,13 @@ struct TensorDimKey {
     return !operator==(other);
   }
 };
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
 
 namespace std {
 template <>
-struct hash<::tvm::top::TensorDimKey> {
-  std::size_t operator()(const ::tvm::top::TensorDimKey& k) const {
+struct hash<::tvm::te::TensorDimKey> {
+  std::size_t operator()(const ::tvm::te::TensorDimKey& k) const {
     size_t lhs = ::tvm::ObjectHash()(k.f);
     size_t rhs = static_cast<size_t>(k.value_index) << 16UL |
         static_cast<size_t>(k.dim);
@@ -73,7 +73,7 @@ struct hash<::tvm::top::TensorDimKey> {
 
 
 namespace tvm {
-namespace top {
+namespace te {
 
 // construct a read graph that gives readers of each operation
 // that the root depend on
@@ -429,5 +429,5 @@ Map<IterVar, PrimExpr> ScanFixPointAnalysis(const Operation& scan_op) {
   return ret;
 }
 
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
diff --git a/src/top/schedule/graph.h b/src/te/schedule/graph.h
similarity index 95%
rename from src/top/schedule/graph.h
rename to src/te/schedule/graph.h
index 8d2d856..c3478c7 100644
--- a/src/top/schedule/graph.h
+++ b/src/te/schedule/graph.h
@@ -21,18 +21,18 @@
  * \file graph.h
  * \brief Utilities to get information about schedule graph.
  */
-#ifndef TVM_TOP_SCHEDULE_GRAPH_H_
-#define TVM_TOP_SCHEDULE_GRAPH_H_
+#ifndef TVM_TE_SCHEDULE_GRAPH_H_
+#define TVM_TE_SCHEDULE_GRAPH_H_
 
 #include <tvm/tir/expr.h>
-#include <tvm/top/schedule.h>
-#include <tvm/top/operation.h>
+#include <tvm/te/schedule.h>
+#include <tvm/te/operation.h>
 #include <unordered_map>
 #include <unordered_set>
 #include <vector>
 
 namespace tvm {
-namespace top {
+namespace te {
 
 /*!
  * \brief data structure of Operation->Tensors it reads
@@ -125,7 +125,7 @@ Array<Operation> ScanGetBody(const Operation& scan_op);
  */
 Map<IterVar, PrimExpr> ScanFixPointAnalysis(const Operation& scan);
 
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
 
-#endif  // TVM_TOP_SCHEDULE_GRAPH_H_
+#endif  // TVM_TE_SCHEDULE_GRAPH_H_
diff --git a/src/top/schedule/message_passing.cc b/src/te/schedule/message_passing.cc
similarity index 99%
rename from src/top/schedule/message_passing.cc
rename to src/te/schedule/message_passing.cc
index a979df4..5b6fa86 100644
--- a/src/top/schedule/message_passing.cc
+++ b/src/te/schedule/message_passing.cc
@@ -28,7 +28,7 @@
 #include "../../arith/compute_expr.h"
 
 namespace tvm {
-namespace top {
+namespace te {
 
 using namespace tir;
 
@@ -539,5 +539,5 @@ std::vector<PrimExpr> MakeBoundCheck(
   }
   return preds;
 }
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
diff --git a/src/top/schedule/message_passing.h b/src/te/schedule/message_passing.h
similarity index 95%
rename from src/top/schedule/message_passing.h
rename to src/te/schedule/message_passing.h
index beaf301..1877235 100644
--- a/src/top/schedule/message_passing.h
+++ b/src/te/schedule/message_passing.h
@@ -22,19 +22,19 @@
  * \brief Common utilities to do message passing
  *  on the schedule hyper graph.
  */
-#ifndef TVM_TOP_SCHEDULE_MESSAGE_PASSING_H_
-#define TVM_TOP_SCHEDULE_MESSAGE_PASSING_H_
+#ifndef TVM_TE_SCHEDULE_MESSAGE_PASSING_H_
+#define TVM_TE_SCHEDULE_MESSAGE_PASSING_H_
 
 #include <tvm/tir/expr.h>
-#include <tvm/top/schedule.h>
-#include <tvm/top/operation.h>
+#include <tvm/te/schedule.h>
+#include <tvm/te/operation.h>
 #include <tvm/arith/analyzer.h>
 #include <unordered_map>
 #include <unordered_set>
 #include <vector>
 
 namespace tvm {
-namespace top {
+namespace te {
 /*!
  * \brief Downward inference of domain of each IterVar.
  *  Caller set the range of the root, then the function
@@ -128,6 +128,6 @@ MakeBoundCheck(
     bool skip_ivar_domain,
     const std::unordered_set<IterVar>& skip_iter);
 
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
-#endif  // TVM_TOP_SCHEDULE_MESSAGE_PASSING_H_
+#endif  // TVM_TE_SCHEDULE_MESSAGE_PASSING_H_
diff --git a/src/top/schedule/schedule_dataflow_rewrite.cc b/src/te/schedule/schedule_dataflow_rewrite.cc
similarity index 98%
rename from src/top/schedule/schedule_dataflow_rewrite.cc
rename to src/te/schedule/schedule_dataflow_rewrite.cc
index 9ffb511..d9638c7 100644
--- a/src/top/schedule/schedule_dataflow_rewrite.cc
+++ b/src/te/schedule/schedule_dataflow_rewrite.cc
@@ -20,8 +20,8 @@
 /*!
  * \file schedule_dataflow_rewrite.cc
  */
-#include <tvm/top/schedule.h>
-#include <tvm/top/operation.h>
+#include <tvm/te/schedule.h>
+#include <tvm/te/operation.h>
 #include <tvm/tir/stmt_functor.h>
 #include <tvm/tir/ir_pass.h>
 #include <unordered_set>
@@ -30,7 +30,7 @@
 #include "../../arith/compute_expr.h"
 
 namespace tvm {
-namespace top {
+namespace te {
 // find first occurance location in leaf
 template<typename T>
 size_t FindNodeRef(ArrayNode* array_node, const T& v) {
@@ -211,7 +211,7 @@ void PrepareAxisMapping(Stage orig_stage,
     dom_map[iv] = iv->dom;
     analyzer.Bind(iv->var, iv->dom);
   }
-  top::PassDownDomain(orig_stage, &dom_map, &analyzer, true);
+  te::PassDownDomain(orig_stage, &dom_map, &analyzer, true);
   {
     // The source->cache
     std::unordered_map<IterVar, PrimExpr> value_map;
@@ -347,7 +347,7 @@ Array<Tensor> CacheWriteWithReLayout(Schedule sch,
     for (IterVar iv : compute->axis) {
       value_map[iv] = iv->var;
     }
-    top::PassDownIndex(orig_stage, dom_map, &value_map, true);
+    te::PassDownIndex(orig_stage, dom_map, &value_map, true);
     for (IterVar iv : orig_stage->leaf_iter_vars) {
       if (red_axis.count(iv)) continue;
       args.push_back(value_map.at(iv));
@@ -692,8 +692,8 @@ Array<Tensor> Schedule::rfactor(const Tensor& tensor,
   // Find touched reduction axis.
   std::unordered_map<IterVar, int> touch_map;
   touch_map[axis] = 1;
-  top::PassUpBitMaskOr(reduce_stage, &touch_map, true);
-  top::PassDownBitMaskOr(reduce_stage, &touch_map, true);
+  te::PassUpBitMaskOr(reduce_stage, &touch_map, true);
+  te::PassDownBitMaskOr(reduce_stage, &touch_map, true);
   // skip reduction iteration.
   std::unordered_set<IterVar> skip_bound_check;
   // Verify normal axis are not touched.
@@ -715,7 +715,7 @@ Array<Tensor> Schedule::rfactor(const Tensor& tensor,
     }
     analyzer.Bind(iv->var, iv->dom);
   }
-  top::PassDownDomain(reduce_stage, &dom_map, &analyzer, true);
+  te::PassDownDomain(reduce_stage, &dom_map, &analyzer, true);
   for (IterVar iv : reduce_stage->leaf_iter_vars) {
     if (touch_map.count(iv)) {
       Range dom = dom_map.at(iv);
@@ -726,7 +726,7 @@ Array<Tensor> Schedule::rfactor(const Tensor& tensor,
       }
     }
   }
-  top::PassUpIndex(reduce_stage, dom_map, &value_map, true);
+  te::PassUpIndex(reduce_stage, dom_map, &value_map, true);
   std::vector<PrimExpr> predicates = MakeBoundCheck(
       reduce_stage, dom_map, value_map, true, skip_bound_check);
 
@@ -881,5 +881,5 @@ Array<Tensor> Schedule::rfactor(const Tensor& tensor,
   reduce_stage->relations = Array<IterVarRelation>();
   return factor_tensors;
 }
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
diff --git a/src/top/schedule/schedule_lang.cc b/src/te/schedule/schedule_lang.cc
similarity index 99%
rename from src/top/schedule/schedule_lang.cc
rename to src/te/schedule/schedule_lang.cc
index 130d06b..1763bd6 100644
--- a/src/top/schedule/schedule_lang.cc
+++ b/src/te/schedule/schedule_lang.cc
@@ -20,13 +20,13 @@
 /*!
  * \file schedule_lang.cc
  */
-#include <tvm/top/schedule.h>
-#include <tvm/top/operation.h>
+#include <tvm/te/schedule.h>
+#include <tvm/te/operation.h>
 #include <unordered_set>
 #include "graph.h"
 
 namespace tvm {
-namespace top {
+namespace te {
 
 // find first occurance location in leaf
 template<typename T>
@@ -591,7 +591,7 @@ Stage Schedule::create_group(const Array<Tensor>& outputs,
   self->InitCache();
   const auto& op2stage_cache = self->op2stage_cache_;
   // Get the ops.
-  Array<Operation> ops = top::GetSubGraph(
+  Array<Operation> ops = te::GetSubGraph(
       RemapTensor(self, outputs),
       RemapTensor(self, inputs),
       include_inputs);
@@ -715,8 +715,8 @@ Schedule ScheduleNode::make(Array<Operation> ops) {
   auto n = make_object<ScheduleNode>();
   Schedule sch(n);
   n->outputs = ops;
-  auto g = top::CreateReadGraph(n->outputs);
-  Array<Operation> post_order = top::PostDFSOrder(n->outputs, g);
+  auto g = te::CreateReadGraph(n->outputs);
+  Array<Operation> post_order = te::PostDFSOrder(n->outputs, g);
   // output set.
   std::unordered_set<Operation> output_set;
   for (Operation x : ops) {
@@ -848,5 +848,5 @@ TVM_STATIC_IR_FUNCTOR(ReprPrinter, vtable)
     auto* op = static_cast<const ScheduleNode*>(node.get());
     p->stream << "schedule(" << op << ")";
   });
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
diff --git a/src/top/schedule/schedule_ops.cc b/src/te/schedule/schedule_ops.cc
similarity index 99%
rename from src/top/schedule/schedule_ops.cc
rename to src/te/schedule/schedule_ops.cc
index 2bfe96b..0930f26 100644
--- a/src/top/schedule/schedule_ops.cc
+++ b/src/te/schedule/schedule_ops.cc
@@ -23,8 +23,8 @@
 #include <tvm/tir/expr.h>
 #include <tvm/tir/ir_pass.h>
 #include <tvm/tir/stmt_functor.h>
-#include <tvm/top/operation.h>
-#include <tvm/top/schedule_pass.h>
+#include <tvm/te/operation.h>
+#include <tvm/te/schedule_pass.h>
 #include <utility>
 #include <unordered_map>
 #include <unordered_set>
@@ -33,7 +33,7 @@
 #include "../../tir/pass/ir_util.h"
 
 namespace tvm {
-namespace top {
+namespace te {
 
 using namespace tir;
 
@@ -423,5 +423,5 @@ Stmt ScheduleOps(
   return post_proc(std::move(body));
 }
 
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
diff --git a/src/top/tensor.cc b/src/te/tensor.cc
similarity index 97%
rename from src/top/tensor.cc
rename to src/te/tensor.cc
index 85232b9..f200514 100644
--- a/src/top/tensor.cc
+++ b/src/te/tensor.cc
@@ -20,13 +20,13 @@
 /*!
  * \file tensor.cc
  */
-#include <tvm/top/tensor.h>
-#include <tvm/top/operation.h>
-#include <tvm/top/tensor_intrin.h>
+#include <tvm/te/tensor.h>
+#include <tvm/te/operation.h>
+#include <tvm/te/tensor_intrin.h>
 #include <memory>
 
 namespace tvm {
-namespace top {
+namespace te {
 
 IterVar thread_axis(Range dom, std::string tag) {
   return IterVarNode::make(
@@ -147,5 +147,5 @@ TVM_STATIC_IR_FUNCTOR(ReprPrinter, vtable)
 
 TVM_REGISTER_NODE_TYPE(TensorIntrinCallNode);
 
-}  // namespace top
+}  // namespace te
 }  // namespace tvm
diff --git a/src/tir/pass/inject_prefetch.cc b/src/tir/pass/inject_prefetch.cc
index f04d5d4..894ff38 100644
--- a/src/tir/pass/inject_prefetch.cc
+++ b/src/tir/pass/inject_prefetch.cc
@@ -39,7 +39,7 @@ class PrefetchInjector : public StmtMutator {
     Stmt ret = StmtMutator::VisitStmt_(op);
     op = ret.as<AttrStmtNode>();
     if (op && op->attr_key == attr::prefetch_scope) {
-      top::Tensor ts = Downcast<top::Tensor>(op->node);
+      te::Tensor ts = Downcast<te::Tensor>(op->node);
       CHECK_NE(loop_nest_.size(), 0U);
       Domain domain = DomainTouched(op->body, ts, true, false);
       Region region;
diff --git a/src/tir/pass/storage_flatten.cc b/src/tir/pass/storage_flatten.cc
index bed4879..f9533fa 100644
--- a/src/tir/pass/storage_flatten.cc
+++ b/src/tir/pass/storage_flatten.cc
@@ -25,7 +25,7 @@
 #include <tvm/arith/analyzer.h>
 #include <tvm/tir/expr.h>
 #include <tvm/tir/stmt.h>
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 #include <tvm/tir/stmt_functor.h>
 #include <tvm/tir/op.h>
 #include <tvm/tir/ir_pass.h>
@@ -49,7 +49,7 @@ using intrinsic::tvm_address_of;
 
 class StorageFlattener : public StmtExprMutator {
  public:
-  explicit StorageFlattener(Map<top::Tensor, Buffer> extern_buffer,
+  explicit StorageFlattener(Map<te::Tensor, Buffer> extern_buffer,
                             int cache_line_size, bool create_bound_attributes,
                             IRVisitorWithAnalyzer* bounded_analyzer)
       : bounded_analyzer_(bounded_analyzer),
@@ -82,8 +82,8 @@ class StorageFlattener : public StmtExprMutator {
       storage_scope_[op->node.get()] = op->value.as<StringImmNode>()->value;
       return this->VisitStmt(op->body);
     } else if (op->attr_key == attr::double_buffer_scope &&
-               op->node->IsInstance<top::OperationNode>()) {
-      auto func = Downcast<top::Operation>(op->node);
+               op->node->IsInstance<te::OperationNode>()) {
+      auto func = Downcast<te::Operation>(op->node);
       Stmt body = this->VisitStmt(op->body);
       for (int i = 0; i < func->num_outputs(); ++i) {
         TensorKey key{func, i};
@@ -104,7 +104,7 @@ class StorageFlattener : public StmtExprMutator {
     } else if (op->attr_key == attr::buffer_bind_scope) {
       return HandleBufferBindScope(op);
     } else if (op->attr_key == attr::buffer_dim_align) {
-      auto tensor = Downcast<top::Tensor>(op->node);
+      auto tensor = Downcast<te::Tensor>(op->node);
       const CallNode* tuple = op->value.as<CallNode>();
       CHECK(tuple && tuple->is_intrinsic(intrinsic::tvm_tuple));
       TensorKey key{tensor->op, tensor->value_index};
@@ -406,7 +406,7 @@ class StorageFlattener : public StmtExprMutator {
     Array<ObjectRef> arr = Downcast<Array<ObjectRef> > (op->node);
     CHECK_EQ(arr.size(), 2U);
     const BufferNode* buffer = arr[0].as<BufferNode>();
-    const top::TensorNode* tensor = arr[1].as<top::TensorNode>();
+    const te::TensorNode* tensor = arr[1].as<te::TensorNode>();
     const CallNode* tuple = op->value.as<CallNode>();
     CHECK(buffer && tensor);
     CHECK(tuple && tuple->is_intrinsic(intrinsic::tvm_tuple));
@@ -529,7 +529,7 @@ class StorageFlattener : public StmtExprMutator {
   bool create_bound_attributes_{false};
 };
 
-Stmt StorageFlatten(Stmt stmt, Map<top::Tensor, Buffer> extern_buffer,
+Stmt StorageFlatten(Stmt stmt, Map<te::Tensor, Buffer> extern_buffer,
                     int cache_line_size, bool create_bound_attributes) {
   IRVisitorWithAnalyzer bounded_analyzer;
   bounded_analyzer(stmt);
diff --git a/src/tir/pass/tensor_core.cc b/src/tir/pass/tensor_core.cc
index eb07d92..6a5e015 100644
--- a/src/tir/pass/tensor_core.cc
+++ b/src/tir/pass/tensor_core.cc
@@ -23,7 +23,7 @@
 // IR Passes for TensorCore CodeGen
 #include <tvm/tir/expr.h>
 #include <tvm/tir/stmt.h>
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 #include <tvm/tir/stmt_functor.h>
 #include <tvm/tir/op.h>
 #include <tvm/tir/ir_pass.h>
@@ -39,7 +39,7 @@
 namespace tvm {
 namespace tir {
 
-using namespace top;
+using namespace te;
 using runtime::StorageRank;
 using runtime::StorageScope;
 using runtime::ThreadScope;
@@ -418,7 +418,7 @@ class BufferAnalyser : public StmtExprVisitor {
       storage_scope_[op->node.get()] = op->value.as<StringImmNode>()->value;
       this->VisitStmt(op->body);
     } else if (op->attr_key == attr::buffer_dim_align) {
-      top::Tensor tensor = Downcast<top::Tensor>(op->node);
+      te::Tensor tensor = Downcast<te::Tensor>(op->node);
       const CallNode* tuple = op->value.as<CallNode>();
       CHECK(tuple && tuple->is_intrinsic(intrinsic::tvm_tuple));
       auto& vinfo = dim_align_[TensorKey{tensor->op, tensor->value_index}];
@@ -832,7 +832,7 @@ class TensorCoreIRMutator : public StmtExprMutator {
   Stmt VisitStmt_(const AttrStmtNode* op) final {
     Stmt stmt = StmtExprMutator::VisitStmt_(op);
     if (op->attr_key == attr::realize_scope) {
-      auto node = op->node.as<top::OperationNode>();
+      auto node = op->node.as<te::OperationNode>();
       if (node != nullptr) {
         if (!frag_reg_.count(node->name)) {
           return stmt;
@@ -1120,9 +1120,9 @@ class TensorCoreIRMutator : public StmtExprMutator {
     buffer_node->offset_factor = 1;
     Buffer buffer(buffer_node);
 
-    ObjectPtr<top::TensorNode> tensor_node = make_object<top::TensorNode>();
+    ObjectPtr<te::TensorNode> tensor_node = make_object<te::TensorNode>();
     tensor_node->value_index = key.value_index;
-    tensor_node->op = Downcast<top::Operation>(key.f);
+    tensor_node->op = Downcast<te::Operation>(key.f);
     tensor_node->shape = shape;
     tensor_node->dtype = datatype;
     Tensor tensor(tensor_node);
diff --git a/src/tir/pass/verify_compact_buffer.cc b/src/tir/pass/verify_compact_buffer.cc
index 0ca2e14..5328165 100644
--- a/src/tir/pass/verify_compact_buffer.cc
+++ b/src/tir/pass/verify_compact_buffer.cc
@@ -25,7 +25,7 @@
 #include <tvm/tir/expr.h>
 #include <tvm/tir/stmt.h>
 #include <tvm/tir/stmt_functor.h>
-#include <tvm/top/tensor.h>
+#include <tvm/te/tensor.h>
 
 #include <unordered_map>
 
diff --git a/tests/cpp/build_module_test.cc b/tests/cpp/build_module_test.cc
index b717b6e..1f640f5 100644
--- a/tests/cpp/build_module_test.cc
+++ b/tests/cpp/build_module_test.cc
@@ -20,7 +20,7 @@
 #include <dmlc/logging.h>
 #include <gtest/gtest.h>
 #include <topi/cuda/injective.h>
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 #include <tvm/runtime/registry.h>
 #include <tvm/driver/driver.h>
 
@@ -29,7 +29,7 @@
 
 TEST(BuildModule, Basic) {
   using namespace tvm;
-  using namespace tvm::top;
+  using namespace tvm::te;
   auto n = var("n");
   Array<PrimExpr> shape;
   shape.push_back(n);
@@ -75,7 +75,7 @@ TEST(BuildModule, Heterogeneous) {
    */
 
   using namespace tvm;
-  using namespace tvm::top;
+  using namespace tvm::te;
   const runtime::PackedFunc* pf = runtime::Registry::Get("module._Enabled");
   bool enabled = (*pf)("cuda");
   if (!enabled) {
diff --git a/tests/cpp/expr_test.cc b/tests/cpp/expr_test.cc
index 61fd726..e17cc73 100644
--- a/tests/cpp/expr_test.cc
+++ b/tests/cpp/expr_test.cc
@@ -19,7 +19,7 @@
 
 #include <dmlc/logging.h>
 #include <gtest/gtest.h>
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 
 TEST(Expr, Basic) {
   using namespace tvm;
diff --git a/tests/cpp/ir_simplify_test.cc b/tests/cpp/ir_simplify_test.cc
index f4d1a46..69cf129 100644
--- a/tests/cpp/ir_simplify_test.cc
+++ b/tests/cpp/ir_simplify_test.cc
@@ -20,10 +20,10 @@
 #include <dmlc/logging.h>
 #include <gtest/gtest.h>
 #include <tvm/tir/ir_pass.h>
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 
 TEST(IRSIMPLIFY, MinMax) {
-  auto x = tvm::top::var("x");
+  auto x = tvm::te::var("x");
   auto e1 = (tvm::max(x, 1) - tvm::max(x, 1)) ;
   auto e1s = tvm::tir::CanonicalSimplify(e1);
   CHECK(tvm::tir::is_zero(e1s));
@@ -34,7 +34,7 @@ TEST(IRSIMPLIFY, MinMax) {
 }
 
 TEST(IRSIMPLIFY, Mul) {
-  auto x = tvm::top::var("x");
+  auto x = tvm::te::var("x");
   auto e = (x * x) - (x * x) ;
   auto es = tvm::tir::CanonicalSimplify(e);
   CHECK(tvm::tir::is_zero(es));
diff --git a/tests/cpp/relay_build_module_test.cc b/tests/cpp/relay_build_module_test.cc
index 9d954ea..8717bb2 100644
--- a/tests/cpp/relay_build_module_test.cc
+++ b/tests/cpp/relay_build_module_test.cc
@@ -19,7 +19,7 @@
 
 #include <gtest/gtest.h>
 #include <tvm/driver/driver.h>
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 #include <tvm/relay/expr.h>
 #include <tvm/relay/type.h>
 #include <tvm/relay/analysis.h>
diff --git a/tests/cpp/relay_pass_type_infer_test.cc b/tests/cpp/relay_pass_type_infer_test.cc
index dcb4443..5935e36 100644
--- a/tests/cpp/relay_pass_type_infer_test.cc
+++ b/tests/cpp/relay_pass_type_infer_test.cc
@@ -18,7 +18,7 @@
  */
 
 #include <gtest/gtest.h>
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 #include <tvm/relay/expr.h>
 #include <tvm/relay/type.h>
 #include <tvm/relay/analysis.h>
diff --git a/tests/cpp/relay_transform_sequential.cc b/tests/cpp/relay_transform_sequential.cc
index 5593f07..a96c060 100644
--- a/tests/cpp/relay_transform_sequential.cc
+++ b/tests/cpp/relay_transform_sequential.cc
@@ -27,7 +27,7 @@
 #include <tvm/relay/type.h>
 #include <tvm/runtime/packed_func.h>
 #include <tvm/runtime/registry.h>
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 
 TVM_REGISTER_GLOBAL("schedule")
     .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) {
diff --git a/tests/cpp/simple_passes_test.cc b/tests/cpp/simple_passes_test.cc
index cff0a56..a3c6b07 100644
--- a/tests/cpp/simple_passes_test.cc
+++ b/tests/cpp/simple_passes_test.cc
@@ -20,15 +20,15 @@
 #include <dmlc/logging.h>
 #include <gtest/gtest.h>
 #include <tvm/tir/ir_pass.h>
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 
 TEST(SimplePasses, HasSideEffect) {
   using namespace tvm;
-  auto n = top::var("n");
+  auto n = te::var("n");
   Array<PrimExpr> shape;
   shape.push_back(n);
 
-  auto A = top::placeholder(shape, DataType::Float(32), "A");
+  auto A = te::placeholder(shape, DataType::Float(32), "A");
 
   CHECK(!tvm::tir::HasSideEffect(A[0]));
 }
diff --git a/tests/cpp/tensor_test.cc b/tests/cpp/tensor_test.cc
index 5d6dc23..a9566cb 100644
--- a/tests/cpp/tensor_test.cc
+++ b/tests/cpp/tensor_test.cc
@@ -19,11 +19,11 @@
 
 #include <dmlc/logging.h>
 #include <gtest/gtest.h>
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 
 TEST(Tensor, Basic) {
   using namespace tvm;
-  using namespace tvm::top;
+  using namespace tvm::te;
 
   Var m("m"), n("n"), l("l");
 
@@ -39,17 +39,17 @@ TEST(Tensor, Basic) {
 
 TEST(Tensor, Reduce) {
   using namespace tvm;
-  using namespace tvm::top;
+  using namespace tvm::te;
 
   Var m("m"), n("n"), l("l");
-  top::Tensor A = top::placeholder({m, l}, DataType::Float(32), "A");
-  top::Tensor B = top::placeholder({n, l}, DataType::Float(32), "B");
+  te::Tensor A = te::placeholder({m, l}, DataType::Float(32), "A");
+  te::Tensor B = te::placeholder({n, l}, DataType::Float(32), "B");
   IterVar rv = reduce_axis(Range{0, l}, "k");
 
-  auto C = top::compute({m, n}, [&](Var i, Var j) {
+  auto C = te::compute({m, n}, [&](Var i, Var j) {
       return sum(max(1 + A[i][rv] + 1, B[j][rv]), {rv});
       }, "C");
-  LOG(INFO) << C->op.as<top::ComputeOpNode>()->body;
+  LOG(INFO) << C->op.as<te::ComputeOpNode>()->body;
 }
 
 int main(int argc, char ** argv) {
diff --git a/tests/cpp/topi_ewise_test.cc b/tests/cpp/topi_ewise_test.cc
index 5f89bdf..a1ca6d7 100644
--- a/tests/cpp/topi_ewise_test.cc
+++ b/tests/cpp/topi_ewise_test.cc
@@ -17,7 +17,7 @@
  * under the License.
  */
 
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 #include <topi/elemwise.h>
 #include <gtest/gtest.h>
 
diff --git a/tests/cpp/utvm_runtime_standalone_test.cc b/tests/cpp/utvm_runtime_standalone_test.cc
index 55f5c97..cb4d2a4 100644
--- a/tests/cpp/utvm_runtime_standalone_test.cc
+++ b/tests/cpp/utvm_runtime_standalone_test.cc
@@ -32,7 +32,7 @@
 #include <gtest/gtest.h>
 #include <topi/generic/injective.h>
 #include <tvm/driver/driver.h>
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 #include <tvm/relay/analysis.h>
 #include <tvm/relay/expr.h>
 #include <tvm/relay/transform.h>
diff --git a/topi/include/topi/broadcast.h b/topi/include/topi/broadcast.h
index 33034a1..a56d206 100644
--- a/topi/include/topi/broadcast.h
+++ b/topi/include/topi/broadcast.h
@@ -24,11 +24,12 @@
 #ifndef TOPI_BROADCAST_H_
 #define TOPI_BROADCAST_H_
 
+#include <topi/detail/broadcast.h>
+#include <topi/detail/constant_utils.h>
+#include <topi/tags.h>
+
 #include <string>
 #include <algorithm>
-#include "topi/detail/broadcast.h"
-#include "topi/detail/constant_utils.h"
-#include "topi/tags.h"
 
 namespace topi {
 
@@ -43,7 +44,7 @@ namespace topi {
  *
  * \return A Tensor whose op member is a broadcast operation
  */
-inline tvm::top::Tensor broadcast_to(const tvm::top::Tensor& t,
+inline tvm::te::Tensor broadcast_to(const tvm::te::Tensor& t,
                                 const tvm::Array<tvm::PrimExpr>& output_shape,
                                 std::string name = "T_broadcast_to",
                                 std::string tag = kBroadcast) {
@@ -58,7 +59,7 @@ inline tvm::top::Tensor broadcast_to(const tvm::top::Tensor& t,
   auto l = [&](tvm::Array<tvm::tir::Var> ovars) {
     return t(detail::InputIndexFromBroadcast(ovars, t, bh.vars2, bh.all_vars));
   };
-  return tvm::top::compute(
+  return tvm::te::compute(
       tvm::Array<tvm::PrimExpr>(bh.common_shape.begin(), bh.common_shape.end()),
       l,
       name,
@@ -70,44 +71,44 @@ inline tvm::top::Tensor broadcast_to(const tvm::top::Tensor& t,
                             const tvm::PrimExpr& b) {                 \
     ComputeRule;                                                      \
   }                                                                   \
-  inline tvm::top::Tensor Name(const tvm::top::Tensor& A,             \
-                               const tvm::top::Tensor& B,             \
-                               std::string name = "T_" #Name,         \
-                               std::string tag = kBroadcast) {        \
+  inline tvm::te::Tensor Name(const tvm::te::Tensor& A,               \
+                              const tvm::te::Tensor& B,               \
+                              std::string name = "T_" #Name,          \
+                              std::string tag = kBroadcast) {         \
     auto l = [](tvm::PrimExpr a, tvm::PrimExpr b) { ComputeRule; };   \
     return detail::WithBroadcast(l, A, B, name, tag);                 \
   }                                                                   \
-  inline tvm::top::Tensor Name(const tvm::top::Tensor& A,             \
-                          const tvm::PrimExpr& B,                     \
-                          std::string name = "T_" #Name,              \
-                          std::string tag = kElementWise) {           \
+  inline tvm::te::Tensor Name(const tvm::te::Tensor& A,               \
+                              const tvm::PrimExpr& B,                 \
+                              std::string name = "T_" #Name,          \
+                              std::string tag = kElementWise) {       \
     auto l = [](tvm::PrimExpr a, tvm::PrimExpr b) { ComputeRule; };   \
-    return tvm::top::compute(A->shape, [&](const ::tvm::Array<::tvm::tir::Var>& i) { \
+    return tvm::te::compute(A->shape, [&](const ::tvm::Array<::tvm::tir::Var>& i) { \
         return l(A(i), B);                                            \
       }, name, tag);                                                  \
   }                                                                   \
-  inline tvm::top::Tensor Name(const tvm::PrimExpr& A,                \
-                               const tvm::top::Tensor& B,             \
-                               std::string name = "T_" #Name,         \
-                               std::string tag = kElementWise) {      \
+  inline tvm::te::Tensor Name(const tvm::PrimExpr& A,                 \
+                              const tvm::te::Tensor& B,               \
+                              std::string name = "T_" #Name,          \
+                              std::string tag = kElementWise) {       \
     auto l = [&](tvm::PrimExpr a, tvm::PrimExpr b) { ComputeRule; };  \
-    return tvm::top::compute(B->shape, [&](const ::tvm::Array<::tvm::tir::Var>& i) { \
+    return tvm::te::compute(B->shape, [&](const ::tvm::Array<::tvm::tir::Var>& i) { \
         return l(A, B(i));                                            \
       }, name, tag);                                                  \
   }
 
 
 #define TOPI_DEFINE_OP_OVERLOAD(Name, OpName)                       \
-  inline tvm::top::Tensor Name(const tvm::top::Tensor& A,           \
-                              const tvm::top::Tensor& B) {          \
+  inline tvm::te::Tensor Name(const tvm::te::Tensor& A,             \
+                              const tvm::te::Tensor& B) {           \
     return topi::OpName(A, B);                                      \
   }                                                                 \
-  inline tvm::top::Tensor Name(const tvm::PrimExpr& A,              \
-                               const tvm::top::Tensor& B) {         \
+  inline tvm::te::Tensor Name(const tvm::PrimExpr& A,               \
+                              const tvm::te::Tensor& B) {           \
     return topi::OpName(A, B);                                      \
   }                                                                 \
-  inline tvm::top::Tensor Name(const tvm::top::Tensor& A,           \
-                               const tvm::PrimExpr& B) {            \
+  inline tvm::te::Tensor Name(const tvm::te::Tensor& A,             \
+                              const tvm::PrimExpr& B) {             \
     return topi::OpName(A, B);                                      \
   }
 
diff --git a/topi/include/topi/contrib/cublas.h b/topi/include/topi/contrib/cublas.h
index 44685fc..66b8a10 100644
--- a/topi/include/topi/contrib/cublas.h
+++ b/topi/include/topi/contrib/cublas.h
@@ -24,13 +24,13 @@
 #ifndef TOPI_CONTRIB_CUBLAS_H_
 #define TOPI_CONTRIB_CUBLAS_H_
 
-#include "tvm/top/operation.h"
-#include "topi/detail/extern.h"
+#include <tvm/te/operation.h>
+#include <topi/detail/extern.h>
 
 namespace topi {
 namespace contrib {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 using namespace topi::detail;
 /*!
 * \brief Create an op that multiplies lhs and rhs with cuBLAS
diff --git a/topi/include/topi/contrib/rocblas.h b/topi/include/topi/contrib/rocblas.h
index 062ad40..2fcafc7 100644
--- a/topi/include/topi/contrib/rocblas.h
+++ b/topi/include/topi/contrib/rocblas.h
@@ -24,13 +24,13 @@
 #ifndef TOPI_CONTRIB_ROCBLAS_H_
 #define TOPI_CONTRIB_ROCBLAS_H_
 
-#include "tvm/top/operation.h"
+#include <tvm/te/operation.h>
 #include "topi/detail/extern.h"
 
 namespace topi {
 namespace contrib {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 /*!
 * \brief Create an op that multiplies lhs and rhs with rocBLAS
 *
diff --git a/topi/include/topi/cuda/dense.h b/topi/include/topi/cuda/dense.h
index c220178..1f0701e 100644
--- a/topi/include/topi/cuda/dense.h
+++ b/topi/include/topi/cuda/dense.h
@@ -24,18 +24,18 @@
 #ifndef TOPI_CUDA_DENSE_H_
 #define TOPI_CUDA_DENSE_H_
 
-#include "tvm/top/operation.h"
-#include "tvm/top/schedule_pass.h"
-#include "tvm/target/generic_func.h"
-#include "topi/tags.h"
-#include "topi/detail/array_utils.h"
-#include "topi/nn/dense.h"
-#include "topi/contrib/cublas.h"
-#include "topi/generic/extern.h"
+#include <tvm/te/operation.h>
+#include <tvm/te/schedule_pass.h>
+#include <tvm/target/generic_func.h>
+#include <topi/tags.h>
+#include <topi/detail/array_utils.h>
+#include <topi/nn/dense.h>
+#include <topi/contrib/cublas.h>
+#include <topi/generic/extern.h>
 
 namespace topi {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 namespace cuda {
 /*!
@@ -49,10 +49,10 @@ namespace cuda {
 *
 * \return Tensor with shape [batch, out_dim]
 */
-inline tvm::top::Tensor dense_cuda(const Target& target,
-                              const tvm::top::Tensor& data,
-                              const tvm::top::Tensor& weight,
-                              const tvm::top::Tensor& bias,
+inline tvm::te::Tensor dense_cuda(const Target& target,
+                              const tvm::te::Tensor& data,
+                              const tvm::te::Tensor& weight,
+                              const tvm::te::Tensor& bias,
                               const DataType& out_dtype) {
   CHECK_EQ(data->shape.size(), 2) << "dense requires 2-D data";
   CHECK_EQ(weight->shape.size(), 2) << "dense requires 2-D weight";
@@ -68,7 +68,7 @@ inline tvm::top::Tensor dense_cuda(const Target& target,
     CHECK_EQ(data->dtype, out_dtype) << "Mixed precision not supported.";
     auto mm = topi::contrib::cublas_matmul(data, weight, false, true);
     if (bias.defined()) {
-      mm = tvm::top::compute({ batch, out_dim },
+      mm = tvm::te::compute({ batch, out_dim },
                         [&](Var i, Var j) {
                           return mm(i, j) + bias(j);
                         }, "tensor", kBroadcast);
@@ -115,12 +115,12 @@ inline Schedule schedule_dense(const Target &target, const Array<Tensor>& outs)
       s[dense].compute_at(s[out], s[out]->op.as<ComputeOpNode>()->axis[1]);
     }
     s[out].bind(s[out]->op.as<ComputeOpNode>()->axis[0],
-                tvm::top::thread_axis(Range(), "blockIdx.y"));
+                tvm::te::thread_axis(Range(), "blockIdx.y"));
     s[out].bind(s[out]->op.as<ComputeOpNode>()->axis[1],
-                tvm::top::thread_axis(Range(), "blockIdx.x"));
+                tvm::te::thread_axis(Range(), "blockIdx.x"));
 
     auto tx = s[dense]->op.as<ComputeOpNode>()->reduce_axis[0];
-    auto thread_x = tvm::top::thread_axis(Range(), "threadIdx.x");
+    auto thread_x = tvm::te::thread_axis(Range(), "threadIdx.x");
     s[dense].bind(tx, thread_x);
     s[dense_f].compute_at(s[dense], tx);
     s[dense].set_store_predicate(static_cast<PrimExpr>(thread_x) == 0);
diff --git a/topi/include/topi/cuda/injective.h b/topi/include/topi/cuda/injective.h
index 5541b38..a7792a5 100644
--- a/topi/include/topi/cuda/injective.h
+++ b/topi/include/topi/cuda/injective.h
@@ -24,15 +24,15 @@
 #ifndef TOPI_CUDA_INJECTIVE_H_
 #define TOPI_CUDA_INJECTIVE_H_
 
-#include "topi/tags.h"
-#include "topi/detail/fuse.h"
-#include "tvm/top/operation.h"
-#include "tvm/top/schedule_pass.h"
-#include "tvm/target/generic_func.h"
+#include <tvm/te/operation.h>
+#include <tvm/te/schedule_pass.h>
+#include <tvm/target/generic_func.h>
+#include <topi/tags.h>
+#include <topi/detail/fuse.h>
 
 namespace topi {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 namespace cuda {
 
@@ -69,7 +69,7 @@ inline Schedule schedule_injective(const Target &target, const Array<Tensor>& ou
     out_ops.push_back(t->op);
   }
   auto s = create_schedule(out_ops);
-  tvm::top::AutoInlineInjective(s);
+  tvm::te::AutoInlineInjective(s);
   for (auto out : outs) {
     schedule_injective_from_existing(s, out);
   }
diff --git a/topi/include/topi/cuda/normalization.h b/topi/include/topi/cuda/normalization.h
index 851756a..66b20b7 100644
--- a/topi/include/topi/cuda/normalization.h
+++ b/topi/include/topi/cuda/normalization.h
@@ -24,14 +24,14 @@
 #ifndef TOPI_CUDA_NORMALIZATION_H_
 #define TOPI_CUDA_NORMALIZATION_H_
 
-#include "tvm/top/operation.h"
-#include "tvm/top/schedule_pass.h"
-#include "tvm/target/generic_func.h"
-#include "topi/tags.h"
+#include <tvm/te/operation.h>
+#include <tvm/te/schedule_pass.h>
+#include <tvm/target/generic_func.h>
+#include <topi/tags.h>
 
 namespace topi {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 namespace cuda {
 /*!
 * \brief Create a CUDA schedule for LRN
@@ -48,8 +48,8 @@ inline Schedule schedule_lrn(const Target &target, const Array<Tensor>& outs) {
   }
   Schedule s = create_schedule(out_ops);
   int num_thread = 64;
-  IterVar block_x = tvm::top::thread_axis(Range(), "blockIdx.x");
-  IterVar thread_x = tvm::top::thread_axis(Range(0, num_thread), "threadIdx.x");
+  IterVar block_x = tvm::te::thread_axis(Range(), "blockIdx.x");
+  IterVar thread_x = tvm::te::thread_axis(Range(0, num_thread), "threadIdx.x");
   Tensor lrn = outs[0];
   Tensor sqr_sum_up = lrn->op->InputTensors()[1];
   Tensor sqr_sum = sqr_sum_up->op->InputTensors()[0];
@@ -111,8 +111,8 @@ inline Schedule schedule_l2_normalize(const Target &target, const Array<Tensor>&
   traverse(outs[0]->op);
   int num_thread = 64;
   Tensor l2_normalize = outs[0];
-  IterVar block_x = tvm::top::thread_axis(Range(), "blockIdx.x");
-  IterVar thread_x = tvm::top::thread_axis(Range(0, num_thread), "threadIdx.x");
+  IterVar block_x = tvm::te::thread_axis(Range(), "blockIdx.x");
+  IterVar thread_x = tvm::te::thread_axis(Range(0, num_thread), "threadIdx.x");
   IterVar xto, xti;
   s[l2_normalize].split_by_nparts(l2_normalize->op.as<ComputeOpNode>()->axis[1],
                                  num_thread, &xto, &xti);
diff --git a/topi/include/topi/cuda/pooling.h b/topi/include/topi/cuda/pooling.h
index 60e90e0..75b66b3 100644
--- a/topi/include/topi/cuda/pooling.h
+++ b/topi/include/topi/cuda/pooling.h
@@ -24,16 +24,16 @@
 #ifndef TOPI_CUDA_POOLING_H_
 #define TOPI_CUDA_POOLING_H_
 
-#include "topi/tags.h"
-#include "topi/detail/fuse.h"
-#include "topi/detail/array_utils.h"
-#include "tvm/top/operation.h"
-#include "tvm/top/schedule_pass.h"
-#include "tvm/target/generic_func.h"
+#include <tvm/te/operation.h>
+#include <tvm/te/schedule_pass.h>
+#include <tvm/target/generic_func.h>
+#include <topi/tags.h>
+#include <topi/detail/fuse.h>
+#include <topi/detail/array_utils.h>
 
 namespace topi {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 namespace cuda {
 
@@ -69,8 +69,8 @@ inline Schedule schedule_pool(const Target &target, const Array<Tensor>& outs) {
     auto fused = detail::Fuse(s[out], s[out]->op.as<ComputeOpNode>()->axis);
     IterVar bx, tx;
     s[out].split(fused, num_thread, &bx, &tx);
-    s[out].bind(bx, tvm::top::thread_axis(Range(), "blockIdx.x"));
-    s[out].bind(tx, tvm::top::thread_axis(Range(), "threadIdx.x"));
+    s[out].bind(bx, tvm::te::thread_axis(Range(), "blockIdx.x"));
+    s[out].bind(tx, tvm::te::thread_axis(Range(), "threadIdx.x"));
     if (detail::contains(s->outputs, pool->op)) {
       s[OL].compute_at(s[out], tx);
     } else {
@@ -121,10 +121,10 @@ inline Schedule schedule_global_pool(const Target &target, const Array<Tensor>&
 
   auto _schedule = [&](const Tensor& pool) {
     auto num_thread = 8;
-    auto block_x = tvm::top::thread_axis(Range(), "blockIdx.x");
-    auto block_y = tvm::top::thread_axis(Range(), "blockIdx.y");
-    auto thread_x = tvm::top::thread_axis(Range(0, num_thread), "threadIdx.x");
-    auto thread_y = tvm::top::thread_axis(Range(0, num_thread), "threadIdx.y");
+    auto block_x = tvm::te::thread_axis(Range(), "blockIdx.x");
+    auto block_y = tvm::te::thread_axis(Range(), "blockIdx.y");
+    auto thread_x = tvm::te::thread_axis(Range(0, num_thread), "threadIdx.x");
+    auto thread_y = tvm::te::thread_axis(Range(0, num_thread), "threadIdx.y");
     Tensor out;
     Tensor OL;
     if (detail::contains(s->outputs, pool->op)) {
diff --git a/topi/include/topi/cuda/reduction.h b/topi/include/topi/cuda/reduction.h
index 15241f9..add8d99 100644
--- a/topi/include/topi/cuda/reduction.h
+++ b/topi/include/topi/cuda/reduction.h
@@ -24,15 +24,15 @@
 #ifndef TOPI_CUDA_REDUCTION_H_
 #define TOPI_CUDA_REDUCTION_H_
 
-#include "topi/tags.h"
-#include "topi/detail/fuse.h"
-#include "tvm/top/operation.h"
-#include "tvm/top/schedule_pass.h"
-#include "tvm/target/generic_func.h"
+#include <tvm/te/operation.h>
+#include <tvm/te/schedule_pass.h>
+#include <tvm/target/generic_func.h>
+#include <topi/tags.h>
+#include <topi/detail/fuse.h>
 
 namespace topi {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 namespace cuda {
 /*!
@@ -76,13 +76,13 @@ Schedule ScheduleReduce(const Target& target,
       // Don't know why.
       num_thread = 16;
     }
-    block_x = tvm::top::thread_axis(Range(), "blockIdx.x");
-    thread_x = tvm::top::thread_axis(Range(0, num_thread), "threadIdx.x");
-    thread_y = tvm::top::thread_axis(Range(0, num_thread), "threadIdx.y");
+    block_x = tvm::te::thread_axis(Range(), "blockIdx.x");
+    thread_x = tvm::te::thread_axis(Range(0, num_thread), "threadIdx.x");
+    thread_y = tvm::te::thread_axis(Range(0, num_thread), "threadIdx.y");
   } else {
     all_reduce = true;
     num_thread = target->max_num_threads;
-    thread_x = tvm::top::thread_axis(Range(0, num_thread), "threadIdx.x");
+    thread_x = tvm::te::thread_axis(Range(0, num_thread), "threadIdx.x");
   }
 
   auto fused_reduce = detail::Fuse(out_stage, out_stage->op.as<ComputeOpNode>()->reduce_axis);
diff --git a/topi/include/topi/cuda/softmax.h b/topi/include/topi/cuda/softmax.h
index 61500c3..4c88c3e 100644
--- a/topi/include/topi/cuda/softmax.h
+++ b/topi/include/topi/cuda/softmax.h
@@ -24,15 +24,15 @@
 #ifndef TOPI_CUDA_SOFTMAX_H_
 #define TOPI_CUDA_SOFTMAX_H_
 
-#include "topi/tags.h"
-#include "topi/detail/fuse.h"
-#include "tvm/top/operation.h"
-#include "tvm/top/schedule_pass.h"
-#include "tvm/target/generic_func.h"
+#include <tvm/te/operation.h>
+#include <tvm/te/schedule_pass.h>
+#include <tvm/target/generic_func.h>
+#include <topi/tags.h>
+#include <topi/detail/fuse.h>
 
 namespace topi {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 namespace cuda {
 
@@ -52,9 +52,9 @@ inline Schedule schedule_softmax(const Target &target, const Array<Tensor>& outs
   auto s = create_schedule(out_ops);
 
   auto softmax = outs[0];
-  tvm::top::Tensor max_elem;
-  tvm::top::Tensor expsum;
-  tvm::top::Tensor exp;
+  tvm::te::Tensor max_elem;
+  tvm::te::Tensor expsum;
+  tvm::te::Tensor exp;
   bool has_exp = false;
 
   auto tag = softmax->op.as<ComputeOpNode>()->tag;
@@ -71,8 +71,8 @@ inline Schedule schedule_softmax(const Target &target, const Array<Tensor>& outs
   }
 
   int num_thread = 64;
-  auto block_x = tvm::top::thread_axis(Range(), "blockIdx.x");
-  auto thread_x = tvm::top::thread_axis(Range(0, num_thread), "threadIdx.x");
+  auto block_x = tvm::te::thread_axis(Range(), "blockIdx.x");
+  auto thread_x = tvm::te::thread_axis(Range(0, num_thread), "threadIdx.x");
 
   if (has_exp) {
     s[exp].bind(exp->op.as<ComputeOpNode>()->axis[0], block_x);
diff --git a/topi/include/topi/detail/array_utils.h b/topi/include/topi/detail/array_utils.h
index 0c0feec..3a3453a 100644
--- a/topi/include/topi/detail/array_utils.h
+++ b/topi/include/topi/detail/array_utils.h
@@ -24,12 +24,12 @@
 #ifndef TOPI_DETAIL_ARRAY_UTILS_H_
 #define TOPI_DETAIL_ARRAY_UTILS_H_
 
-#include "tvm/top/operation.h"
+#include <tvm/te/operation.h>
 
 namespace topi {
 namespace detail {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 /*!
  * \brief Search an array for a specific item
diff --git a/topi/include/topi/detail/broadcast.h b/topi/include/topi/detail/broadcast.h
index 17524b1..8622920 100644
--- a/topi/include/topi/detail/broadcast.h
+++ b/topi/include/topi/detail/broadcast.h
@@ -24,15 +24,13 @@
 #ifndef TOPI_DETAIL_BROADCAST_H_
 #define TOPI_DETAIL_BROADCAST_H_
 
+#include <tvm/te/operation.h>
+#include <topi/detail/constant_utils.h>
+
 #include <algorithm>
 #include <deque>
 #include <string>
 
-#include "tvm/tir/ir_pass.h"
-#include "tvm/top/operation.h"
-#include "tvm/tir/op.h"
-#include "topi/detail/constant_utils.h"
-
 namespace topi {
 namespace detail {
 
@@ -100,7 +98,7 @@ inline BroadcastHelper BroadcastShape(const tvm::Array<tvm::PrimExpr>& shape1,
 
 inline tvm::Array<tvm::PrimExpr> InputIndexFromBroadcast(
     const tvm::Array<tvm::tir::Var>& ovars,
-    const tvm::top::Tensor& T,
+    const tvm::te::Tensor& T,
     const std::deque<tvm::tir::Var>& my_vars,
     const std::deque<tvm::tir::Var>& all_vars) {
   tvm::Array<tvm::PrimExpr> ivars;
@@ -127,9 +125,9 @@ inline tvm::Array<tvm::PrimExpr> InputIndexFromBroadcast(
 }
 
 template <typename FBinaryExpr>
-inline tvm::top::Tensor WithBroadcast(FBinaryExpr op,
-                                 const tvm::top::Tensor& A,
-                                 const tvm::top::Tensor& B,
+inline tvm::te::Tensor WithBroadcast(FBinaryExpr op,
+                                 const tvm::te::Tensor& A,
+                                 const tvm::te::Tensor& B,
                                  const std::string& name = "tensor",
                                  const std::string& tag = "") {
   auto bh = BroadcastShape(A->shape, B->shape);
@@ -137,7 +135,7 @@ inline tvm::top::Tensor WithBroadcast(FBinaryExpr op,
     return op(A(InputIndexFromBroadcast(ovars, A, bh.vars1, bh.all_vars)),
               B(InputIndexFromBroadcast(ovars, B, bh.vars2, bh.all_vars)));
   };
-  return tvm::top::compute(
+  return tvm::te::compute(
       tvm::Array<tvm::PrimExpr>(bh.common_shape.begin(), bh.common_shape.end()),
       l,
       name,
diff --git a/topi/include/topi/detail/constant_utils.h b/topi/include/topi/detail/constant_utils.h
index 081495e..4da11d8 100644
--- a/topi/include/topi/detail/constant_utils.h
+++ b/topi/include/topi/detail/constant_utils.h
@@ -33,7 +33,7 @@
 namespace topi {
 namespace detail {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 /*!
  * \brief Test whether the given Expr is a constant integer
diff --git a/topi/include/topi/detail/extern.h b/topi/include/topi/detail/extern.h
index c8db4e1..ab83200 100644
--- a/topi/include/topi/detail/extern.h
+++ b/topi/include/topi/detail/extern.h
@@ -24,7 +24,7 @@
 #ifndef TOPI_DETAIL_EXTERN_H_
 #define TOPI_DETAIL_EXTERN_H_
 
-#include <tvm/top/operation.h>
+#include <tvm/te/operation.h>
 #include <vector>
 #include <string>
 
@@ -32,7 +32,7 @@
 namespace topi {
 namespace detail {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 /*!
  * \brief Construct a buffer to pass to an external function
diff --git a/topi/include/topi/detail/fuse.h b/topi/include/topi/detail/fuse.h
index 5a77db6..90c1c20 100644
--- a/topi/include/topi/detail/fuse.h
+++ b/topi/include/topi/detail/fuse.h
@@ -24,12 +24,12 @@
 #ifndef TOPI_DETAIL_FUSE_H_
 #define TOPI_DETAIL_FUSE_H_
 
-#include "tvm/top/operation.h"
+#include <tvm/te/operation.h>
 
 namespace topi {
 namespace detail {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 /*!
  * \brief Fuse all of the given args
diff --git a/topi/include/topi/detail/pad_utils.h b/topi/include/topi/detail/pad_utils.h
index a3f82de..1f2a7c5 100644
--- a/topi/include/topi/detail/pad_utils.h
+++ b/topi/include/topi/detail/pad_utils.h
@@ -32,7 +32,7 @@
 namespace topi {
 namespace detail {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 /*!
  * \brief Get padding size for each side given padding height and width
diff --git a/topi/include/topi/detail/ravel_unravel.h b/topi/include/topi/detail/ravel_unravel.h
index bd78e22..ca46da0 100644
--- a/topi/include/topi/detail/ravel_unravel.h
+++ b/topi/include/topi/detail/ravel_unravel.h
@@ -24,15 +24,14 @@
 #ifndef TOPI_DETAIL_RAVEL_UNRAVEL_H_
 #define TOPI_DETAIL_RAVEL_UNRAVEL_H_
 
-#include <vector>
+#include <tvm/te/operation.h>
 
-#include "tvm/top/operation.h"
-#include "tvm/tir/op.h"
+#include <vector>
 
 namespace topi {
 namespace detail {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 /*!
 * \brief Flatten the indices to 1D
diff --git a/topi/include/topi/detail/tensor_utils.h b/topi/include/topi/detail/tensor_utils.h
index e306880..dfa7943 100644
--- a/topi/include/topi/detail/tensor_utils.h
+++ b/topi/include/topi/detail/tensor_utils.h
@@ -28,7 +28,7 @@
 namespace topi {
 namespace detail {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 /*!
  * \brief Check whether input shape has dimension of size 0;
diff --git a/topi/include/topi/elemwise.h b/topi/include/topi/elemwise.h
index 3762e3f..b6343f1 100644
--- a/topi/include/topi/elemwise.h
+++ b/topi/include/topi/elemwise.h
@@ -32,7 +32,7 @@
 
 namespace topi {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 // Unary intrinsic operators
 #define TOPI_DECLARE_UNARY_OP(OpName)                           \
diff --git a/topi/include/topi/generic/default.h b/topi/include/topi/generic/default.h
index f3d55fb..640ab95 100644
--- a/topi/include/topi/generic/default.h
+++ b/topi/include/topi/generic/default.h
@@ -24,15 +24,15 @@
 #ifndef TOPI_GENERIC_DEFAULT_H_
 #define TOPI_GENERIC_DEFAULT_H_
 
-#include "topi/tags.h"
-#include "topi/detail/fuse.h"
-#include "tvm/top/operation.h"
-#include "tvm/top/schedule_pass.h"
-#include "tvm/target/generic_func.h"
+#include <tvm/te/operation.h>
+#include <tvm/te/schedule_pass.h>
+#include <tvm/target/generic_func.h>
+#include <topi/tags.h>
+#include <topi/detail/fuse.h>
 
 namespace topi {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 namespace generic {
 /*!
@@ -68,7 +68,7 @@ inline Schedule default_schedule_auto_inline(const Target& target, Array<Tensor>
   }
   auto s = create_schedule(out_ops);
   auto x = outs[0];
-  tvm::top::AutoInlineInjective(s);
+  tvm::te::AutoInlineInjective(s);
   auto axis = s[x]->op.as<ComputeOpNode>()->axis;
   if (axis.size() > 0) {
     detail::Fuse(s[x], axis);
diff --git a/topi/include/topi/generic/extern.h b/topi/include/topi/generic/extern.h
index c9fe2c9..e08158f 100644
--- a/topi/include/topi/generic/extern.h
+++ b/topi/include/topi/generic/extern.h
@@ -24,16 +24,16 @@
 #ifndef TOPI_GENERIC_EXTERN_H_
 #define TOPI_GENERIC_EXTERN_H_
 
-#include "topi/tags.h"
-#include "topi/detail/fuse.h"
-#include "tvm/top/operation.h"
-#include "tvm/top/schedule_pass.h"
-#include "tvm/target/generic_func.h"
-#include "injective.h"
+#include <tvm/te/operation.h>
+#include <tvm/te/schedule_pass.h>
+#include <tvm/target/generic_func.h>
+#include <topi/tags.h>
+#include <topi/detail/fuse.h>
+#include <topi/generic/injective.h>
 
 namespace topi {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 namespace generic {
 /*!
@@ -51,7 +51,7 @@ inline Schedule schedule_extern(const Target& target, Array<Tensor> outs) {
   }
   auto s = create_schedule(out_ops);
 
-  tvm::top::AutoInlineInjective(s);
+  tvm::te::AutoInlineInjective(s);
   for (auto out : outs) {
     if (out->op->IsInstance<ExternOpNode>()) {
       continue;
diff --git a/topi/include/topi/generic/injective.h b/topi/include/topi/generic/injective.h
index e3ad688..7a5aff7 100644
--- a/topi/include/topi/generic/injective.h
+++ b/topi/include/topi/generic/injective.h
@@ -24,15 +24,15 @@
 #ifndef TOPI_GENERIC_INJECTIVE_H_
 #define TOPI_GENERIC_INJECTIVE_H_
 
-#include "topi/tags.h"
-#include "topi/detail/fuse.h"
-#include "tvm/top/operation.h"
-#include "tvm/top/schedule_pass.h"
-#include "tvm/target/generic_func.h"
+#include <tvm/te/operation.h>
+#include <tvm/te/schedule_pass.h>
+#include <tvm/target/generic_func.h>
+#include <topi/tags.h>
+#include <topi/detail/fuse.h>
 
 namespace topi {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 namespace generic {
 
@@ -63,7 +63,7 @@ inline Schedule schedule_injective(const Target &target, const Array<Tensor>& ou
     out_ops.push_back(t->op);
   }
   auto s = create_schedule(out_ops);
-  tvm::top::AutoInlineInjective(s);
+  tvm::te::AutoInlineInjective(s);
   auto x = outs[0];
   schedule_injective_from_existing(s, x);
 
diff --git a/topi/include/topi/image/resize.h b/topi/include/topi/image/resize.h
index cdc3a42..8f1d02a 100644
--- a/topi/include/topi/image/resize.h
+++ b/topi/include/topi/image/resize.h
@@ -24,22 +24,21 @@
 #ifndef TOPI_IMAGE_RESIZE_H_
 #define TOPI_IMAGE_RESIZE_H_
 
+#include <tvm/te/operation.h>
+#include <topi/tags.h>
+#include <topi/elemwise.h>
+#include <topi/detail/ravel_unravel.h>
+#include <topi/detail/constant_utils.h>
+
 #include <string>
 #include <vector>
 #include <iterator>
 #include <algorithm>
 
-#include "topi/tags.h"
-#include "topi/elemwise.h"
-#include "topi/detail/ravel_unravel.h"
-#include "topi/detail/constant_utils.h"
-#include "tvm/top/operation.h"
-#include "tvm/tir/op.h"
-
 namespace topi {
 namespace image {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 /*!
  * \brief Sample a point in a tensor using bilinear interpolation.
diff --git a/topi/include/topi/nn.h b/topi/include/topi/nn.h
index 16bcaef..a1ee8c1 100644
--- a/topi/include/topi/nn.h
+++ b/topi/include/topi/nn.h
@@ -24,19 +24,19 @@
 #ifndef TOPI_NN_H_
 #define TOPI_NN_H_
 
+#include <topi/tags.h>
+#include <topi/detail/constant_utils.h>
+#include <tvm/tir/expr.h>
+#include <tvm/tir/ir_pass.h>
+#include <tvm/tir/op.h>
+#include <tvm/te/operation.h>
+
 #include <algorithm>
 #include <string>
 
-#include "topi/tags.h"
-#include "topi/detail/constant_utils.h"
-#include "tvm/tir/expr.h"
-#include "tvm/tir/ir_pass.h"
-#include "tvm/top/operation.h"
-#include "tvm/tir/op.h"
-
 namespace topi {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 namespace detail {
 
 template <typename T>
@@ -62,11 +62,11 @@ tvm::PrimExpr Map(const tvm::Array<tvm::PrimExpr>& exprs, T op) {
  * \return A Tensor whose op member is the relu operation
  */
 template <typename T>
-inline tvm::top::Tensor relu(const tvm::top::Tensor& t,
+inline tvm::te::Tensor relu(const tvm::te::Tensor& t,
                         T threshold = static_cast<T>(0),
                         std::string name = "T_relu",
                         std::string tag = kElementWise) {
-  return tvm::top::compute(
+  return tvm::te::compute(
       t->shape,
       [&](const tvm::Array<tvm::tir::Var>& i) {
         auto threshold_const = tvm::tir::make_const(t->dtype, threshold);
@@ -86,11 +86,11 @@ inline tvm::top::Tensor relu(const tvm::top::Tensor& t,
 *
 * \return A Tensor whose op member is the leaky relu operation
 */
-inline tvm::top::Tensor leaky_relu(const tvm::top::Tensor& t,
+inline tvm::te::Tensor leaky_relu(const tvm::te::Tensor& t,
                               double alpha = 0.1,
                               std::string name = "T_leaky_relu",
                               std::string tag = kElementWise) {
-  return tvm::top::compute(
+  return tvm::te::compute(
     t->shape,
     [&](const tvm::Array<tvm::tir::Var>& i) {
       auto value = t(i);
@@ -112,8 +112,8 @@ inline tvm::top::Tensor leaky_relu(const tvm::top::Tensor& t,
  *
  * \return A Tensor whose op member is the parametric relu operation
  */
-inline tvm::top::Tensor prelu(const tvm::top::Tensor &x,
-                         const tvm::top::Tensor &slope,
+inline tvm::te::Tensor prelu(const tvm::te::Tensor &x,
+                         const tvm::te::Tensor &slope,
                          const int axis = 1,
                          std::string name = "T_prelu",
                          std::string tag = kBroadcast) {
@@ -123,7 +123,7 @@ inline tvm::top::Tensor prelu(const tvm::top::Tensor &x,
         topi::detail::GetConstInt(x->shape[axis]))
         << "Wrong slope shape received.";
 
-  return tvm::top::compute(x->shape,
+  return tvm::te::compute(x->shape,
                      [&](const tvm::Array<tvm::tir::Var> &indices) {
                         auto xval = x(indices);
                         return tvm::tir::SelectNode::make(
@@ -172,7 +172,7 @@ inline tvm::top::Tensor prelu(const tvm::top::Tensor &x,
  *
  *
  */
-inline tvm::top::Tensor pad(const tvm::top::Tensor& t,
+inline tvm::te::Tensor pad(const tvm::te::Tensor& t,
                        const tvm::Array<tvm::PrimExpr>& pad_before,
                        tvm::Array<tvm::PrimExpr> pad_after = tvm::Array<tvm::PrimExpr>(),
                        PrimExpr pad_value = PrimExpr(),
@@ -252,7 +252,7 @@ inline tvm::top::Tensor pad(const tvm::top::Tensor& t,
     }
     return t(indices);
   };
-  return tvm::top::compute(output_shape, l, name, tag);
+  return tvm::te::compute(output_shape, l, name, tag);
 }
 
 /*!
@@ -275,8 +275,8 @@ inline tvm::top::Tensor pad(const tvm::top::Tensor& t,
  * \return A Tensor whose op member is the 2-D convolution operation (NCHW
  * layout)
  */
-inline tvm::top::Tensor conv2d_nchw(const tvm::top::Tensor& I,
-                               const tvm::top::Tensor& W,
+inline tvm::te::Tensor conv2d_nchw(const tvm::te::Tensor& I,
+                               const tvm::te::Tensor& W,
                                int pad_h = 0,
                                int pad_w = 0,
                                int stride_h = 1,
@@ -293,9 +293,9 @@ inline tvm::top::Tensor conv2d_nchw(const tvm::top::Tensor& I,
     indexdiv(I->shape[2] - W->shape[2] + 2 * pad_h, stride_h) + 1,  // H
     indexdiv(I->shape[3] - W->shape[3] + 2 * pad_w, stride_w) + 1   // W
   };
-  auto i = tvm::top::reduce_axis(tvm::Range{0, I->shape[1]}, "i");
-  auto kh = tvm::top::reduce_axis(tvm::Range{0, W->shape[2]}, "kh");
-  auto kw = tvm::top::reduce_axis(tvm::Range{0, W->shape[3]}, "kw");
+  auto i = tvm::te::reduce_axis(tvm::Range{0, I->shape[1]}, "i");
+  auto kh = tvm::te::reduce_axis(tvm::Range{0, W->shape[2]}, "kh");
+  auto kw = tvm::te::reduce_axis(tvm::Range{0, W->shape[3]}, "kw");
   auto T = (pad_h == 0 && pad_w == 0)
                ? I
                : pad(I, {tvm::PrimExpr(0), tvm::PrimExpr(0), pad_h, pad_w});
@@ -304,7 +304,7 @@ inline tvm::top::Tensor conv2d_nchw(const tvm::top::Tensor& I,
         T(b, i, stride_h * h + kh, stride_w * w + kw) * W(o, i, kh, kw),
         {i, kh, kw});
   };
-  return tvm::top::compute(output_shape, l, name, tag);
+  return tvm::te::compute(output_shape, l, name, tag);
 }
 
 /*!
@@ -326,8 +326,8 @@ inline tvm::top::Tensor conv2d_nchw(const tvm::top::Tensor& I,
  * \return A Tensor whose op member is the 2-D convolution operation
  * (HWCN layout)
  */
-inline tvm::top::Tensor conv2d_hwcn(const tvm::top::Tensor& I,
-                               const tvm::top::Tensor& W,
+inline tvm::te::Tensor conv2d_hwcn(const tvm::te::Tensor& I,
+                               const tvm::te::Tensor& W,
                                int pad_h = 0,
                                int pad_w = 0,
                                int stride_h = 1,
@@ -344,16 +344,16 @@ inline tvm::top::Tensor conv2d_hwcn(const tvm::top::Tensor& I,
       I->shape[2],                                             // B
       W->shape[3]                                              // O
   };
-  auto i = tvm::top::reduce_axis(tvm::Range{0, I->shape[3]}, "i");
-  auto kh = tvm::top::reduce_axis(tvm::Range{0, W->shape[0]}, "kh");
-  auto kw = tvm::top::reduce_axis(tvm::Range{0, W->shape[1]}, "kw");
+  auto i = tvm::te::reduce_axis(tvm::Range{0, I->shape[3]}, "i");
+  auto kh = tvm::te::reduce_axis(tvm::Range{0, W->shape[0]}, "kh");
+  auto kw = tvm::te::reduce_axis(tvm::Range{0, W->shape[1]}, "kw");
   auto T = (pad_h == 0 && pad_w == 0) ? I : pad(I, {pad_h, pad_w});
   auto l = [&](tvm::tir::Var b, tvm::tir::Var o, tvm::tir::Var h, tvm::tir::Var w) {
     return tvm::sum(
         T(stride_h * h + kh, stride_w * w + kw, i, b) * W(kh, kw, i, o),
         {i, kh, kw});
   };
-  return tvm::top::compute(output_shape, l, name, tag);
+  return tvm::te::compute(output_shape, l, name, tag);
 }
 
 
@@ -377,8 +377,8 @@ inline tvm::top::Tensor conv2d_hwcn(const tvm::top::Tensor& I,
  * \return A Tensor whose op member is the 2-D depthwise convolution operation
  * (NCHW layout)
  */
-inline tvm::top::Tensor depthwise_conv2d_nchw(const tvm::top::Tensor& I,
-                                         const tvm::top::Tensor& W,
+inline tvm::te::Tensor depthwise_conv2d_nchw(const tvm::te::Tensor& I,
+                                         const tvm::te::Tensor& W,
                                          int pad_h = 0,
                                          int pad_w = 0,
                                          int stride_h = 1,
@@ -396,9 +396,9 @@ inline tvm::top::Tensor depthwise_conv2d_nchw(const tvm::top::Tensor& I,
       indexdiv(I->shape[2] - W->shape[2] + 2 * pad_h, stride_h) + 1,  // H
       indexdiv(I->shape[3] - W->shape[3] + 2 * pad_w, stride_w) + 1   // W
   };
-  auto i = tvm::top::reduce_axis(tvm::Range{0, I->shape[1]}, "i");
-  auto kh = tvm::top::reduce_axis(tvm::Range{0, W->shape[2]}, "kh");
-  auto kw = tvm::top::reduce_axis(tvm::Range{0, W->shape[3]}, "kw");
+  auto i = tvm::te::reduce_axis(tvm::Range{0, I->shape[1]}, "i");
+  auto kh = tvm::te::reduce_axis(tvm::Range{0, W->shape[2]}, "kh");
+  auto kw = tvm::te::reduce_axis(tvm::Range{0, W->shape[3]}, "kw");
   auto T = (pad_h == 0 && pad_w == 0)
                ? I
                : pad(I, {tvm::PrimExpr(0), tvm::PrimExpr(0), pad_h, pad_w});
@@ -407,11 +407,11 @@ inline tvm::top::Tensor depthwise_conv2d_nchw(const tvm::top::Tensor& I,
                     W(indexdiv(i, pCM), indexmod(o, pCM), kh, kw),
                     {i, kh, kw});
   };
-  return tvm::top::compute(output_shape, l, name, tag);
+  return tvm::te::compute(output_shape, l, name, tag);
 }
 
-inline tvm::top::Tensor depthwise_conv2d_nhwc(const tvm::top::Tensor& I,
-                                         const tvm::top::Tensor& W,
+inline tvm::te::Tensor depthwise_conv2d_nhwc(const tvm::te::Tensor& I,
+                                         const tvm::te::Tensor& W,
                                          int pad_h = 0,
                                          int pad_w = 0,
                                          int stride_h = 1,
@@ -429,9 +429,9 @@ inline tvm::top::Tensor depthwise_conv2d_nhwc(const tvm::top::Tensor& I,
       indexdiv(I->shape[2] - W->shape[2] + 2 * pad_w, stride_w) + 1,   // W
       W->shape[3],                                            // O
   };
-  auto i = tvm::top::reduce_axis(tvm::Range{0, I->shape[3]}, "i");
-  auto kh = tvm::top::reduce_axis(tvm::Range{0, W->shape[0]}, "kh");
-  auto kw = tvm::top::reduce_axis(tvm::Range{0, W->shape[1]}, "kw");
+  auto i = tvm::te::reduce_axis(tvm::Range{0, I->shape[3]}, "i");
+  auto kh = tvm::te::reduce_axis(tvm::Range{0, W->shape[0]}, "kh");
+  auto kw = tvm::te::reduce_axis(tvm::Range{0, W->shape[1]}, "kw");
   auto T = (pad_h == 0 && pad_w == 0)
                ? I
                : pad(I, {tvm::PrimExpr(0), pad_h, pad_w, tvm::PrimExpr(0)});
@@ -440,7 +440,7 @@ inline tvm::top::Tensor depthwise_conv2d_nhwc(const tvm::top::Tensor& I,
                     W(kh, kw, indexdiv(i, pCM), indexmod(o, pCM)),
                     {kh, kw, i});
   };
-  return tvm::top::compute(output_shape, l, name, tag);
+  return tvm::te::compute(output_shape, l, name, tag);
 }
 
 /*!
@@ -463,8 +463,8 @@ inline tvm::top::Tensor depthwise_conv2d_nhwc(const tvm::top::Tensor& I,
  * \return A Tensor whose op member is the 2-D groupconvolution operation
  * (NCHW layout)
  */
-inline tvm::top::Tensor group_conv2d_ngchw(const tvm::top::Tensor& I,
-                                      const tvm::top::Tensor& W,
+inline tvm::te::Tensor group_conv2d_ngchw(const tvm::te::Tensor& I,
+                                      const tvm::te::Tensor& W,
                                       int pad_h = 0,
                                       int pad_w = 0,
                                       int stride_h = 1,
@@ -482,9 +482,9 @@ inline tvm::top::Tensor group_conv2d_ngchw(const tvm::top::Tensor& I,
       indexdiv(I->shape[3] - W->shape[3] + 2 * pad_h, stride_h) + 1,  // H
       indexdiv(I->shape[4] - W->shape[4] + 2 * pad_w, stride_w) + 1   // W
   };
-  auto i = tvm::top::reduce_axis(tvm::Range{0, I->shape[2]}, "i");
-  auto kh = tvm::top::reduce_axis(tvm::Range{0, W->shape[3]}, "kh");
-  auto kw = tvm::top::reduce_axis(tvm::Range{0, W->shape[4]}, "kw");
+  auto i = tvm::te::reduce_axis(tvm::Range{0, I->shape[2]}, "i");
+  auto kh = tvm::te::reduce_axis(tvm::Range{0, W->shape[3]}, "kh");
+  auto kw = tvm::te::reduce_axis(tvm::Range{0, W->shape[4]}, "kw");
 
   auto T = (pad_h == 0 && pad_w == 0)
                ? I
@@ -499,7 +499,7 @@ inline tvm::top::Tensor group_conv2d_ngchw(const tvm::top::Tensor& I,
         I(b, g, i, stride_h * h + kh, stride_w * w + kw) * W(g, i, o, kh, kw),
         {i, kh, kw});
   };
-  return tvm::top::compute(output_shape, l, name, tag);
+  return tvm::te::compute(output_shape, l, name, tag);
 }
 
 }  // namespace topi
diff --git a/topi/include/topi/nn/batch_matmul.h b/topi/include/topi/nn/batch_matmul.h
index e124aff..12075e6 100644
--- a/topi/include/topi/nn/batch_matmul.h
+++ b/topi/include/topi/nn/batch_matmul.h
@@ -24,15 +24,15 @@
 #ifndef TOPI_NN_BATCH_MATMUL_H_
 #define TOPI_NN_BATCH_MATMUL_H_
 
-#include <string>
+#include <tvm/te/operation.h>
+#include <topi/tags.h>
 
-#include "topi/tags.h"
-#include "tvm/top/operation.h"
+#include <string>
 
 namespace topi {
 namespace nn {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 /*!
 * \brief Creates an operation that calculates matrix multiplication in batch.
@@ -42,8 +42,8 @@ using namespace tvm::top;
 *
 * \return Tensor with shape [batch, M, N]
 */
-inline tvm::top::Tensor batch_matmul(const tvm::top::Tensor& x,
-                                const tvm::top::Tensor& y) {
+inline tvm::te::Tensor batch_matmul(const tvm::te::Tensor& x,
+                                const tvm::te::Tensor& y) {
   CHECK_EQ(x->shape.size(), 3) << "batch_matmul requires 3-D data";
   CHECK_EQ(y->shape.size(), 3) << "batch_matmul requires 3-D data";
 
@@ -52,8 +52,8 @@ inline tvm::top::Tensor batch_matmul(const tvm::top::Tensor& x,
   auto K = x->shape[2];
   auto N = y->shape[1];
 
-  auto k = tvm::top::reduce_axis(Range(0, K), "k");
-  auto result = tvm::top::compute(
+  auto k = tvm::te::reduce_axis(Range(0, K), "k");
+  auto result = tvm::te::compute(
       { batch, M, N },
       [&](Var b, Var i, Var j) {
         return tvm::sum(x(b, i, k) * y(b, j, k), { k });
diff --git a/topi/include/topi/nn/bias_add.h b/topi/include/topi/nn/bias_add.h
index eca9989..209c30c 100644
--- a/topi/include/topi/nn/bias_add.h
+++ b/topi/include/topi/nn/bias_add.h
@@ -24,13 +24,12 @@
 #ifndef TOPI_NN_BIAS_ADD_H_
 #define TOPI_NN_BIAS_ADD_H_
 
-#include <string>
+#include <tvm/te/operation.h>
+#include <topi/tags.h>
+#include <topi/broadcast.h>
+#include <topi/transform.h>
 
-#include "topi/tags.h"
-#include "topi/broadcast.h"
-#include "topi/transform.h"
-#include "tvm/top/operation.h"
-#include "tvm/tir/op.h"
+#include <string>
 
 namespace topi {
 namespace nn {
@@ -43,8 +42,8 @@ namespace nn {
 * \param axis The axis to add the bias to.
 * \return Tensor with shape [batch, in_dim]
 */
-inline tvm::top::Tensor bias_add(const tvm::top::Tensor& data,
-                                 const tvm::top::Tensor& bias,
+inline tvm::te::Tensor bias_add(const tvm::te::Tensor& data,
+                                 const tvm::te::Tensor& bias,
                                  int axis) {
   int data_ndim = data->shape.size();
   if (axis < 0) {
diff --git a/topi/include/topi/nn/bnn.h b/topi/include/topi/nn/bnn.h
index 16e75f1..6bda653 100644
--- a/topi/include/topi/nn/bnn.h
+++ b/topi/include/topi/nn/bnn.h
@@ -24,17 +24,17 @@
 #ifndef TOPI_NN_BNN_H_
 #define TOPI_NN_BNN_H_
 
-#include <string>
+#include <tvm/te/operation.h>
+#include <tvm/tir/ir_pass.h>
+#include <topi/tags.h>
+#include <topi/detail/constant_utils.h>
 
-#include "tvm/top/operation.h"
-#include "tvm/tir/ir_pass.h"
-#include "topi/tags.h"
-#include "topi/detail/constant_utils.h"
+#include <string>
 
 namespace topi {
 namespace nn {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 /*!
 * \brief Binarization and bit-packing along a certain axis.
@@ -47,7 +47,7 @@ using namespace tvm::top;
 *
 * \return Output tensor with dtype uint32
 */
-inline tvm::top::Tensor binarize_pack(const tvm::top::Tensor& data,
+inline tvm::te::Tensor binarize_pack(const tvm::te::Tensor& data,
                                  int axis,
                                  std::string name = "PackedInput",
                                  std::string tag = "binarize_pack") {
@@ -63,7 +63,7 @@ inline tvm::top::Tensor binarize_pack(const tvm::top::Tensor& data,
                      ishape[i]);
   }
 
-  return tvm::top::compute(
+  return tvm::te::compute(
     oshape,
     [&](const Array<Var>& indices) {
       Array<PrimExpr> start_idx;
@@ -99,8 +99,8 @@ inline tvm::top::Tensor binarize_pack(const tvm::top::Tensor& data,
 *
 * \return Tensor with shape [batch, out_dim], dtype is float32
 */
-inline tvm::top::Tensor binary_dense(const tvm::top::Tensor& data,
-                                const tvm::top::Tensor& weight) {
+inline tvm::te::Tensor binary_dense(const tvm::te::Tensor& data,
+                                const tvm::te::Tensor& weight) {
   CHECK_EQ(data->shape.size(), 2) << "binary_dense requires 2-D data";
   CHECK_EQ(weight->shape.size(), 2) << "binary_dense requires 2-D weight";
   CHECK_EQ(data->dtype, DataType::UInt(32)) << "binary_dense requires uint32 data";
@@ -110,14 +110,14 @@ inline tvm::top::Tensor binary_dense(const tvm::top::Tensor& data,
   auto in_dim = data->shape[1];
   auto out_dim = weight->shape[0];
 
-  auto k = tvm::top::reduce_axis(Range(0, in_dim), "k");
-  auto matmul = tvm::top::compute(
+  auto k = tvm::te::reduce_axis(Range(0, in_dim), "k");
+  auto matmul = tvm::te::compute(
     { batch, out_dim },
     [&](Var i, Var j) {
       return tvm::sum(popcount(data(i, k) ^ weight(j, k)), { k });
     }, "tensor", "binary_dense");
 
-  return tvm::top::compute(
+  return tvm::te::compute(
     { batch, out_dim },
     [&](Var i, Var j) {
       return 32 * in_dim - 2.0f * matmul(i, j);
diff --git a/topi/include/topi/nn/dense.h b/topi/include/topi/nn/dense.h
index 60e378a..57f071a 100644
--- a/topi/include/topi/nn/dense.h
+++ b/topi/include/topi/nn/dense.h
@@ -24,15 +24,15 @@
 #ifndef TOPI_NN_DENSE_H_
 #define TOPI_NN_DENSE_H_
 
-#include <string>
+#include <tvm/te/operation.h>
+#include <topi/tags.h>
 
-#include "topi/tags.h"
-#include "tvm/top/operation.h"
+#include <string>
 
 namespace topi {
 namespace nn {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 /*!
 * \brief Creates an operation that calculates data * weight^T + bias
@@ -44,9 +44,9 @@ using namespace tvm::top;
 *
 * \return Tensor with shape [batch, out_dim]
 */
-inline tvm::top::Tensor dense(const tvm::top::Tensor& data,
-                         const tvm::top::Tensor& weight,
-                         const tvm::top::Tensor& bias,
+inline tvm::te::Tensor dense(const tvm::te::Tensor& data,
+                         const tvm::te::Tensor& weight,
+                         const tvm::te::Tensor& bias,
                          const DataType& out_dtype) {
   CHECK_EQ(data->shape.size(), 2) << "dense requires 2-D data";
   CHECK_EQ(weight->shape.size(), 2) << "dense requires 2-D weight";
@@ -58,8 +58,8 @@ inline tvm::top::Tensor dense(const tvm::top::Tensor& data,
   auto in_dim = data->shape[1];
   auto out_dim = weight->shape[0];
 
-  auto k = tvm::top::reduce_axis(Range(0, in_dim), "k");
-  auto matmul = tvm::top::compute(
+  auto k = tvm::te::reduce_axis(Range(0, in_dim), "k");
+  auto matmul = tvm::te::compute(
     { batch, out_dim },
     [&](Var i, Var j) {
       return tvm::sum(tvm::cast(out_dtype, data(i, k)) *
@@ -67,7 +67,7 @@ inline tvm::top::Tensor dense(const tvm::top::Tensor& data,
     }, "tensor", "dense");
 
   if (bias.defined()) {
-    matmul = tvm::top::compute(
+    matmul = tvm::te::compute(
       { batch, out_dim },
       [&](Var i, Var j) {
         return matmul(i, j) + tvm::cast(out_dtype, bias(j));
diff --git a/topi/include/topi/nn/dilate.h b/topi/include/topi/nn/dilate.h
index afeeecb..a67bf3a 100644
--- a/topi/include/topi/nn/dilate.h
+++ b/topi/include/topi/nn/dilate.h
@@ -24,16 +24,16 @@
 #ifndef TOPI_NN_DILATE_H_
 #define TOPI_NN_DILATE_H_
 
-#include <string>
+#include <tvm/te/operation.h>
+#include <tvm/tir/ir_pass.h>
+#include <topi/tags.h>
 
-#include "tvm/top/operation.h"
-#include "tvm/tir/ir_pass.h"
-#include "topi/tags.h"
+#include <string>
 
 namespace topi {
 namespace nn {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 /*!
 * \brief Create a new expression of the logical and of all
@@ -80,7 +80,7 @@ inline Tensor dilate(const Tensor& x,
       (x->shape[i] - 1) * cast(DataType::Int(32), strides[i] + 1)));
   }
 
-  return tvm::top::compute(
+  return tvm::te::compute(
     out_shape,
     [&](const Array<Var>& indices) {
       Array<PrimExpr> not_zero;
diff --git a/topi/include/topi/nn/flatten.h b/topi/include/topi/nn/flatten.h
index a3e47b7..81cef2e 100644
--- a/topi/include/topi/nn/flatten.h
+++ b/topi/include/topi/nn/flatten.h
@@ -24,19 +24,17 @@
 #ifndef TOPI_NN_FLATTEN_H_
 #define TOPI_NN_FLATTEN_H_
 
+#include <tvm/te/operation.h>
+#include <topi/tags.h>
+#include <topi/detail/constant_utils.h>
+
 #include <string>
 #include <vector>
 
-#include "topi/tags.h"
-#include "topi/detail/constant_utils.h"
-#include "tvm/top/operation.h"
-#include "tvm/tir/op.h"
-
-
 namespace topi {
 namespace nn {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 /*!
 * \brief Flattens the input tensor into a 2-D tensor by collapsing higher dimensions.
@@ -65,7 +63,7 @@ inline Tensor flatten(const Tensor& x,
   }
   std::reverse(extra_shape.begin(), extra_shape.end());
 
-  return tvm::top::compute(
+  return tvm::te::compute(
     oshape, [&](Var i, Var j) {
       PrimExpr idx = j;
       std::vector<PrimExpr> index;
diff --git a/topi/include/topi/nn/l2_normalize.h b/topi/include/topi/nn/l2_normalize.h
index 6670e6d..aa4987a 100644
--- a/topi/include/topi/nn/l2_normalize.h
+++ b/topi/include/topi/nn/l2_normalize.h
@@ -24,14 +24,16 @@
 #ifndef TOPI_NN_L2_NORMALIZE_H_
 #define TOPI_NN_L2_NORMALIZE_H_
 
+#include <tvm/te/operation.h>
+#include <topi/tags.h>
+
 #include <string>
 #include <algorithm>
-#include "topi/tags.h"
-#include "tvm/top/operation.h"
+
 namespace topi {
 namespace nn {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 /*!
 * \brief L2 normalization inference operator
@@ -60,7 +62,7 @@ inline Tensor l2_normalize(const Tensor& data,
   Tensor sum_value = topi::sum(dot_value, axis, true);
   Tensor expand_sum = topi::broadcast_to(sum_value, input_shape);
   return topi::divide(data,
-                      topi::sqrt(tvm::top::compute(expand_sum->shape,
+                      topi::sqrt(tvm::te::compute(expand_sum->shape,
                                               [&](const Array<Var>& i){
                                                 return (max(expand_sum(i), eps));
                                               }, name, tag)));
diff --git a/topi/include/topi/nn/local_response_norm.h b/topi/include/topi/nn/local_response_norm.h
index 4766ee2..14dec39 100644
--- a/topi/include/topi/nn/local_response_norm.h
+++ b/topi/include/topi/nn/local_response_norm.h
@@ -24,15 +24,15 @@
 #ifndef TOPI_NN_LOCAL_RESPONSE_NORM_H_
 #define TOPI_NN_LOCAL_RESPONSE_NORM_H_
 
-#include <string>
+#include <tvm/te/operation.h>
+#include <topi/tags.h>
 
-#include "topi/tags.h"
-#include "tvm/top/operation.h"
+#include <string>
 
 namespace topi {
 namespace nn {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 /*!
 * \brief Local response normalization inference operator
@@ -65,24 +65,24 @@ inline Tensor lrn(const Tensor& data,
   pad_before.Set(axis, static_cast<PrimExpr>(size/2));
   pad_after.Set(axis, static_cast<PrimExpr>(size/2));
   auto pad_data = pad(data, pad_before, pad_after, 0, "pad_data");
-  auto rxs = tvm::top::reduce_axis(Range(0, size), "rxs");
+  auto rxs = tvm::te::reduce_axis(Range(0, size), "rxs");
   Tensor sqr_sum;
   if (axis == 1) {
-    sqr_sum = tvm::top::compute(input_shape,
+    sqr_sum = tvm::te::compute(input_shape,
                            [&](Var i, Var l, Var j, Var k) {
                            return tvm::sum(pad_data(i, l + rxs, j, k) *
                                            pad_data(i, l + rxs, j, k),
                                            {rxs});
                            });
   } else if (axis == 3) {
-    sqr_sum = tvm::top::compute(input_shape,
+    sqr_sum = tvm::te::compute(input_shape,
                            [&](Var i, Var l, Var j, Var k) {
                            return tvm::sum(pad_data(i, l, j, k + rxs) *
                                            pad_data(i, l, j, k + rxs),
                                            {rxs});
                            });
   }
-  auto sqrt_sum_up = tvm::top::compute(
+  auto sqrt_sum_up = tvm::te::compute(
       input_shape,
       [&](Var i, Var j, Var k, Var l) {
         return tvm::pow(bias +
diff --git a/topi/include/topi/nn/mapping.h b/topi/include/topi/nn/mapping.h
index 4cd2fe1..17d1404 100644
--- a/topi/include/topi/nn/mapping.h
+++ b/topi/include/topi/nn/mapping.h
@@ -24,15 +24,15 @@
 #ifndef TOPI_NN_MAPPING_H_
 #define TOPI_NN_MAPPING_H_
 
-#include <string>
+#include <tvm/te/operation.h>
+#include <topi/tags.h>
 
-#include "topi/tags.h"
-#include "tvm/top/operation.h"
+#include <string>
 
 namespace topi {
 namespace nn {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 /*!
 * \brief Scale and shift with NCHW order
@@ -50,7 +50,7 @@ inline Tensor scale_shift_nchw(const Tensor& x,
                                const Tensor& shift,
                                std::string name = "ScaleShift",
                                std::string tag = kBroadcast) {
-  return tvm::top::compute(
+  return tvm::te::compute(
     x->shape,
     [&](Var b, Var c, Var h, Var w) {
       return x(b, c, h, w) * scale(c) + shift(w);
@@ -73,7 +73,7 @@ inline Tensor scale_shift_nhwc(const Tensor& x,
                                const Tensor& shift,
                                std::string name = "ScaleShift",
                                std::string tag = kBroadcast) {
-  return tvm::top::compute(
+  return tvm::te::compute(
     x->shape,
     [&](Var b, Var h, Var w, Var c) {
       return x(b, h, w, c) * scale(c) + shift(w);
diff --git a/topi/include/topi/nn/pooling.h b/topi/include/topi/nn/pooling.h
index 86f797f..e6947ed 100644
--- a/topi/include/topi/nn/pooling.h
+++ b/topi/include/topi/nn/pooling.h
@@ -24,20 +24,20 @@
 #ifndef TOPI_NN_POOLING_H_
 #define TOPI_NN_POOLING_H_
 
+#include <topi/detail/pad_utils.h>
+#include <topi/nn.h>
+#include <topi/reduction.h>
+#include <topi/tags.h>
+#include <tvm/tir/ir_pass.h>
+
 #include <algorithm>
 #include <string>
 #include <vector>
 
-#include "topi/detail/pad_utils.h"
-#include "topi/nn.h"
-#include "topi/reduction.h"
-#include "topi/tags.h"
-#include "tvm/tir/ir_pass.h"
-
 namespace topi {
 namespace nn {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 /*! \brief Pooling type */
 enum PoolType : int {
@@ -108,8 +108,8 @@ inline Tensor pool_impl(const Tensor& x,
   auto out_width = tvm::tir::Simplify(
       indexdiv(width - kernel_width + pad_left + pad_right, stride_width) + 1);
 
-  auto dheight = tvm::top::reduce_axis(Range(0, kernel_height));
-  auto dwidth = tvm::top::reduce_axis(Range(0, kernel_width));
+  auto dheight = tvm::te::reduce_axis(Range(0, kernel_height));
+  auto dwidth = tvm::te::reduce_axis(Range(0, kernel_width));
 
   Array<PrimExpr> out_shape = x->shape;
   out_shape.Set(height_axis, out_height);
@@ -125,7 +125,7 @@ inline Tensor pool_impl(const Tensor& x,
   if (pool_type == kMaxPool) {
     auto temp = do_pad ? pad(
         x, pad_before, pad_after, tvm::min_value(x->dtype), "pad_temp") : x;
-    return tvm::top::compute(out_shape, [&](const Array<Var>& output) {
+    return tvm::te::compute(out_shape, [&](const Array<Var>& output) {
       Array<PrimExpr> indices;
       for (const Var& var : output) indices.push_back(var);
       indices.Set(height_axis, output[height_axis] * stride_height + dheight);
@@ -137,7 +137,7 @@ inline Tensor pool_impl(const Tensor& x,
     auto temp = do_pad ? pad(x, pad_before, pad_after, 0, "pad_temp") : x;
 
     // TVM compute for summing the pooling window.
-    auto pool_sum = tvm::top::compute(out_shape,
+    auto pool_sum = tvm::te::compute(out_shape,
     [&](const Array<Var>& output) {
       Array<PrimExpr> indices;
       for (const Var& var : output) indices.push_back(var);
@@ -147,7 +147,7 @@ inline Tensor pool_impl(const Tensor& x,
     }, "tensor", "pool_sum");
 
     // TVM compute for dividing the reduced window sum by kernel size.
-    return tvm::top::compute(out_shape,
+    return tvm::te::compute(out_shape,
     [&](const Array<Var>& output) {
       Array<PrimExpr> indices;
       for (const Var& var : output) indices.push_back(var);
@@ -218,8 +218,8 @@ inline Tensor pool_grad_impl(const Tensor& out_grad,
   auto out_width =
       tvm::tir::Simplify((width - kernel_width + pad_left + pad_right) / stride_width + 1);
 
-  auto dheight = tvm::top::reduce_axis(Range(0, kernel_height));
-  auto dwidth = tvm::top::reduce_axis(Range(0, kernel_width));
+  auto dheight = tvm::te::reduce_axis(Range(0, kernel_height));
+  auto dwidth = tvm::te::reduce_axis(Range(0, kernel_width));
 
   Array<PrimExpr> out_shape = x->shape;
   out_shape.Set(height_axis, out_height);
@@ -237,9 +237,9 @@ inline Tensor pool_grad_impl(const Tensor& out_grad,
     ravel_shape.Set(height_axis, ravel_shape[height_axis] + pad_top + pad_bottom);
     ravel_shape.Set(width_axis, ravel_shape[width_axis] + pad_left + pad_right);
 
-    auto windowh = tvm::top::reduce_axis(
+    auto windowh = tvm::te::reduce_axis(
         Range(0, (kernel_height + stride_height - 1) / stride_height));
-    auto windoww = tvm::top::reduce_axis(
+    auto windoww = tvm::te::reduce_axis(
         Range(0, (kernel_width + stride_width - 1) / stride_width));
 
     auto argmax = MakeArgmaxReducer();
@@ -247,7 +247,7 @@ inline Tensor pool_grad_impl(const Tensor& out_grad,
         x, pad_before, pad_after, tvm::min_value(x->dtype), "pad_temp") : x;
 
     auto mp_argmax =
-        tvm::top::compute(
+        tvm::te::compute(
             out_shape,
             [&](const Array<Var>& inds) {
               Array<PrimExpr> window_inds{inds.begin(), inds.end()};
@@ -260,7 +260,7 @@ inline Tensor pool_grad_impl(const Tensor& out_grad,
 
     auto mp_inds = mp_argmax[0];
 
-    return tvm::top::compute(
+    return tvm::te::compute(
         x->shape,
         [&](const Array<Var>& inds) {
           Array<PrimExpr> pad_inds {inds.begin(), inds.end()};
@@ -289,11 +289,11 @@ inline Tensor pool_grad_impl(const Tensor& out_grad,
         },
         "T_pool_grad", "pool_grad_max");
   } else if (pool_type == kAvgPool) {
-    auto windowh = tvm::top::reduce_axis(
+    auto windowh = tvm::te::reduce_axis(
         Range(0, (kernel_height + stride_height - 1) / stride_height));
-    auto windoww = tvm::top::reduce_axis(
+    auto windoww = tvm::te::reduce_axis(
         Range(0, (kernel_width + stride_width - 1) / stride_width));
-    return tvm::top::compute(
+    return tvm::te::compute(
         x->shape,
         [&](const Array<Var>& inds) {
           PrimExpr pad_h_idx = inds[height_axis] + pad_top;
@@ -518,21 +518,21 @@ inline Tensor adaptive_pool_impl(const Tensor& x,
   out_shape.Set(width_axis, out_width);
 
   if (pool_type == kMaxPool) {
-    return tvm::top::compute(out_shape, [&](const Array<Var>& output) {
+    return tvm::te::compute(out_shape, [&](const Array<Var>& output) {
       Array<PrimExpr> indices;
       for (const Var& var : output) indices.push_back(var);
       auto i_start_h = start_index(output[height_axis], out_height, height);
       auto i_end_h = end_index(output[height_axis], out_height, height);
       auto i_start_w = start_index(output[width_axis], out_width, width);
       auto i_end_w = end_index(output[width_axis], out_width, width);
-      auto dheight = tvm::top::reduce_axis(Range(0, i_end_h - i_start_h), "rv1");
-      auto dwidth = tvm::top::reduce_axis(Range(0, i_end_w - i_start_w), "rv2");
+      auto dheight = tvm::te::reduce_axis(Range(0, i_end_h - i_start_h), "rv1");
+      auto dwidth = tvm::te::reduce_axis(Range(0, i_end_w - i_start_w), "rv2");
       indices.Set(height_axis, i_start_h + dheight);
       indices.Set(width_axis, i_start_w + dwidth);
       return tvm::max(x(indices), { dheight, dwidth });  // NOLINT(*)
     }, "tensor", "adaptive_pool_max");
   } else if (pool_type == kAvgPool) {
-    auto pool_sum = tvm::top::compute(out_shape, [&](const Array<Var>& output) {
+    auto pool_sum = tvm::te::compute(out_shape, [&](const Array<Var>& output) {
       Array<PrimExpr> indices;
       for (const Var& var : output) indices.push_back(var);
       auto i_start_h = start_index(output[height_axis], out_height, height);
@@ -541,14 +541,14 @@ inline Tensor adaptive_pool_impl(const Tensor& x,
       auto i_end_w = end_index(output[width_axis], out_width, width);
       auto divide_factor = tvm::cast(x->dtype, (i_end_h - i_start_h)
                                                * (i_end_w - i_start_w));
-      auto dheight = tvm::top::reduce_axis(Range(0, i_end_h - i_start_h), "rv1");
-      auto dwidth = tvm::top::reduce_axis(Range(0, i_end_w - i_start_w), "rv2");
+      auto dheight = tvm::te::reduce_axis(Range(0, i_end_h - i_start_h), "rv1");
+      auto dwidth = tvm::te::reduce_axis(Range(0, i_end_w - i_start_w), "rv2");
       indices.Set(height_axis, i_start_h + dheight);
       indices.Set(width_axis, i_start_w + dwidth);
       return tvm::sum(x(indices), { dheight, dwidth });
     }, "tensor", "adaptive_pool_sum");
 
-    return tvm::top::compute(out_shape, [&](const Array<Var>& output) {
+    return tvm::te::compute(out_shape, [&](const Array<Var>& output) {
       Array<PrimExpr> indices;
       for (const Var& var : output) indices.push_back(var);
       auto i_start_h = start_index(output[height_axis], out_height, height);
@@ -688,7 +688,7 @@ inline Tensor pool_impl_nd(const Tensor& x,
       pad_tail[i] += stride[i] - 1;
     }
 
-    daxis.push_back(tvm::top::reduce_axis(Range(0, kernel[i])));
+    daxis.push_back(tvm::te::reduce_axis(Range(0, kernel[i])));
 
     pad_before.Set(ii, pad_head[i]);
     pad_after.Set(ii, pad_tail[i]);
@@ -702,7 +702,7 @@ inline Tensor pool_impl_nd(const Tensor& x,
   if (pool_type == kMaxPool) {
     auto temp = do_pad ? pad(
         x, pad_before, pad_after, tvm::min_value(x->dtype), "pad_temp") : x;
-    return tvm::top::compute(out_shape, [&](const Array<Var>& output) {
+    return tvm::te::compute(out_shape, [&](const Array<Var>& output) {
       Array<PrimExpr> indices;
       for (const Var& var : output) indices.push_back(var);
 
@@ -718,7 +718,7 @@ inline Tensor pool_impl_nd(const Tensor& x,
     auto temp = do_pad ? pad(x, pad_before, pad_after, 0, "pad_temp") : x;
 
     // TVM compute for summing the pooling window.
-    auto pool_sum = tvm::top::compute(out_shape,
+    auto pool_sum = tvm::te::compute(out_shape,
     [&](const Array<Var>& output) {
       Array<PrimExpr> indices;
       for (const Var& var : output) indices.push_back(var);
@@ -731,7 +731,7 @@ inline Tensor pool_impl_nd(const Tensor& x,
     }, "tensor", "pool_sum");
 
     // TVM compute for dividing the reduced window sum by kernel size.
-    return tvm::top::compute(out_shape,
+    return tvm::te::compute(out_shape,
     [&](const Array<Var>& output) {
       Array<PrimExpr> indices;
       for (const Var& var : output) indices.push_back(var);
diff --git a/topi/include/topi/nn/softmax.h b/topi/include/topi/nn/softmax.h
index 9cdc20d..dc76a9e 100644
--- a/topi/include/topi/nn/softmax.h
+++ b/topi/include/topi/nn/softmax.h
@@ -24,18 +24,17 @@
 #ifndef TOPI_NN_SOFTMAX_H_
 #define TOPI_NN_SOFTMAX_H_
 
+#include <tvm/te/operation.h>
+#include <topi/reduction.h>
+#include <topi/tags.h>
+
 #include <algorithm>
 #include <string>
 
-#include "topi/reduction.h"
-#include "topi/tags.h"
-#include "tvm/top/operation.h"
-#include "tvm/tir/op.h"
-
 namespace topi {
 namespace nn {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 /*!
 * \brief Softmax activation
@@ -58,8 +57,8 @@ inline Tensor softmax(const Tensor &x,
   }
   CHECK_LT(axis, ndim) << "axis parameter should be less than input dim";
 
-  auto k1 = tvm::top::reduce_axis(Range(0, input_shape[axis]), "k1");
-  auto k2 = tvm::top::reduce_axis(Range(0, input_shape[axis]), "k2");
+  auto k1 = tvm::te::reduce_axis(Range(0, input_shape[axis]), "k1");
+  auto k2 = tvm::te::reduce_axis(Range(0, input_shape[axis]), "k2");
   auto reduced_shape = MakeReduceTargetShape({axis}, x, false, false);
 
   tvm::Map<std::string, ObjectRef> attrs;
@@ -110,14 +109,14 @@ inline Tensor softmax(const Tensor &x,
     return exp(indices) / expsum(non_reduce_indices);
   };
 
-  auto max_elem = tvm::top::compute(reduced_shape, _compute_max);
-  auto exp = tvm::top::compute(input_shape, [&](const Array<Var> &indices) {
+  auto max_elem = tvm::te::compute(reduced_shape, _compute_max);
+  auto exp = tvm::te::compute(input_shape, [&](const Array<Var> &indices) {
       return _compute_exp(max_elem, indices);
   });
-  auto expsum = tvm::top::compute(reduced_shape, [&](const Array<Var> &indices) {
+  auto expsum = tvm::te::compute(reduced_shape, [&](const Array<Var> &indices) {
       return _compute_expsum(exp, indices);
   });
-  return tvm::top::compute(input_shape, [&](const Array<Var> &indices) {
+  return tvm::te::compute(input_shape, [&](const Array<Var> &indices) {
       return _normalize(exp, expsum, indices);
   }, name, tag, attrs);
 }
@@ -139,17 +138,17 @@ inline Tensor log_softmax(const Tensor& x,
   PrimExpr m = x->shape[0];
   PrimExpr n = x->shape[1];
 
-  auto k = tvm::top::reduce_axis(Range(0, n), "k");
-  auto max_elem = tvm::top::compute(
+  auto k = tvm::te::reduce_axis(Range(0, n), "k");
+  auto max_elem = tvm::te::compute(
     { m }, [&](Var i) {
       return tvm::max(x(i, k), Array<IterVar>{ k }); });
-  k = tvm::top::reduce_axis(Range(0, n), "k");
+  k = tvm::te::reduce_axis(Range(0, n), "k");
 
-  auto expsum = tvm::top::compute(
+  auto expsum = tvm::te::compute(
     { m }, [&](Var i) {
       return tvm::sum(tvm::exp(x(i, k) - max_elem(i)), { k }); });
 
-  return tvm::top::compute(
+  return tvm::te::compute(
     x->shape, [&](Var i, Var j) {
       return x(i, j) - max_elem(i) - tvm::log(expsum(i));
     }, name, tag);
diff --git a/topi/include/topi/nn/upsampling.h b/topi/include/topi/nn/upsampling.h
index f624f36..bd46d44 100644
--- a/topi/include/topi/nn/upsampling.h
+++ b/topi/include/topi/nn/upsampling.h
@@ -34,7 +34,7 @@
 namespace topi {
 namespace nn {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 using namespace topi::image;
 
 /*!
diff --git a/topi/include/topi/reduction.h b/topi/include/topi/reduction.h
index cb09990..81c6963 100644
--- a/topi/include/topi/reduction.h
+++ b/topi/include/topi/reduction.h
@@ -24,24 +24,22 @@
 #ifndef TOPI_REDUCTION_H_
 #define TOPI_REDUCTION_H_
 
+#include <tvm/te/operation.h>
+#include <topi/broadcast.h>
+#include <topi/elemwise.h>
+#include <topi/tags.h>
+#include <topi/transform.h>
+#include <topi/detail/ravel_unravel.h>
+#include <topi/detail/constant_utils.h>
+
 #include <algorithm>
 #include <string>
 #include <vector>
 #include <iterator>
 
-#include "topi/broadcast.h"
-#include "topi/elemwise.h"
-#include "topi/tags.h"
-#include "topi/transform.h"
-#include "topi/detail/ravel_unravel.h"
-#include "topi/detail/constant_utils.h"
-#include "tvm/top/operation.h"
-#include "tvm/tir/op.h"
-
-
 namespace topi {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 /*! \brief The operation to use for CommReduce */
 using FReduce = std::function<PrimExpr(PrimExpr source, const Array<IterVar>& axis)>;
@@ -92,7 +90,7 @@ inline Array<IterVar> MakeReduceAxes(const std::vector<int>& real_axis, const Te
   for (auto i : real_axis) {
     std::string name = "k" + std::to_string(i);
     reduce_axes.push_back(
-      tvm::top::reduce_axis(Range(0, data->shape[i]), name));
+      tvm::te::reduce_axis(Range(0, data->shape[i]), name));
   }
   return reduce_axes;
 }
@@ -168,7 +166,7 @@ inline Tensor DoCommReduce(const Tensor& data,
     return func(data(eval_range), r_axes);
   };
 
-  return tvm::top::compute(target_shape, compute, data->op->name + "_red", kCommReduce);
+  return tvm::te::compute(target_shape, compute, data->op->name + "_red", kCommReduce);
 }
 
 /*!
@@ -252,11 +250,11 @@ inline Tensor CommReduceIdx(const Tensor& data,
     return func({ idx, data(eval_range) }, reduce_axes, nullptr);
   };
 
-  auto temp_idx_val = tvm::top::compute(target_shape, compute,
+  auto temp_idx_val = tvm::te::compute(target_shape, compute,
                                    data->op->name + "_red_temp", kCommReduceIdx);
   auto temp_idx = temp_idx_val[0];
   auto temp_val = temp_idx_val[1];
-  return tvm::top::compute(
+  return tvm::te::compute(
     target_shape,
     [&temp_idx](const Array<Var>& indices) { return temp_idx(indices); },
     data->op->name + "_red",
diff --git a/topi/include/topi/rocm/dense.h b/topi/include/topi/rocm/dense.h
index e68ec4f..629b34e 100644
--- a/topi/include/topi/rocm/dense.h
+++ b/topi/include/topi/rocm/dense.h
@@ -24,9 +24,9 @@
 #ifndef TOPI_ROCM_DENSE_H_
 #define TOPI_ROCM_DENSE_H_
 
-#include "tvm/top/operation.h"
-#include "tvm/target/generic_func.h"
-#include "topi/tags.h"
+#include <tvm/te/operation.h>
+#include <tvm/target/generic_func.h>
+#include <topi/tags.h>
 #include "topi/detail/array_utils.h"
 #include "topi/nn/dense.h"
 #include "topi/contrib/rocblas.h"
@@ -35,7 +35,7 @@
 
 namespace topi {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 namespace rocm {
 /*!
@@ -49,10 +49,10 @@ namespace rocm {
 *
 * \return Tensor with shape [batch, out_dim]
 */
-inline tvm::top::Tensor dense_rocm(const Target& target,
-                              const tvm::top::Tensor& data,
-                              const tvm::top::Tensor& weight,
-                              const tvm::top::Tensor& bias,
+inline tvm::te::Tensor dense_rocm(const Target& target,
+                              const tvm::te::Tensor& data,
+                              const tvm::te::Tensor& weight,
+                              const tvm::te::Tensor& bias,
                               const DataType& out_dtype) {
   CHECK_EQ(data->shape.size(), 2) << "dense requires 2-D data";
   CHECK_EQ(weight->shape.size(), 2) << "dense requires 2-D weight";
@@ -68,7 +68,7 @@ inline tvm::top::Tensor dense_rocm(const Target& target,
     CHECK_EQ(data->dtype, out_dtype) << "Mixed precision not supported.";
     auto mm = topi::contrib::rocblas_matmul(data, weight, false, true);
     if (bias.defined()) {
-      mm = tvm::top::compute({ batch, out_dim },
+      mm = tvm::te::compute({ batch, out_dim },
                         [&](Var i, Var j) {
                           return mm(i, j) + bias(j);
                         }, "tensor", kBroadcast);
diff --git a/topi/include/topi/rocm/injective.h b/topi/include/topi/rocm/injective.h
index 6213276..f3a3f3b 100644
--- a/topi/include/topi/rocm/injective.h
+++ b/topi/include/topi/rocm/injective.h
@@ -24,16 +24,16 @@
 #ifndef TOPI_ROCM_INJECTIVE_H_
 #define TOPI_ROCM_INJECTIVE_H_
 
-#include "topi/tags.h"
-#include "topi/detail/fuse.h"
-#include "tvm/top/operation.h"
-#include "tvm/target/generic_func.h"
+#include <topi/tags.h>
+#include <topi/detail/fuse.h>
+#include <tvm/te/operation.h>
+#include <tvm/target/generic_func.h>
 
 #include "topi/cuda/injective.h"
 
 namespace topi {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 namespace rocm {
 
diff --git a/topi/include/topi/rocm/normalization.h b/topi/include/topi/rocm/normalization.h
index 4740d06..bdeb37a 100644
--- a/topi/include/topi/rocm/normalization.h
+++ b/topi/include/topi/rocm/normalization.h
@@ -24,13 +24,13 @@
 #ifndef TOPI_ROCM_NORMALIZATION_H_
 #define TOPI_ROCM_NORMALIZATION_H_
 
-#include "tvm/top/operation.h"
-#include "tvm/target/generic_func.h"
-#include "topi/tags.h"
+#include <tvm/te/operation.h>
+#include <tvm/target/generic_func.h>
+#include <topi/tags.h>
 
 namespace topi {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 namespace rocm {
 /*!
 * \brief Create a rocm schedule for LRN
diff --git a/topi/include/topi/rocm/pooling.h b/topi/include/topi/rocm/pooling.h
index 9e7883a..7d1f36f 100644
--- a/topi/include/topi/rocm/pooling.h
+++ b/topi/include/topi/rocm/pooling.h
@@ -24,17 +24,16 @@
 #ifndef TOPI_ROCM_POOLING_H_
 #define TOPI_ROCM_POOLING_H_
 
-#include "topi/tags.h"
-#include "topi/detail/fuse.h"
-#include "topi/detail/array_utils.h"
-#include "tvm/top/operation.h"
-#include "tvm/target/generic_func.h"
-
-#include "topi/cuda/pooling.h"
+#include <tvm/te/operation.h>
+#include <tvm/target/generic_func.h>
+#include <topi/tags.h>
+#include <topi/detail/fuse.h>
+#include <topi/detail/array_utils.h>
+#include <topi/cuda/pooling.h>
 
 namespace topi {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 namespace rocm {
 
diff --git a/topi/include/topi/rocm/reduction.h b/topi/include/topi/rocm/reduction.h
index f6e79fe..ea4b656 100644
--- a/topi/include/topi/rocm/reduction.h
+++ b/topi/include/topi/rocm/reduction.h
@@ -24,16 +24,16 @@
 #ifndef TOPI_ROCM_REDUCTION_H_
 #define TOPI_ROCM_REDUCTION_H_
 
-#include "topi/tags.h"
-#include "topi/detail/fuse.h"
-#include "tvm/top/operation.h"
-#include "tvm/target/generic_func.h"
+#include <topi/tags.h>
+#include <topi/detail/fuse.h>
+#include <tvm/te/operation.h>
+#include <tvm/target/generic_func.h>
 
 #include "topi/cuda/reduction.h"
 
 namespace topi {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 namespace rocm {
 /*!
diff --git a/topi/include/topi/rocm/softmax.h b/topi/include/topi/rocm/softmax.h
index 14b6471..63a0304 100644
--- a/topi/include/topi/rocm/softmax.h
+++ b/topi/include/topi/rocm/softmax.h
@@ -24,16 +24,16 @@
 #ifndef TOPI_ROCM_SOFTMAX_H_
 #define TOPI_ROCM_SOFTMAX_H_
 
-#include "topi/tags.h"
-#include "topi/detail/fuse.h"
-#include "tvm/top/operation.h"
-#include "tvm/target/generic_func.h"
+#include <topi/tags.h>
+#include <topi/detail/fuse.h>
+#include <tvm/te/operation.h>
+#include <tvm/target/generic_func.h>
 
 #include "topi/cuda/softmax.h"
 
 namespace topi {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 namespace rocm {
 
diff --git a/topi/include/topi/transform.h b/topi/include/topi/transform.h
index 9a6d82a..efbffad 100644
--- a/topi/include/topi/transform.h
+++ b/topi/include/topi/transform.h
@@ -24,6 +24,13 @@
 #ifndef TOPI_TRANSFORM_H_
 #define TOPI_TRANSFORM_H_
 
+#include <tvm/tir/data_layout.h>
+#include <tvm/te/operation.h>
+#include <topi/tags.h>
+#include <topi/detail/ravel_unravel.h>
+#include <topi/detail/constant_utils.h>
+#include <topi/detail/tensor_utils.h>
+
 #include <string>
 #include <vector>
 #include <iterator>
@@ -31,17 +38,9 @@
 #include <limits>
 #include <unordered_set>
 
-#include "topi/tags.h"
-#include "topi/detail/ravel_unravel.h"
-#include "topi/detail/constant_utils.h"
-#include "topi/detail/tensor_utils.h"
-#include "tvm/top/operation.h"
-#include "tvm/tir/op.h"
-#include "tvm/tir/data_layout.h"
-
 namespace topi {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 using namespace topi::detail;
 
 /*!
@@ -1042,20 +1041,20 @@ inline Tensor gather_nd(const Tensor& data,
  *
  * \return A Tensor whose op member is the matmul operation
  */
-inline tvm::top::Tensor matmul(const tvm::top::Tensor& A,
-                           const tvm::top::Tensor& B,
+inline tvm::te::Tensor matmul(const tvm::te::Tensor& A,
+                           const tvm::te::Tensor& B,
                            bool trans_a = false,
                            bool trans_b = false,
                            std::string name = "T_matmul",
                            std::string tag = kMatMul) {
   tvm::Array<tvm::PrimExpr> output_shape{A->shape[trans_a ? 1 : 0],
                                      B->shape[trans_b ? 0 : 1]};
-  auto k = tvm::top::reduce_axis(tvm::Range{0, A->shape[trans_a ? 0 : 1]}, "k");
+  auto k = tvm::te::reduce_axis(tvm::Range{0, A->shape[trans_a ? 0 : 1]}, "k");
   auto l = [&](tvm::tir::Var i, tvm::tir::Var j) {
     return tvm::sum((trans_a ? A[k][i] : A[i][k]) * (trans_b ? B[j][k] : B[k][j]),
                     {k});
   };
-  return tvm::top::compute(output_shape, l, name, tag);
+  return tvm::te::compute(output_shape, l, name, tag);
 }
 
 /*!
@@ -1070,7 +1069,7 @@ inline tvm::top::Tensor matmul(const tvm::top::Tensor& A,
  * \return A Tensor computing the result
  */
 inline Tensor tensordot(const Tensor& A,
-                        const tvm::top::Tensor& B,
+                        const tvm::te::Tensor& B,
                         int axes = 2,
                         std::string name = "T_tensordot",
                         std::string tag = kMatMul) {
@@ -1125,7 +1124,7 @@ inline Tensor tensordot(const Tensor& A,
  * \return A Tensor computing the result
  */
 inline Tensor tensordot(const Tensor& A,
-                        const tvm::top::Tensor& B,
+                        const tvm::te::Tensor& B,
                         Array<PrimExpr> A_axes,
                         Array<PrimExpr> B_axes,
                         std::string name = "T_tensordot",
diff --git a/topi/include/topi/vision/reorg.h b/topi/include/topi/vision/reorg.h
index 4722c1f..06931e4 100644
--- a/topi/include/topi/vision/reorg.h
+++ b/topi/include/topi/vision/reorg.h
@@ -24,20 +24,19 @@
 #ifndef TOPI_VISION_REORG_H_
 #define TOPI_VISION_REORG_H_
 
+#include <tvm/te/operation.h>
+#include <topi/detail/constant_utils.h>
+#include <topi/reduction.h>
+#include <topi/tags.h>
+#include <topi/transform.h>
+
 #include <algorithm>
 #include <string>
 
-#include "topi/detail/constant_utils.h"
-#include "topi/reduction.h"
-#include "topi/tags.h"
-#include "topi/transform.h"
-#include "tvm/top/operation.h"
-#include "tvm/tir/op.h"
-
 namespace topi {
 namespace vision {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 /*!
 * \brief Reorg operation
@@ -61,7 +60,7 @@ inline Tensor reorg(const Tensor &data,
   int w_in = GetConstInt(input_shape[3]);
   int out_c = c_in / (stride * stride);
 
-  auto out = tvm::top::compute(input_shape,
+  auto out = tvm::te::compute(input_shape,
                           [&](Var b, Var k, Var j, Var i) {
                           return data(b * stride * stride,
                                       indexmod(k, out_c) * stride * stride,
diff --git a/topi/include/topi/x86/bnn.h b/topi/include/topi/x86/bnn.h
index b898821..53b7a8e 100644
--- a/topi/include/topi/x86/bnn.h
+++ b/topi/include/topi/x86/bnn.h
@@ -24,14 +24,14 @@
 #ifndef TOPI_X86_BNN_H_
 #define TOPI_X86_BNN_H_
 
-#include "topi/tags.h"
-#include "topi/detail/fuse.h"
-#include "tvm/top/operation.h"
-#include "tvm/target/generic_func.h"
+#include <topi/tags.h>
+#include <topi/detail/fuse.h>
+#include <tvm/te/operation.h>
+#include <tvm/target/generic_func.h>
 
 namespace topi {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 namespace x86 {
 /*!
diff --git a/topi/include/topi/x86/default.h b/topi/include/topi/x86/default.h
index 506932c..dec9a86 100644
--- a/topi/include/topi/x86/default.h
+++ b/topi/include/topi/x86/default.h
@@ -24,14 +24,14 @@
 #ifndef TOPI_X86_DEFAULT_H_
 #define TOPI_X86_DEFAULT_H_
 
-#include "topi/tags.h"
-#include "topi/detail/fuse.h"
-#include "tvm/top/operation.h"
-#include "tvm/target/generic_func.h"
+#include <topi/tags.h>
+#include <topi/detail/fuse.h>
+#include <tvm/te/operation.h>
+#include <tvm/target/generic_func.h>
 
 namespace topi {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 namespace x86 {
 /*!
@@ -55,7 +55,7 @@ inline Schedule MakeDefaultSchedule(const Target &target,
   auto axis = s[x]->op.as<ComputeOpNode>()->axis;
 
   if (auto_inline) {
-    tvm::top::AutoInlineInjective(s);
+    tvm::te::AutoInlineInjective(s);
     if (axis.size() > 0) {
       detail::Fuse(s[x], axis);
     }
diff --git a/topi/include/topi/x86/injective.h b/topi/include/topi/x86/injective.h
index b828cf4..182140d 100644
--- a/topi/include/topi/x86/injective.h
+++ b/topi/include/topi/x86/injective.h
@@ -24,14 +24,14 @@
 #ifndef TOPI_X86_INJECTIVE_H_
 #define TOPI_X86_INJECTIVE_H_
 
-#include "topi/tags.h"
-#include "topi/detail/fuse.h"
-#include "tvm/top/operation.h"
-#include "tvm/target/generic_func.h"
+#include <topi/tags.h>
+#include <topi/detail/fuse.h>
+#include <tvm/te/operation.h>
+#include <tvm/target/generic_func.h>
 
 namespace topi {
 using namespace tvm;
-using namespace tvm::top;
+using namespace tvm::te;
 
 namespace x86 {
 
@@ -70,7 +70,7 @@ inline Schedule schedule_injective(const Target &target, const Array<Tensor>& ou
     out_ops.push_back(t->op);
   }
   auto s = create_schedule(out_ops);
-  tvm::top::AutoInlineInjective(s);
+  tvm::te::AutoInlineInjective(s);
 
   auto x = outs[0];
   schedule_injective_from_existing(s, x);
diff --git a/topi/src/topi.cc b/topi/src/topi.cc
index f422c02..f86c2d4 100644
--- a/topi/src/topi.cc
+++ b/topi/src/topi.cc
@@ -93,7 +93,7 @@ Array<Integer> ArrayOrInt(TVMArgValue arg) {
 inline bool IsTensorType(TVMArgValue arg) {
   return (arg.type_code() == kTVMObjectHandle &&
           static_cast<Object*>(
-              arg.value().v_handle)->IsInstance<tvm::top::TensorNode>());
+              arg.value().v_handle)->IsInstance<tvm::te::TensorNode>());
 }
 
 
@@ -109,17 +109,17 @@ TVM_REGISTER_GLOBAL("topi.TEST_create_target")
       bool lhs_is_tensor = IsTensorType(args[0]);                       \
       bool rhs_is_tensor = IsTensorType(args[1]);                       \
       if (lhs_is_tensor && rhs_is_tensor) {                             \
-        *rv = Op(args[0].operator tvm::top::Tensor(),                        \
-                 args[1].operator tvm::top::Tensor());                       \
+        *rv = Op(args[0].operator tvm::te::Tensor(),                    \
+                 args[1].operator tvm::te::Tensor());                   \
       } else if (!lhs_is_tensor && rhs_is_tensor) {                     \
-        *rv = Op(args[0].operator tvm::PrimExpr(),                          \
-                 args[1].operator tvm::top::Tensor());                       \
+        *rv = Op(args[0].operator tvm::PrimExpr(),                      \
+                 args[1].operator tvm::te::Tensor());                   \
       } else if (lhs_is_tensor && !rhs_is_tensor) {                     \
-        *rv = Op(args[0].operator tvm::top::Tensor(),                        \
-                 args[1].operator tvm::PrimExpr());                         \
+        *rv = Op(args[0].operator tvm::te::Tensor(),                    \
+                 args[1].operator tvm::PrimExpr());                     \
       } else if (!lhs_is_tensor && !rhs_is_tensor) {                    \
-        *rv = Op(args[0].operator tvm::PrimExpr(),                          \
-                 args[1].operator tvm::PrimExpr());                         \
+        *rv = Op(args[0].operator tvm::PrimExpr(),                      \
+                 args[1].operator tvm::PrimExpr());                     \
       }                                                                 \
     });                                                                 \
 
@@ -757,7 +757,7 @@ TVM_REGISTER_GLOBAL("topi.util.is_empty_shape")
 
 /*! \brief Builder function for instantiating schedules. */
 using FTVMScheduleBuilder = std::function<
-  tvm::top::Schedule(const tvm::Target& target, const tvm::Array<tvm::top::Tensor>& outs)>;
+  tvm::te::Schedule(const tvm::Target& target, const tvm::Array<tvm::te::Tensor>& outs)>;
 
 /*!
  * \brief Helper function for registering generic functions matching the
@@ -826,7 +826,7 @@ TVM_REGISTER_GENERIC_FUNC(schedule_binary_dense)
 
 /*! \brief Builder function for instantiating schedules from existing schedules. */
 using FTVMScheduleFromExistingBuilder = std::function<
-  tvm::top::Schedule(tvm::top::Schedule sch, const tvm::top::Tensor& out)>;
+  tvm::te::Schedule(tvm::te::Schedule sch, const tvm::te::Tensor& out)>;
 
 /*!
  * \brief Helper function for registering generic functions matching the
@@ -850,10 +850,10 @@ TVM_REGISTER_GENERIC_FUNC(schedule_injective_from_existing)
   topi::cuda::schedule_injective_from_existing));
 
 /*! \brief Builder function for instantiating dense ops. */
-using FTVMDenseOpBuilder = std::function<tvm::top::Tensor(const Target& target,
-                                                     const tvm::top::Tensor& data,
-                                                     const tvm::top::Tensor& weight,
-                                                     const tvm::top::Tensor& bias,
+using FTVMDenseOpBuilder = std::function<tvm::te::Tensor(const Target& target,
+                                                     const tvm::te::Tensor& data,
+                                                     const tvm::te::Tensor& weight,
+                                                     const tvm::te::Tensor& bias,
                                                      const DataType& out_dtype)>;
 
 /*!
@@ -879,9 +879,9 @@ inline PackedFunc WrapDenseOp(FTVMDenseOpBuilder builder) {
 
 TVM_REGISTER_GENERIC_FUNC(dense)
 .set_default(WrapDenseOp([](const Target& target,
-                            const tvm::top::Tensor& data,
-                            const tvm::top::Tensor& weight,
-                            const tvm::top::Tensor& bias,
+                            const tvm::te::Tensor& data,
+                            const tvm::te::Tensor& weight,
+                            const tvm::te::Tensor& bias,
                             const DataType& out_dtype) {
   return topi::nn::dense(data, weight, bias, out_dtype);
 }))
-- 
2.7.4