From 64e08dbf9a3312b289066259187d76691ce8a4ef Mon Sep 17 00:00:00 2001
From: Zhi <5145158+zhiics@users.noreply.github.com>
Date: Thu, 30 Jul 2020 13:16:36 -0700
Subject: [PATCH] [BYOC] Retire the example json runtime (#6177)

---
 CMakeLists.txt                                     |   6 -
 cmake/config.cmake                                 |   3 -
 docs/dev/relay_bring_your_own_codegen.rst          |   2 +-
 .../example_ext_runtime/example_ext_runtime.cc     | 336 -------------
 tests/python/relay/test_external_runtime.py        | 546 ---------------------
 tests/scripts/task_config_build_cpu.sh             |   1 -
 tests/scripts/task_config_build_gpu.sh             |   1 -
 tests/scripts/task_config_build_gpu_vulkan.sh      |   1 -
 tests/scripts/task_config_build_i386.sh            |   1 -
 tests/scripts/task_config_build_wasm.sh            |   1 -
 10 files changed, 1 insertion(+), 897 deletions(-)
 delete mode 100644 src/runtime/contrib/example_ext_runtime/example_ext_runtime.cc
 delete mode 100644 tests/python/relay/test_external_runtime.py

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 19d582a..2911a61 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -303,12 +303,6 @@ if(USE_VM_PROFILER)
   list(APPEND RUNTIME_SRCS ${RUNTIME_VM_PROFILER_SRCS})
 endif(USE_VM_PROFILER)
 
-if(USE_EXAMPLE_EXT_RUNTIME)
-  message(STATUS "Build with example external runtime...")
-  file(GLOB RUNTIME_EXAMPLE_EXTERNAL_SRCS src/runtime/contrib/example_ext_runtime/*.cc)
-  list(APPEND RUNTIME_SRCS ${RUNTIME_EXAMPLE_EXTERNAL_SRCS})
-endif(USE_EXAMPLE_EXT_RUNTIME)
-
 # Module rules
 include(cmake/modules/VTA.cmake)
 include(cmake/modules/StandaloneCrt.cmake)
diff --git a/cmake/config.cmake b/cmake/config.cmake
index 4eae607..6963ece 100644
--- a/cmake/config.cmake
+++ b/cmake/config.cmake
@@ -217,9 +217,6 @@ set(USE_VTA_TSIM OFF)
 # Whether to build VTA FPGA driver (device side only)
 set(USE_VTA_FPGA OFF)
 
-# Whether to build the example external runtime module
-set(USE_EXAMPLE_EXT_RUNTIME OFF)
-
 # Whether use Thrust
 set(USE_THRUST OFF)
 
diff --git a/docs/dev/relay_bring_your_own_codegen.rst b/docs/dev/relay_bring_your_own_codegen.rst
index 4d761bf..3dc56ce 100644
--- a/docs/dev/relay_bring_your_own_codegen.rst
+++ b/docs/dev/relay_bring_your_own_codegen.rst
@@ -625,7 +625,7 @@ The next step is to implement a customized runtime to make use of the output of
 Implement a Customized Runtime
 ==============================
 
-In this section, we will implement a customized TVM runtime step-by-step and register it to TVM runtime modules. The customized runtime should be located at ``src/runtime/contrib/<your-runtime-name>/``. In our example, we name our runtime "example_ext_runtime" and put it under `/src/runtime/contrib/example_ext_runtime/ <https://github.com/apache/incubator-tvm/blob/master/src/runtime/contrib/example_ext_runtime/example_ext_runtime.cc>`_. Feel free to check this file for a complete implementation.
+In this section, we will implement a customized TVM runtime step by step and register it with the TVM runtime modules. The customized runtime should be located at ``src/runtime/contrib/<your-runtime-name>/``. In our example, we name our runtime "example_ext_runtime".
 
 Again, we first define a customized runtime class as follows. The class has to be derived from TVM ``ModuleNode`` in order to be compatible with other TVM runtime modules.
 
diff --git a/src/runtime/contrib/example_ext_runtime/example_ext_runtime.cc b/src/runtime/contrib/example_ext_runtime/example_ext_runtime.cc
deleted file mode 100644
index 1a63ede..0000000
--- a/src/runtime/contrib/example_ext_runtime/example_ext_runtime.cc
+++ /dev/null
@@ -1,336 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file external_runtime_test.cc
- * \brief Test an example runtime module to interpreting a json string.
- *
- * This is an exmaple runtime employed to show how we can interprete and execute
- * a json string that represents a simple computational (sub)graph. Users will
- * mainly need to implement four functions as follows:
- *  - GetFunction. It is used to get the packed function from the json runtime
- * module using a provided function name. This function returns a PackedFunc
- * that can be directly invoked by feeding it with parameters.
- *  - SaveToBinary. This function is used to achieve the serialization purpose.
- * The emitted binary stream can be directly saved to disk so that users can
- * load then back when needed.
- *  - LoadFromBinary. This function uses binary stream to load the json that
- * saved by SaveToBinary which essentially performs deserialization.
- */
-#include <dmlc/logging.h>
-#include <tvm/runtime/c_runtime_api.h>
-#include <tvm/runtime/memory.h>
-#include <tvm/runtime/module.h>
-#include <tvm/runtime/ndarray.h>
-#include <tvm/runtime/object.h>
-#include <tvm/runtime/packed_func.h>
-#include <tvm/runtime/registry.h>
-
-#include <cmath>
-#include <fstream>
-#include <map>
-#include <sstream>
-#include <string>
-#include <vector>
-
-namespace tvm {
-namespace runtime {
-
-// A simple JSON node that contains multiple inputs and a single output.
-struct NodeEntry {
-  int id;
-  int output;
-  std::vector<int> inputs;
-};
-
-/*!
- * \brief The following 6 functions are examples for demonstration. Users need
- * to provide their own API when they use the external library. The ones that
- * accecpt TVMValue are wrappers used to bridge the PackedFunc and user-defined
- * kernels.
- */
-void Add_(float* a, int len_a, float* b, int len_b, float* c) {
-  for (int i = 0; i < len_a * len_b; i++) {
-    c[i] = a[i] + b[i];
-  }
-}
-
-int Add(TVMValue* value, int* type_code, int nargs) {
-  CHECK_EQ(nargs, 3U) << "Expect 3 args, but get " << nargs << "\n";
-  DLTensor* arg0 = static_cast<DLTensor*>(value[0].v_handle);
-  DLTensor* arg1 = static_cast<DLTensor*>(value[1].v_handle);
-  DLTensor* out = static_cast<DLTensor*>(value[2].v_handle);
-  Add_(static_cast<float*>(arg0->data), arg0->shape[0], static_cast<float*>(arg1->data),
-       arg1->shape[0], static_cast<float*>(out->data));
-  return 0;
-}
-
-void Sub_(float* a, int len_a, float* b, int len_b, float* c) {
-  for (int i = 0; i < len_a * len_b; i++) {
-    c[i] = a[i] - b[i];
-  }
-}
-
-int Sub(TVMValue* value, int* type_code, int nargs) {
-  CHECK_EQ(nargs, 3U) << "Expect 3 args, but get " << nargs << "\n";
-  DLTensor* arg0 = static_cast<DLTensor*>(value[0].v_handle);
-  DLTensor* arg1 = static_cast<DLTensor*>(value[1].v_handle);
-  DLTensor* out = static_cast<DLTensor*>(value[2].v_handle);
-  Sub_(static_cast<float*>(arg0->data), arg0->shape[0], static_cast<float*>(arg1->data),
-       arg1->shape[0], static_cast<float*>(out->data));
-  return 0;
-}
-
-void Mul_(float* a, int len_a, float* b, int len_b, float* c) {
-  for (int i = 0; i < len_a * len_b; i++) {
-    c[i] = a[i] * b[i];
-  }
-}
-
-int Mul(TVMValue* value, int* type_code, int nargs) {
-  CHECK_EQ(nargs, 3U) << "Expect 3 args, but get " << nargs << "\n";
-  DLTensor* arg0 = static_cast<DLTensor*>(value[0].v_handle);
-  DLTensor* arg1 = static_cast<DLTensor*>(value[1].v_handle);
-  DLTensor* out = static_cast<DLTensor*>(value[2].v_handle);
-  Mul_(static_cast<float*>(arg0->data), arg0->shape[0], static_cast<float*>(arg1->data),
-       arg1->shape[0], static_cast<float*>(out->data));
-  return 0;
-}
-
-/*!
- * \brief The example json runtime module. Here we define a simple format for
- * the computational graph using json for demonstration purpose. Users should
- * customize their own format.
- */
-class ExampleJsonModule : public ModuleNode {
- public:
-  explicit ExampleJsonModule(std::string graph_json) {
-    this->graph_json_ = graph_json;
-    ParseJson(this->graph_json_);
-  }
-
-  /*!
-   * \brief Get a PackedFunc from the example json module.
-   *
-   * \param name the name of the function.
-   * \param sptr_to_self The ObjectPtr that points to this module node.
-   *
-   * \return The function pointer when it is found, otherwise, PackedFunc(nullptr).
-   */
-  PackedFunc GetFunction(const std::string& name, const ObjectPtr<Object>& sptr_to_self) final {
-    if (this->graph_.find(name) != this->graph_.end()) {
-      this->curr_subgraph_ = name;
-      return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
-        for (auto i = 0; i < args.size(); ++i) {
-          CHECK(args[i].type_code() == kTVMNDArrayHandle ||
-                args[i].type_code() == kTVMDLTensorHandle)
-              << "Expect NDArray or DLTensor as inputs"
-              << "\n";
-          if (args[i].type_code() == kTVMDLTensorHandle) {
-            DLTensor* arg = args[i];
-            this->data_entry_[i].CopyFrom(arg);
-          } else {
-            NDArray arg = args[i];
-            this->data_entry_[i].CopyFrom(arg);
-          }
-        }
-        for (const auto& it : this->graph_[this->curr_subgraph_]) {
-          this->Run(it.id, it.inputs, it.output);
-        }
-        CHECK_GT(graph_.count(this->curr_subgraph_), 0U);
-        auto out_idx = graph_[this->curr_subgraph_].back().output;
-        if (args[args.size() - 1].type_code() == kTVMDLTensorHandle) {
-          DLTensor* arg = args[args.size() - 1];
-          this->data_entry_[out_idx].CopyTo(arg);
-        } else {
-          NDArray arg = args[args.size() - 1];
-          this->data_entry_[out_idx].CopyTo(arg);
-        }
-        *rv = data_entry_.back();
-      });
-    } else {
-      LOG(FATAL) << "Unkown runtime type: " << name << "\n";
-      return PackedFunc();
-    }
-  }
-
-  /*!
-   * \brief Execute a function with provided arguments. The output will be
-   * packed to the last argument according to TVM's calling convention.
-   *
-   * \param id The id of the function.
-   * \param inputs The input indices that indicate where the data should be
-   * fetched in the data entry pool.
-   * \param output The output index.
-   */
-  void Run(int id, const std::vector<int>& inputs, int output) {
-    std::vector<int> args(inputs.begin(), inputs.end());
-    args.push_back(output);
-    std::vector<TVMValue> values(args.size());
-    std::vector<int> type_codes(args.size());
-    TVMArgsSetter setter(values.data(), type_codes.data());
-
-    if (op_id_[id] == "add" || op_id_[id] == "sub" || op_id_[id] == "mul") {
-      for (size_t i = 0; i < args.size(); i++) {
-        setter(i, data_entry_[args[i]]);
-      }
-    }
-
-    if (op_id_[id] == "add") {
-      Add(values.data(), type_codes.data(), args.size());
-    } else if (op_id_[id] == "sub") {
-      Sub(values.data(), type_codes.data(), args.size());
-    } else if (op_id_[id] == "mul") {
-      Mul(values.data(), type_codes.data(), args.size());
-    } else {
-      LOG(FATAL) << "Unknown op: " << op_id_[id] << "\n";
-    }
-  }
-
-  const char* type_key() const { return "examplejson"; }
-
-  /*!
-   * \brief Save the json runtime to a binary stream, which can then be
-   * serialized to disk.
-   *
-   * \param stream. The stream to save the binary.
-   */
-  void SaveToBinary(dmlc::Stream* stream) final { stream->Write(this->graph_json_); }
-
-  /*!
-   * \brief Parse the example json string.
-   *
-   * \param json. The json string that represents a simple computational graph.
-   *
-   * \Note this is a very simple json that only serves for demostration purpose.
-   * Users usually have their own format and they can serialize it using the
-   * SaveToBinary method and deserialize it using LoadFromFile.
-   */
-  void ParseJson(const std::string& json) {
-    std::string line;
-    std::string curr_subgraph;
-    std::stringstream ss(json);
-
-    while (std::getline(ss, line, '\n')) {
-      std::stringstream ss2(line);
-      std::string token;
-      int id = 0;
-
-      ss2 >> token;
-      if (token.find("json_rt_") != std::string::npos) {
-        curr_subgraph = token;
-        continue;
-      }
-
-      ss2 >> id;
-      if (op_id_.size() <= static_cast<size_t>(id)) {
-        op_id_.resize(id + 1);
-        data_entry_.resize(id + 1);
-      }
-
-      int64_t total_elements = 1;
-      std::vector<int64_t> shape;
-      if (token == "input") {
-        int64_t size = 0;
-        while (ss2 >> size) {
-          total_elements *= size;
-          shape.push_back(size);
-        }
-      } else {
-        op_id_[id] = token;
-        bool shape_data = false;
-        NodeEntry entry;
-        while (ss2 >> token) {
-          if (token == "shape:") {
-            shape_data = true;
-          } else if (shape_data) {
-            total_elements *= std::stoll(token);
-            shape.push_back(std::stoll(token));
-          } else if (token != "inputs:") {
-            entry.inputs.push_back(std::stoi(token));
-          }
-        }
-        entry.id = id;
-        entry.output = id;
-        graph_[curr_subgraph].push_back(entry);
-      }
-      DLContext ctx;
-      ctx.device_type = static_cast<DLDeviceType>(1);
-      ctx.device_id = 0;
-      data_entry_[id] = NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx);
-    }
-  }
-
-  /*!
-   * \brief Create a module from a file path of a serialized graph.
-   *
-   * \param path The file path contains a computational graph representation.
-   *
-   * \return The created json module.
-   */
-  static Module Create(const std::string& path) {
-    std::ifstream filep;
-    filep.open(path, std::ios::in);
-    std::string graph_json;
-    std::string line;
-    while (std::getline(filep, line)) {
-      graph_json += line;
-      graph_json += "\n";
-    }
-    filep.close();
-    auto n = tvm::runtime::make_object<ExampleJsonModule>(graph_json);
-    return Module(n);
-  }
-
-  /*!
-   * \brief Load a json module from stream.
-   *
-   * \param strm The binary stream to load json.
-   *
-   * \return The created json module.
-   */
-  static Module LoadFromBinary(void* strm) {
-    dmlc::Stream* stream = static_cast<dmlc::Stream*>(strm);
-    std::string graph_json;
-    stream->Read(&graph_json);
-    auto n = tvm::runtime::make_object<ExampleJsonModule>(graph_json);
-    return Module(n);
-  }
-
- private:
-  /* \brief The json string that represents a computational graph. */
-  std::string graph_json_;
-  /* \brief The subgraph that being processed. */
-  std::string curr_subgraph_;
-  /*! \brief A simple graph from subgraph id to node entries. */
-  std::map<std::string, std::vector<NodeEntry> > graph_;
-  /* \brief A simple pool to contain the tensor for each node in the graph. */
-  std::vector<NDArray> data_entry_;
-  /* \brief A mapping from node id to op name. */
-  std::vector<std::string> op_id_;
-};
-
-TVM_REGISTER_GLOBAL("runtime.module.loadfile_examplejson")
-    .set_body([](TVMArgs args, TVMRetValue* rv) { *rv = ExampleJsonModule::Create(args[0]); });
-
-TVM_REGISTER_GLOBAL("runtime.module.loadbinary_examplejson")
-    .set_body_typed(ExampleJsonModule::LoadFromBinary);
-
-}  // namespace runtime
-}  // namespace tvm
diff --git a/tests/python/relay/test_external_runtime.py b/tests/python/relay/test_external_runtime.py
deleted file mode 100644
index 7c6199a..0000000
--- a/tests/python/relay/test_external_runtime.py
+++ /dev/null
@@ -1,546 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-from shutil import which
-import json
-import pytest
-import sys
-import numpy as np
-
-import tvm
-from tvm import te
-import tvm.runtime._ffi_api
-from tvm import relay
-from tvm.contrib import util
-
-tmp_path = util.tempdir()
-
-
-def generate_csource_module():
-    """Mock the codegen with an external library (e.g., CBLAS/cuDNN)"""
-
-    code = r'''
-    #include <tvm/runtime/c_runtime_api.h>
-    #include <tvm/runtime/packed_func.h>
-    #include <dlpack/dlpack.h>
-    #include <cstdint>
-    #include <cstring>
-    #include <iostream>
-
-    #define GCC_BINARY_OP_1D(p_ID_, p_OP_, p_DIM1_)           \
-      extern "C" void p_ID_(float* a, float* b, float* out) { \
-        for (int64_t i = 0; i < p_DIM1_; ++i) {               \
-          out[i] = a[i] p_OP_ b[i];                           \
-        }                                                     \
-      }
-
-    #define GCC_BINARY_OP_2D(p_ID_, p_OP_, p_DIM1_, p_DIM2_)  \
-      extern "C" void p_ID_(float* a, float* b, float* out) { \
-        for (int64_t i = 0; i < p_DIM1_; ++i) {               \
-          for (int64_t j = 0; j < p_DIM2_; ++j) {             \
-            int64_t k = i * p_DIM2_ + j;                      \
-            out[k] = a[k] p_OP_ b[k];                         \
-          }                                                   \
-        }                                                     \
-      }
-    GCC_BINARY_OP_2D(gcc_1_0, *, 10, 10);
-    GCC_BINARY_OP_2D(gcc_1_1, -, 10, 10);
-    GCC_BINARY_OP_2D(gcc_1_2, +, 10, 10);
-
-    extern "C" void gcc_1_(float* gcc_input4, float* gcc_input5,
-                           float* gcc_input6, float* gcc_input7, float* out) {
-      float* buf_0 = (float*)malloc(4 * 100);
-      float* buf_1 = (float*)malloc(4 * 100);
-      gcc_1_2(gcc_input4, gcc_input5, buf_0);
-      gcc_1_1(buf_0, gcc_input6, buf_1);
-      gcc_1_0(buf_1, gcc_input7, out);
-      free(buf_0);
-      free(buf_1);
-    }
-
-    extern "C" int ccompiler_wrapper_1_(DLTensor* arg0, DLTensor* arg1,
-                                        DLTensor* arg2, DLTensor* arg3,
-                                        DLTensor* out) {
-        gcc_1_(static_cast<float*>(arg0->data), static_cast<float*>(arg1->data),
-               static_cast<float*>(arg2->data), static_cast<float*>(arg3->data),
-               static_cast<float*>(out->data));
-        return 0;
-    }
-
-    TVM_DLL_EXPORT_TYPED_FUNC(json_rt_1, ccompiler_wrapper_1_);
-
-    GCC_BINARY_OP_2D(gcc_0_0, *, 10, 10);
-    GCC_BINARY_OP_2D(gcc_0_1, -, 10, 10);
-    GCC_BINARY_OP_2D(gcc_0_2, +, 10, 10);
-
-    extern "C" void gcc_0_(float* gcc_input0, float* gcc_input1,
-                           float* gcc_input2, float* gcc_input3, float* out) {
-      float* buf_0 = (float*)malloc(4 * 100);
-      float* buf_1 = (float*)malloc(4 * 100);
-      gcc_0_2(gcc_input0, gcc_input1, buf_0);
-      gcc_0_1(buf_0, gcc_input2, buf_1);
-      gcc_0_0(buf_1, gcc_input3, out);
-      free(buf_0);
-      free(buf_1);
-    }
-
-    extern "C" int ccompiler_wrapper_0_(DLTensor* arg0, DLTensor* arg1,
-                                        DLTensor* arg2, DLTensor* arg3,
-                                        DLTensor* out) {
-        gcc_0_(static_cast<float*>(arg0->data), static_cast<float*>(arg1->data),
-               static_cast<float*>(arg2->data), static_cast<float*>(arg3->data),
-               static_cast<float*>(out->data));
-        return 0;
-    }
-
-    TVM_DLL_EXPORT_TYPED_FUNC(json_rt_0, ccompiler_wrapper_0_);
-
-    '''
-    csource_module = tvm.runtime._ffi_api.CSourceModuleCreate(code, "cc", "",
-                                                              None)
-    return csource_module
-
-
-def generate_engine_module():
-    """
-    Mock the codegen of an external backend with its own runtime engine
-    (e.g., MKL-DNN/TensorRT)
-    """
-
-    code = r'''
-    #include <tvm/runtime/c_runtime_api.h>
-    #include <tvm/runtime/packed_func.h>
-    #include <dlpack/dlpack.h>
-    #include "json_engine.h"
-
-    extern "C" void json_1_(float* json_input4, float* json_input5,
-                            float* json_input6, float* json_input7, float* out) {
-
-        std::string graph =
-            "add_2d,10,10\n"
-            "sub_2d,10,10\n"
-            "mul_2d,10,10\n";
-
-        Engine engine;
-        engine.run(graph, {json_input4, json_input5, json_input6, json_input7}, out);
-    }
-
-    extern "C" int json_wrapper_1_(DLTensor* arg0, DLTensor* arg1,
-                                   DLTensor* arg2, DLTensor* arg3,
-                                   DLTensor* out) {
-        json_1_(static_cast<float*>(arg0->data), static_cast<float*>(arg1->data),
-                static_cast<float*>(arg2->data), static_cast<float*>(arg3->data),
-                static_cast<float*>(out->data));
-        return 0;
-    }
-
-    TVM_DLL_EXPORT_TYPED_FUNC(json_rt_1, json_wrapper_1_);
-
-    extern "C" void json_0_(float* json_input0, float* json_input1,
-                            float* json_input2, float* json_input3, float* out) {
-
-        std::string graph =
-            "add_2d,10,10\n"
-            "sub_2d,10,10\n"
-            "mul_2d,10,10\n";
-
-        Engine engine;
-        engine.run(graph, {json_input0, json_input1, json_input2, json_input3}, out);
-
-    }
-
-    extern "C" int json_wrapper_0_(DLTensor* arg0, DLTensor* arg1,
-                                   DLTensor* arg2, DLTensor* arg3,
-                                   DLTensor* out) {
-        json_0_(static_cast<float*>(arg0->data), static_cast<float*>(arg1->data),
-                static_cast<float*>(arg2->data), static_cast<float*>(arg3->data),
-                static_cast<float*>(out->data));
-        return 0;
-    }
-
-    TVM_DLL_EXPORT_TYPED_FUNC(json_rt_0, json_wrapper_0_);
-
-    '''
-
-    gen_json_engine()
-    csource_module = tvm.runtime._ffi_api.CSourceModuleCreate(code, "cc", "",
-                                                              None)
-    return csource_module
-
-
-def gen_json_engine():
-    """An example of external backend runtime engine. This is supposed to be provided
-      by third-party vendors and included when building the generated external kernel code.
-    """
-
-    code = r'''
-    #ifndef _JSON_ENGINE_H_
-    #define _JSON_ENGINE_H_
-    #include <cstdint>
-    #include <string>
-    #include <sstream>
-    #include <vector>
-
-    #define GCC_BINARY_OP_2D(p_ID_, p_OP_)  \
-      void p_ID_(int64_t dim1, int64_t dim2, float* a, float* b, float* out) { \
-        for (int64_t i = 0; i < dim1; ++i) {                                   \
-          for (int64_t j = 0; j < dim2; ++j) {                                 \
-            int64_t k = i * dim2 + j;                                          \
-            out[k] = a[k] p_OP_ b[k];                                          \
-          }                                                                    \
-        }                                                                      \
-      }
-    GCC_BINARY_OP_2D(add_2d, +);
-    GCC_BINARY_OP_2D(sub_2d, -);
-    GCC_BINARY_OP_2D(mul_2d, *);
-
-    struct Layer {
-        void (*op)(int64_t, int64_t, float*, float*, float*);
-        std::vector<int64_t> shapes;
-        std::vector<float*> args;
-    };
-
-    class Engine {
-    public:
-        float* alloc_buffer(int64_t size) {
-            float* buf = (float*)malloc(sizeof(float) * size);
-            buffers.push_back(buf);
-            return buf;
-        }
-        void add(std::string op, int64_t dim1, int64_t dim2, float* in1, float* in2, float* out) {
-            Layer layer;
-            layer.shapes.push_back(dim1);
-            layer.shapes.push_back(dim2);
-            layer.args.push_back(in1);
-            layer.args.push_back(in2);
-            layer.args.push_back(out);
-
-            if (op == "add_2d")
-                layer.op = &add_2d;
-            else if (op == "sub_2d")
-                layer.op = &sub_2d;
-            else if (op == "mul_2d")
-                layer.op = &mul_2d;
-            net.push_back(layer);
-            return ;
-        }
-
-        void run(std::string graph, std::vector<float*> args, float* out) {
-            std::stringstream ss(graph);
-            std::string line;
-            int layer_idx = 0;
-            int arg_idx = 0;
-            float* buf = nullptr;
-
-            while (std::getline(ss, line, '\n')) {
-                std::stringstream ss2(line);
-                std::string token;
-                std::vector<std::string> attrs;
-                while (std::getline(ss2, token, ',')) {
-                    attrs.push_back(token);
-                }
-                int64_t dim1 = stoll(attrs[1]);
-                int64_t dim2 = stoll(attrs[2]);
-                auto out_buf = this->alloc_buffer(dim1 * dim2);
-
-                if (layer_idx == 0) {
-                    this->add(attrs[0], dim1, dim2, args[0], args[1], out_buf);
-                    buf = out_buf;
-                    arg_idx = 2;
-                }
-                else {
-                    this->add(attrs[0], dim1, dim2, buf, args[arg_idx], out_buf);
-                    buf = out_buf;
-                    arg_idx++;
-                }
-                layer_idx++;
-            }
-            this->net.back().args.back() = out;
-
-            for (auto layer : net) {
-                (*layer.op)(layer.shapes[0], layer.shapes[1], layer.args[0], layer.args[1], layer.args[2]);
-            }
-        }
-        ~Engine() {
-            for (auto buf : buffers) {
-                free(buf);
-            }
-        }
-    private:
-        std::vector<Layer> net;
-        std::vector<float*> buffers;
-    };
-
-    #endif  // _JSON_ENGINE_H_
-    '''
-    header_file = tmp_path.relpath("json_engine.h")
-    with open(header_file, 'w') as f:
-        f.write(code)
-
-
-def get_synthetic_lib():
-    x = relay.var('x', shape=(10, 10))
-    w0 = relay.var('w0', shape=(10, 10))
-    w1 = relay.var('w1', shape=(10, 10))
-    w2 = relay.var('w2', shape=(10, 10))
-    w3 = relay.var('w3', shape=(10, 10))
-    w4 = relay.var('w4', shape=(10, 10))
-    w5 = relay.var('w5', shape=(10, 10))
-    w6 = relay.var('w6', shape=(10, 10))
-    w7 = relay.var('w7', shape=(10, 10))
-
-    # subgraph0
-    gcc_input0 = relay.var('gcc_input0', shape=(10, 10))
-    gcc_input1 = relay.var('gcc_input1', shape=(10, 10))
-    gcc_input2 = relay.var('gcc_input2', shape=(10, 10))
-    gcc_input3 = relay.var('gcc_input3', shape=(10, 10))
-    subgraph0 = relay.Function([gcc_input0, gcc_input1, gcc_input2,
-                                gcc_input3], relay.copy(gcc_input0))
-    subgraph0 = subgraph0.with_attr(
-        "Primitive", tvm.tir.IntImm("int32", 1))
-
-    # Call subgraph0
-    subgraph0_ret = relay.Call(subgraph0, [x, w0, w1, w2])
-
-    # subgraph1
-    gcc_input4 = relay.var('gcc_input4', shape=(10, 10))
-    gcc_input5 = relay.var('gcc_input5', shape=(10, 10))
-    gcc_input6 = relay.var('gcc_input6', shape=(10, 10))
-    gcc_input7 = relay.var('gcc_input7', shape=(10, 10))
-    subgraph1 = relay.Function([gcc_input4, gcc_input5, gcc_input6,
-                                gcc_input7], relay.copy(gcc_input4))
-    subgraph1 = subgraph1.with_attr(
-        "Primitive", tvm.tir.IntImm("int32", 1))
-
-    # Call subgraph1
-    subgraph1_ret = relay.Call(subgraph1, [x, w3, w4, w5])
-
-    # Other ops that will be executed on TVM.
-    add2 = relay.add(x, w6)
-    sub2 = relay.subtract(add2, w7)
-    ret = relay.concatenate((subgraph0_ret, subgraph1_ret, sub2), 0)
-    func = relay.Function([x, w0, w1, w2, w3, w4, w5, w6, w7], ret)
-    mod = tvm.IRModule.from_expr(func)
-    _, lib, _ = relay.build(mod, "llvm")
-    return lib
-
-def get_whole_graph_json():
-    nodex = {"op": "null", "name": "x", "inputs": []}
-    node0 = {"op": "null", "name": "w0", "inputs": []}
-    node1 = {"op": "null", "name": "w1", "inputs": []}
-    node2 = {"op": "null", "name": "w2", "inputs": []}
-    node3 = {"op": "null", "name": "w3", "inputs": []}
-    node4 = {"op": "null", "name": "w4", "inputs": []}
-    node5 = {"op": "null", "name": "w5", "inputs": []}
-    node6 = {"op": "null", "name": "w6", "inputs": []}
-    node7 = {"op": "null", "name": "w7", "inputs": []}
-
-    subgraph0 = {
-        "op": "tvm_op",
-        "name": "json_rt_0",
-        "attrs": {
-            "num_outputs": "1",
-            "num_inputs": "4",
-            "func_name": "json_rt_0",
-            "flatten_data": "0"
-        },
-        "inputs": [
-            [0, 0, 0],
-            [1, 0, 0],
-            [2, 0, 0],
-            [3, 0, 0],
-        ]
-    }
-    subgraph1 = {
-        "op": "tvm_op",
-        "name": "json_rt_1",
-        "attrs": {
-            "num_outputs": "1",
-            "num_inputs": "4",
-            "func_name": "json_rt_1",
-            "flatten_data": "0"
-        },
-        "inputs": [
-            [0, 0, 0],
-            [4, 0, 0],
-            [5, 0, 0],
-            [6, 0, 0],
-        ]
-    }
-
-    fused_op = {
-        "op": "tvm_op",
-        "name": "fused_add_subtract_concatenate",
-        "attrs": {
-            "num_outputs": "1",
-            "num_inputs": "5",
-            "func_name": "fused_add_subtract_concatenate",
-            "flatten_data": "0"
-        },
-        "inputs": [
-            [9, 0, 0],
-            [10, 0, 0],
-            [0, 0, 0],
-            [7, 0, 0],
-            [8, 0, 0]
-        ]
-    }
-    nodes = [nodex, node0, node1, node2, node3, node4,
-             node5, node6, node7, subgraph0, subgraph1, fused_op]
-    arg_nodes = [0, 1, 2, 3, 4, 5, 6, 7, 8]
-    heads = [[11, 0, 0]]
-    node_row_ptr = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
-    storage_id = ["list_int", [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]]
-
-    shape = ["list_shape", [
-        [10, 10], [10, 10], [10, 10], [10, 10], [10, 10], [10, 10],
-        [10, 10], [10, 10], [10, 10], [10, 10], [10, 10], [30, 10]]]
-
-    dltype = ["list_str", [
-        "float32", "float32", "float32", "float32", "float32", "float32",
-        "float32", "float32", "float32", "float32", "float32", "float32"]]
-
-    attrs = {
-        "shape": shape,
-        "dltype": dltype,
-        "storage_id": storage_id,
-    }
-
-    graph = {"nodes": nodes,
-             "arg_nodes": arg_nodes,
-             "node_row_ptr": node_row_ptr,
-             "heads": heads,
-             "attrs": attrs}
-
-    return json.dumps(graph)
-
-
-def run_extern(label, get_extern_src, **kwargs):
-    if which("gcc") is None:
-        print("Skip test because gcc is not available.")
-        return
-
-    obj_name = "{}.o".format(label)
-    lib_name = "external_{}.so".format(label)
-
-    # Get Json and the compiled library.
-    graph_json = get_whole_graph_json()
-    lib = get_synthetic_lib()
-    lib.save(obj_name)
-
-    # library that contains external code.
-    csource_module = get_extern_src()
-    kwargs["options"] = [obj_name] + kwargs["options"]
-    lib_path = tmp_path.relpath(lib_name)
-    csource_module.export_library(lib_path, fcompile=False, **kwargs)
-    # load module for execution.
-    lib = tvm.runtime.load_module(lib_path)
-    mod = tvm.contrib.graph_runtime.create(graph_json, lib, tvm.cpu(0))
-
-    x_data = np.random.rand(10, 10).astype('float32')
-    mod.set_input("x", x_data)
-    w_data = []
-    for i in range(8):
-        data = np.random.rand(10, 10).astype('float32')
-        w_data.append(data)
-        var = "w" + str(i)
-        mod.set_input(var, data)
-    mod.run()
-    out = tvm.nd.empty((30, 10), ctx=tvm.cpu())
-    out = mod.get_output(0, out)
-    tvm.testing.assert_allclose(
-        out.asnumpy(),
-        np.concatenate((((x_data + w_data[0]) - w_data[1]) * w_data[2],
-                        ((x_data + w_data[3]) - w_data[4]) * w_data[5],
-                        x_data + w_data[6] - w_data[7]),
-                       axis=0))
-
-
-def test_dso_extern():
-    run_extern("lib", generate_csource_module, options=["-O2", "-std=c++14"])
-
-
-def test_engine_extern():
-    run_extern("engine",
-               generate_engine_module,
-               options=["-O2", "-std=c++14", "-I" + tmp_path.relpath("")])
-
-def test_json_extern():
-    if not tvm.get_global_func("runtime.module.loadfile_examplejson", True):
-        print("Skip because JSON example runtime is not enabled.")
-        return
-
-    # Get subgraph Json.
-    subgraph_json = ("json_rt_0\n" +
-                     "input 0 10 10\n" +
-                     "input 1 10 10\n" +
-                     "input 2 10 10\n" +
-                     "input 3 10 10\n" +
-                     "add 4 inputs: 0 1 shape: 10 10\n" +
-                     "sub 5 inputs: 4 2 shape: 10 10\n" +
-                     "mul 6 inputs: 5 3 shape: 10 10\n" +
-                     "json_rt_1\n" +
-                     "input 0 10 10\n" +
-                     "input 1 10 10\n" +
-                     "input 2 10 10\n" +
-                     "input 3 10 10\n" +
-                     "add 4 inputs: 0 1 shape: 10 10\n" +
-                     "sub 5 inputs: 4 2 shape: 10 10\n" +
-                     "mul 6 inputs: 5 3 shape: 10 10")
-
-    subgraph_path = tmp_path.relpath('subgraph.examplejson')
-    with open(subgraph_path, 'w') as f:
-        f.write(subgraph_json)
-
-    # Get Json and module.
-    graph_json = get_whole_graph_json()
-
-
-    lib = get_synthetic_lib()
-    ext_lib = tvm.runtime.load_module(subgraph_path, "examplejson")
-    lib.import_module(ext_lib)
-    lib_name = 'external.so'
-    lib_path = tmp_path.relpath(lib_name)
-    lib.export_library(lib_path)
-
-    # load module for execution.
-    lib = tvm.runtime.load_module(lib_path)
-    mod = tvm.contrib.graph_runtime.create(graph_json, lib, tvm.cpu(0))
-
-    x_data = np.random.rand(10, 10).astype('float32')
-    mod.set_input("x", x_data)
-    w_data = []
-    for i in range(8):
-        data = np.random.rand(10, 10).astype('float32')
-        w_data.append(data)
-        var = "w" + str(i)
-        mod.set_input(var, data)
-
-    mod.run()
-    out = tvm.nd.empty((30, 10), ctx=tvm.cpu())
-    out = mod.get_output(0, out)
-    tvm.testing.assert_allclose(
-        out.asnumpy(),
-        np.concatenate((((x_data + w_data[0]) - w_data[1]) * w_data[2],
-                        ((x_data + w_data[3]) - w_data[4]) * w_data[5],
-                        x_data + w_data[6] - w_data[7]),
-                       axis=0))
-
-
-if __name__ == "__main__":
-    test_dso_extern()
-    test_engine_extern()
-    test_json_extern()
diff --git a/tests/scripts/task_config_build_cpu.sh b/tests/scripts/task_config_build_cpu.sh
index d1c076d..f36c1d9 100755
--- a/tests/scripts/task_config_build_cpu.sh
+++ b/tests/scripts/task_config_build_cpu.sh
@@ -29,7 +29,6 @@ echo set\(USE_MICRO_STANDALONE_RUNTIME ON\) >> config.cmake
 echo set\(USE_STANDALONE_CRT ON\) >> config.cmake
 echo set\(USE_GRAPH_RUNTIME_DEBUG ON\) >> config.cmake
 echo set\(USE_VM_PROFILER ON\) >> config.cmake
-echo set\(USE_EXAMPLE_EXT_RUNTIME ON\) >> config.cmake
 echo set\(USE_DNNL_CODEGEN ON\) >> config.cmake
 echo set\(USE_ARM_COMPUTE_LIB ON\) >> config.cmake
 echo set\(USE_LLVM llvm-config-10\) >> config.cmake
diff --git a/tests/scripts/task_config_build_gpu.sh b/tests/scripts/task_config_build_gpu.sh
index 08af277..dedb561 100755
--- a/tests/scripts/task_config_build_gpu.sh
+++ b/tests/scripts/task_config_build_gpu.sh
@@ -39,7 +39,6 @@ echo set\(USE_GRAPH_RUNTIME ON\) >> config.cmake
 echo set\(USE_STACKVM_RUNTIME ON\) >> config.cmake
 echo set\(USE_GRAPH_RUNTIME_DEBUG ON\) >> config.cmake
 echo set\(USE_VM_PROFILER ON\) >> config.cmake
-echo set\(USE_EXAMPLE_EXT_RUNTIME ON\) >> config.cmake
 echo set\(USE_ANTLR ON\) >> config.cmake
 echo set\(USE_VTA_TSIM ON\) >> config.cmake
 echo set\(USE_VTA_FSIM ON\) >> config.cmake
diff --git a/tests/scripts/task_config_build_gpu_vulkan.sh b/tests/scripts/task_config_build_gpu_vulkan.sh
index e07f97d..a5742e2 100755
--- a/tests/scripts/task_config_build_gpu_vulkan.sh
+++ b/tests/scripts/task_config_build_gpu_vulkan.sh
@@ -30,6 +30,5 @@ echo set\(USE_MICRO ON\) >> config.cmake
 echo set\(USE_STANDALONE_CRT ON\) >> config.cmake
 echo set\(USE_GRAPH_RUNTIME_DEBUG ON\) >> config.cmake
 echo set\(USE_VM_PROFILER ON\) >> config.cmake
-echo set\(USE_EXAMPLE_EXT_RUNTIME ON\) >> config.cmake
 echo set\(CMAKE_CXX_COMPILER clang-7\) >> config.cmake
 echo set\(CMAKE_CXX_FLAGS -Werror\) >> config.cmake
diff --git a/tests/scripts/task_config_build_i386.sh b/tests/scripts/task_config_build_i386.sh
index 6837c28..e8eb668 100755
--- a/tests/scripts/task_config_build_i386.sh
+++ b/tests/scripts/task_config_build_i386.sh
@@ -29,7 +29,6 @@ echo set\(USE_GRAPH_RUNTIME_DEBUG ON\) >> config.cmake
 echo set\(USE_MICRO_STANDALONE_RUNTIME ON\) >> config.cmake
 echo set\(USE_STANDALONE_CRT ON\) >> config.cmake
 echo set\(USE_VM_PROFILER ON\) >> config.cmake
-echo set\(USE_EXAMPLE_EXT_RUNTIME ON\) >> config.cmake
 echo set\(USE_LLVM llvm-config-4.0\) >> config.cmake
 echo set\(CMAKE_CXX_COMPILER g++\) >> config.cmake
 echo set\(CMAKE_CXX_FLAGS -Werror\) >> config.cmake
diff --git a/tests/scripts/task_config_build_wasm.sh b/tests/scripts/task_config_build_wasm.sh
index cbdfa75..f3157bd 100755
--- a/tests/scripts/task_config_build_wasm.sh
+++ b/tests/scripts/task_config_build_wasm.sh
@@ -29,7 +29,6 @@ echo set\(USE_MICRO_STANDALONE_RUNTIME ON\) >> config.cmake
 echo set\(USE_STANDALONE_CRT ON\) >> config.cmake
 echo set\(USE_GRAPH_RUNTIME_DEBUG ON\) >> config.cmake
 echo set\(USE_VM_PROFILER ON\) >> config.cmake
-echo set\(USE_EXAMPLE_EXT_RUNTIME ON\) >> config.cmake
 echo set\(USE_LLVM llvm-config-11\) >> config.cmake
 echo set\(USE_ANTLR ON\) >> config.cmake
 echo set\(CMAKE_CXX_COMPILER g++\) >> config.cmake
-- 
2.7.4