use packed func macro for external codegen (#4710)
author Zhi <5145158+zhiics@users.noreply.github.com>
Wed, 15 Jan 2020 03:35:56 +0000 (19:35 -0800)
committer Yizhi Liu <liuyizhi@apache.org>
Wed, 15 Jan 2020 03:35:56 +0000 (19:35 -0800)
python/tvm/_ffi/libinfo.py
src/relay/backend/contrib/codegen_c/codegen.cc
src/relay/backend/contrib/codegen_c/codegen_c.h
src/relay/backend/contrib/dnnl/codegen.cc
tests/python/relay/test_external_runtime.py

index b7b0c8f..cea34d3 100644 (file)
@@ -179,13 +179,20 @@ def find_include_path(name=None, search_path=None, optional=False):
         else:
             tvm_include_path = [os.path.join(p, name) for p in header_path]
         dlpack_include_path = []
+        dmlc_include_path = []
     else:
         tvm_include_path = [os.path.join(p, 'include') for p in header_path]
-        dlpack_include_path = [os.path.join(p, 'dlpack/include') for p in header_path]
+        dlpack_include_path = [os.path.join(p, 'dlpack/include') for p in
+                               header_path]
+        dmlc_include_path = [os.path.join(p, 'dmlc-core/include') for p in
+                             header_path]
 
         # try to find include path
         include_found = [p for p in tvm_include_path if os.path.exists(p) and os.path.isdir(p)]
-        include_found += [p for p in dlpack_include_path if os.path.exists(p) and os.path.isdir(p)]
+        include_found += [p for p in dlpack_include_path if os.path.exists(p)
+                          and os.path.isdir(p)]
+        include_found += [p for p in dmlc_include_path if os.path.exists(p)
+                          and os.path.isdir(p)]
 
     if not include_found:
         message = ('Cannot find the files.\n' +
index 0504b2e..0c9827f 100644 (file)
@@ -154,12 +154,9 @@ class CSourceCodegen : public CSourceModuleCodegenBase {
 
   runtime::Module CreateCSourceModule(const ObjectRef& ref) override {
     // Create headers
-    code_stream_ << "#include <cstdint>\n";
-    code_stream_ << "#include <iostream>\n";
-    code_stream_ << "#include <cstdlib>\n";
-    code_stream_ << "#include <stdio.h>\n";
     code_stream_ << "#include <cstring>\n";
     code_stream_ << "#include <tvm/runtime/c_runtime_api.h>\n";
+    code_stream_ << "#include <tvm/runtime/packed_func.h>\n";
     code_stream_ << "#include <dlpack/dlpack.h>\n";
 
     // Append some common macro for operator definition.
index f6fb222..c465139 100644 (file)
@@ -99,63 +99,52 @@ class CodegenCBase {
    * \code
    *
    * // An example code for the generated C function.
-   * extern "C" void foo(TVMValue* value, int* type_code, int nargs) {
-   *   if (nargs != 3) {
-   *     printf("foo expects 3 args, but received %d\n", nargs);
-   *     return 1;
-   *   }
-   *
-   *   DLTensor* arg0 = static_cast<DLTensor*>(value[0].v_handle);
-   *   DLTensor* arg1 = static_cast<DLTensor*>(value[1].v_handle);
-   *   DLTensor* out = static_cast<DLTensor*>(value[2].v_handle);
-   *
+   * extern "C" int foo_wrapper_(DLTensor* arg0,
+   *                             DLTensor* arg1,
+   *                             DLTensor* out) {
    *   foo_(static_cast<float*>(arg0->data),
    *        static_cast<float*>(arg1->data),
    *        static_cast<float*>(out->data));
    *   return 0;
    * }
    *
+   * TVM_DLL_EXPORT_TYPED_FUNC(foo, foo_wrapper_);
+   *
    * \endcode
    */
   void GenerateBackendCFunc(const std::string& func_name, int arg_cnt) {
     // Print signature
     code_stream_ << "\n";
-    code_stream_ << "extern \"C\" int " << func_name;
-    code_stream_ << "(TVMValue* value, int* type_code, int nargs) {\n";
-    EnterScope();
-    // Print guard
-    PrintIndents();
-    code_stream_ << "if (nargs != " << arg_cnt << "){\n";
+    code_stream_ << "extern \"C\" int " << func_name << "_wrapper_(";
+    for (int i = 0; i < arg_cnt - 1; i++) {
+      code_stream_ << "DLTensor* arg" << i << ",\n";
+      code_stream_ << "\t";
+    }
+    if (arg_cnt > 0) {
+      code_stream_ << "DLTensor* arg" << arg_cnt - 1 << ") {\n";
+    }
+
     EnterScope();
-    PrintIndents();
-    code_stream_ << "printf(\"" << func_name << " expects " << arg_cnt
-                 << " arguments, but received %d\\n\", nargs);\n";
-    PrintIndents();
-    code_stream_ << "return 1;\n";
-    ExitScope();
-    PrintIndents();
-    code_stream_ << "}\n";
 
-    // According to TVM's calling convention, the last one is output.
-    for (int i = 0; i < arg_cnt; i++) {
-      PrintIndents();
-      code_stream_ << "DLTensor* arg" << i << " = "
-                   << "static_cast<DLTensor*>(value[" << i << "].v_handle);\n";
-    }
-    // Generate the call.
+    // Generate the internal call.
     PrintIndents();
     code_stream_ << func_name << "_(";
     for (int i = 0; i < arg_cnt - 1; i++) {
-      code_stream_ << "static_cast<float*>(arg" << i << "->data), ";
+      code_stream_ << "static_cast<float*>(arg" << i << "->data),\n";
+      PrintIndents();
     }
     if (arg_cnt > 0) {
       code_stream_ << "static_cast<float*>(arg" << arg_cnt - 1 << "->data)";
     }
-    code_stream_ << ");\n\n";
+    code_stream_ << ");\n";
     PrintIndents();
     code_stream_ << "return 0;\n";
     ExitScope();
-    code_stream_ << "}";
+    code_stream_ << "}\n\n";
+
+    // Generate the macro
+    code_stream_ << "TVM_DLL_EXPORT_TYPED_FUNC(" << func_name << ", "
+                 << func_name << "_wrapper_);\n\n";
   }
 
   /*!
index fbe047d..759a442 100644 (file)
@@ -260,6 +260,7 @@ class DNNLModuleCodegen : public CSourceModuleCodegenBase {
     code_stream_ << "#include <cstdlib>\n";
     code_stream_ << "#include <cstring>\n";
     code_stream_ << "#include <tvm/runtime/c_runtime_api.h>\n";
+    code_stream_ << "#include <tvm/runtime/packed_func.h>\n";
     code_stream_ << "#include <dlpack/dlpack.h>\n";
     // dnnl_kernel file is saved under src/runtime/contrib/dnnl so that we don't
     // expose it to ordinary users. To make export_library use it, users need to
index 802ee6c..f25322c 100644 (file)
@@ -33,6 +33,7 @@ def generate_csource_module():
 
     code = r'''
     #include <tvm/runtime/c_runtime_api.h>
+    #include <tvm/runtime/packed_func.h>
     #include <dlpack/dlpack.h>
     #include <cstdint>
     #include <cstring>
@@ -69,22 +70,17 @@ def generate_csource_module():
       free(buf_1);
     }
 
-    extern "C" int json_rt_1(TVMValue* value, int* type_code, int nargs) {
-      if (nargs != 5) {
-        printf("Expect 5 args, but get %d", nargs);
-        return 1;
-      }
-      DLTensor* arg0 = static_cast<DLTensor*>(value[0].v_handle);
-      DLTensor* arg1 = static_cast<DLTensor*>(value[1].v_handle);
-      DLTensor* arg2 = static_cast<DLTensor*>(value[2].v_handle);
-      DLTensor* arg3 = static_cast<DLTensor*>(value[3].v_handle);
-      DLTensor* out = static_cast<DLTensor*>(value[4].v_handle);
-      gcc_1_(static_cast<float*>(arg0->data), static_cast<float*>(arg1->data),
-             static_cast<float*>(arg2->data), static_cast<float*>(arg3->data),
-             static_cast<float*>(out->data));
-      return 0;
+    extern "C" int ccompiler_wrapper_1_(DLTensor* arg0, DLTensor* arg1,
+                                        DLTensor* arg2, DLTensor* arg3,
+                                        DLTensor* out) {
+        gcc_1_(static_cast<float*>(arg0->data), static_cast<float*>(arg1->data),
+               static_cast<float*>(arg2->data), static_cast<float*>(arg3->data),
+               static_cast<float*>(out->data));
+        return 0;
     }
 
+    TVM_DLL_EXPORT_TYPED_FUNC(json_rt_1, ccompiler_wrapper_1_);
+
     GCC_BINARY_OP_2D(gcc_0_0, *, 10, 10);
     GCC_BINARY_OP_2D(gcc_0_1, -, 10, 10);
     GCC_BINARY_OP_2D(gcc_0_2, +, 10, 10);
@@ -100,21 +96,17 @@ def generate_csource_module():
       free(buf_1);
     }
 
-    extern "C" int json_rt_0(TVMValue* value, int* type_code, int nargs) {
-      if (nargs != 5) {
-        printf("Expect 5 args, but get %d", nargs);
-        return 1;
-      }
-      DLTensor* arg0 = static_cast<DLTensor*>(value[0].v_handle);
-      DLTensor* arg1 = static_cast<DLTensor*>(value[1].v_handle);
-      DLTensor* arg2 = static_cast<DLTensor*>(value[2].v_handle);
-      DLTensor* arg3 = static_cast<DLTensor*>(value[3].v_handle);
-      DLTensor* out = static_cast<DLTensor*>(value[4].v_handle);
-      gcc_0_(static_cast<float*>(arg0->data), static_cast<float*>(arg1->data),
-             static_cast<float*>(arg2->data), static_cast<float*>(arg3->data),
-             static_cast<float*>(out->data));
-      return 0;
+    extern "C" int ccompiler_wrapper_0_(DLTensor* arg0, DLTensor* arg1,
+                                        DLTensor* arg2, DLTensor* arg3,
+                                        DLTensor* out) {
+        gcc_0_(static_cast<float*>(arg0->data), static_cast<float*>(arg1->data),
+               static_cast<float*>(arg2->data), static_cast<float*>(arg3->data),
+               static_cast<float*>(out->data));
+        return 0;
     }
+
+    TVM_DLL_EXPORT_TYPED_FUNC(json_rt_0, ccompiler_wrapper_0_);
+
     '''
     csource_module = _tvm_module.csource_module_create(code, "cc")
     return csource_module
@@ -128,11 +120,12 @@ def generate_engine_module():
 
     code = r'''
     #include <tvm/runtime/c_runtime_api.h>
+    #include <tvm/runtime/packed_func.h>
     #include <dlpack/dlpack.h>
-    #include "gcc_engine.h"
+    #include "json_engine.h"
 
-    extern "C" void gcc_1_(float* gcc_input4, float* gcc_input5,
-            float* gcc_input6, float* gcc_input7, float* out) {
+    extern "C" void json_1_(float* json_input4, float* json_input5,
+                            float* json_input6, float* json_input7, float* out) {
             
         std::string graph =
             "add_2d,10,10\n"
@@ -140,28 +133,22 @@ def generate_engine_module():
             "mul_2d,10,10\n";
 
         Engine engine;
-        engine.run(graph, {gcc_input4, gcc_input5, gcc_input6, gcc_input7}, out);
+        engine.run(graph, {json_input4, json_input5, json_input6, json_input7}, out);
     }
 
-
-    extern "C" int json_rt_1(TVMValue* value, int* type_code, int nargs) {
-        if (nargs != 5) {
-            printf("Expect 5 args, but get %d", nargs);
-            return 1;
-        }
-        DLTensor* arg0 = static_cast<DLTensor*>(value[0].v_handle);
-        DLTensor* arg1 = static_cast<DLTensor*>(value[1].v_handle);
-        DLTensor* arg2 = static_cast<DLTensor*>(value[2].v_handle);
-        DLTensor* arg3 = static_cast<DLTensor*>(value[3].v_handle);
-        DLTensor* out = static_cast<DLTensor*>(value[4].v_handle);
-        gcc_1_(static_cast<float*>(arg0->data), static_cast<float*>(arg1->data),
+    extern "C" int json_wrapper_1_(DLTensor* arg0, DLTensor* arg1,
+                                   DLTensor* arg2, DLTensor* arg3,
+                                   DLTensor* out) {
+        json_1_(static_cast<float*>(arg0->data), static_cast<float*>(arg1->data),
                 static_cast<float*>(arg2->data), static_cast<float*>(arg3->data),
                 static_cast<float*>(out->data));
         return 0;
     }
 
-    extern "C" void gcc_0_(float* gcc_input0, float* gcc_input1,
-            float* gcc_input2, float* gcc_input3, float* out) {
+    TVM_DLL_EXPORT_TYPED_FUNC(json_rt_1, json_wrapper_1_);
+
+    extern "C" void json_0_(float* json_input0, float* json_input1,
+                            float* json_input2, float* json_input3, float* out) {
             
         std::string graph =
             "add_2d,10,10\n"
@@ -169,40 +156,36 @@ def generate_engine_module():
             "mul_2d,10,10\n";
 
         Engine engine;
-        engine.run(graph, {gcc_input0, gcc_input1, gcc_input2, gcc_input3}, out);
+        engine.run(graph, {json_input0, json_input1, json_input2, json_input3}, out);
 
     }
 
-    extern "C" int json_rt_0(TVMValue* value, int* type_code, int nargs) {
-        if (nargs != 5) {
-            printf("Expect 5 args, but get %d", nargs);
-            return 1;
-        }
-        DLTensor* arg0 = static_cast<DLTensor*>(value[0].v_handle);
-        DLTensor* arg1 = static_cast<DLTensor*>(value[1].v_handle);
-        DLTensor* arg2 = static_cast<DLTensor*>(value[2].v_handle);
-        DLTensor* arg3 = static_cast<DLTensor*>(value[3].v_handle);
-        DLTensor* out = static_cast<DLTensor*>(value[4].v_handle);
-        gcc_0_(static_cast<float*>(arg0->data), static_cast<float*>(arg1->data),
+    extern "C" int json_wrapper_0_(DLTensor* arg0, DLTensor* arg1,
+                                   DLTensor* arg2, DLTensor* arg3,
+                                   DLTensor* out) {
+        json_0_(static_cast<float*>(arg0->data), static_cast<float*>(arg1->data),
                 static_cast<float*>(arg2->data), static_cast<float*>(arg3->data),
                 static_cast<float*>(out->data));
         return 0;
     }
+
+    TVM_DLL_EXPORT_TYPED_FUNC(json_rt_0, json_wrapper_0_);
+
     '''
 
-    gen_gcc_engine()
+    gen_json_engine()
     csource_module = _tvm_module.csource_module_create(code, "cc")
     return csource_module
 
 
-def gen_gcc_engine():
+def gen_json_engine():
     """An example of external backend runtime engine. This is supposed to be provided
       by third-party vendors and included when building the generated external kernel code.
     """
 
     code = r'''
-    #ifndef _GCC_ENGINE_H_
-    #define _GCC_ENGINE_H_
+    #ifndef _JSON_ENGINE_H_
+    #define _JSON_ENGINE_H_
     #include <cstdint>
     #include <string>
     #include <sstream>
@@ -298,9 +281,9 @@ def gen_gcc_engine():
         std::vector<float*> buffers;
     };
 
-    #endif
+    #endif  // _JSON_ENGINE_H_
     '''
-    header_file = tmp_path.relpath("gcc_engine.h")
+    header_file = tmp_path.relpath("json_engine.h")
     with open(header_file, 'w') as f:
         f.write(code)