[uTVM][Runtime] Introduce Virtual Memory Allocator to CRT (#5124)

author Liangfu Chen <liangfu.chen@icloud.com>

Tue, 7 Apr 2020 21:33:05 +0000 (05:33 +0800)

committer GitHub <noreply@github.com>

Tue, 7 Apr 2020 21:33:05 +0000 (14:33 -0700)
author Liangfu Chen <liangfu.chen@icloud.com>
Tue, 7 Apr 2020 21:33:05 +0000 (05:33 +0800)
committer GitHub <noreply@github.com>
Tue, 7 Apr 2020 21:33:05 +0000 (14:33 -0700)
diff --git a/apps/bundle_deploy/Makefile b/apps/bundle_deploy/Makefile

index c80765f..73f9d75 100644 (file)
--- a/apps/bundle_deploy/Makefile
+++ b/apps/bundle_deploy/Makefile
@@ -20,11 +20,11 @@
  # Setup build environment
  TVM_ROOT=$(shell cd ../..; pwd)
  DMLC_CORE=${TVM_ROOT}/3rdparty/dmlc-core
-PKG_CXXFLAGS = -std=c++14 -O2 -fPIC \
+PKG_CXXFLAGS = -Wall -std=c++14 -O2 -fPIC \
         -I${TVM_ROOT}/include \
         -I${DMLC_CORE}/include \
         -I${TVM_ROOT}/3rdparty/dlpack/include
-PKG_CFLAGS = -std=c99 -O2 -fPIC \
+PKG_CFLAGS = -Wall -std=c99 -O2 -fPIC \
         -I${TVM_ROOT}/include \
         -I${DMLC_CORE}/include \
         -I${TVM_ROOT}/3rdparty/dlpack/include
@@ -57,11 +57,11 @@ $(build_dir)/test_dynamic: test.cc ${build_dir}/test_graph.json ${build_dir}/tes
  
  $(build_dir)/demo_static: demo_static.c ${build_dir}/bundle_static.o ${build_dir}/model.o ${build_dir}/graph.json.c ${build_dir}/params.bin.c
         @mkdir -p $(@D)
-       gcc $(PKG_CXXFLAGS) -o $@ demo_static.c ${build_dir}/bundle_static.o ${build_dir}/model.o -lm
+       gcc $(PKG_CFLAGS) -o $@ demo_static.c ${build_dir}/bundle_static.o ${build_dir}/model.o -lm
  
  $(build_dir)/test_static: test_static.c ${build_dir}/bundle_static.o ${build_dir}/test_model.o
         @mkdir -p $(@D)
-       gcc $(PKG_CXXFLAGS) -o $@ $^
+       gcc $(PKG_CFLAGS) -o $@ $^
  
  # Serialize our graph.json file.
  $(build_dir)/graph.json.c: $(build_dir)/graph.json
@@ -71,14 +71,6 @@ $(build_dir)/graph.json.c: $(build_dir)/graph.json
  $(build_dir)/params.bin.c: $(build_dir)/params.bin
         xxd -i $^  > $@
  
-# # Serialize our test_graph.json file.
-# $(build_dir)/test_graph.json.c: $(build_dir)/test_graph.json
-#      xxd -i $^  > $@
-# 
-# # Serialize our test_params.bin file.
-# $(build_dir)/test_params.bin.c: $(build_dir)/test_params.bin
-#      xxd -i $^  > $@
-
  $(build_dir)/model.o $(build_dir)/graph.json $(build_dir)/params.bin $(build_dir)/cat.bin: build_model.py
         python3 $< -o $(build_dir)
  
diff --git a/apps/bundle_deploy/demo.cc b/apps/bundle_deploy/demo.cc

index 34be279..0de10d7 100644 (file)
--- a/apps/bundle_deploy/demo.cc
+++ b/apps/bundle_deploy/demo.cc
@@ -109,11 +109,11 @@ int main(int argc, char **argv) {
           max_index, max_iter);
    printf("timing: %.2f ms (create), %.2f ms (set_input), %.2f ms (run), "
           "%.2f ms (get_output), %.2f ms (destroy)\n",
-         (t1.tv_sec-t0.tv_sec)*1000000 + (t1.tv_usec-t0.tv_usec)/1000.f,
-         (t2.tv_sec-t1.tv_sec)*1000000 + (t2.tv_usec-t1.tv_usec)/1000.f,
-         (t3.tv_sec-t2.tv_sec)*1000000 + (t3.tv_usec-t2.tv_usec)/1000.f,
-         (t4.tv_sec-t3.tv_sec)*1000000 + (t4.tv_usec-t3.tv_usec)/1000.f,
-         (t5.tv_sec-t4.tv_sec)*1000000 + (t5.tv_usec-t4.tv_usec)/1000.f);
+         (t1.tv_sec-t0.tv_sec)*1000.0f + (t1.tv_usec-t0.tv_usec)/1000.f,
+         (t2.tv_sec-t1.tv_sec)*1000.0f + (t2.tv_usec-t1.tv_usec)/1000.f,
+         (t3.tv_sec-t2.tv_sec)*1000.0f + (t3.tv_usec-t2.tv_usec)/1000.f,
+         (t4.tv_sec-t3.tv_sec)*1000.0f + (t4.tv_usec-t3.tv_usec)/1000.f,
+         (t5.tv_sec-t4.tv_sec)*1000.0f + (t5.tv_usec-t4.tv_usec)/1000.f);
    dlclose(bundle);
    
    return 0;
diff --git a/apps/bundle_deploy/runtime.c b/apps/bundle_deploy/runtime.c

index 6a53aa1..a7ffea9 100644 (file)
--- a/apps/bundle_deploy/runtime.c
+++ b/apps/bundle_deploy/runtime.c
@@ -30,23 +30,37 @@
  #define TVM_CRT_MAX_NDIM 6
  /*! Maximum supported arguments in generated functions */
  #define TVM_CRT_MAX_ARGS 10
+/*! Maximum supported string length in dltype, e.g. "int8", "int16", "float32" */
+#define TVM_CRT_STRLEN_DLTYPE 10
+/*! Maximum supported string length in function names */
+#define TVM_CRT_STRLEN_NAME 80
  
-/*! Maximum inputs in a GraphRuntimeNode */
-#define GRAPH_RUNTIME_NODE_MAX_INPUTS 300
-/*! Maximum supported contexts in a GraphRuntime */
-#define GRAPH_RUNTIME_MAX_CONTEXTS 1
-/*! Maximum supported nodes in a GraphRuntime */
-#define GRAPH_RUNTIME_MAX_NODES 400
-/*! Maximum input nodes in a GraphRuntime */
-#define GRAPH_RUNTIME_MAX_INPUT_NODES 300
-/*! Maximum nodes in a GraphRuntime for quick entry indexing */
-#define GRAPH_RUNTIME_MAX_NODE_ROW_PTR 300
-/*! Maximum output entries in a GraphRuntime */
-#define GRAPH_RUNTIME_MAX_OUTPUTS 300
+/*!
+ * \brief Base-2 logarithm of the memory pool size (in bytes) for virtual memory allocation
+ *
+ * Here is a list of possible choices:
+ * * use 16 for 64 KiB memory space
+ * * use 17 for 128 KiB memory space
+ * * use 18 for 256 KiB memory space
+ * * use 19 for 512 KiB memory space
+ * * use 20 for 1 MiB memory space
+ * * use 21 for 2 MiB memory space
+ * * use 22 for 4 MiB memory space
+ * * use 23 for 8 MiB memory space
+ * * use 24 for 16 MiB memory space
+ * * use 25 for 32 MiB memory space
+ * * use 26 for 64 MiB memory space
+ * * use 27 for 128 MiB memory space
+ * * use 28 for 256 MiB memory space
+ */
+#define TVM_CRT_LOG_VIRT_MEM_SIZE 24
+
+/*! \brief Page size for virtual memory allocation */
+#define TVM_CRT_PAGE_BYTES 4096
  
  #include "../../src/runtime/crt/crt_runtime_api.c"
  #include "../../src/runtime/crt/crt_backend_api.c"
  #include "../../src/runtime/crt/graph_runtime.c"
  #include "../../src/runtime/crt/load_json.c"
  #include "../../src/runtime/crt/ndarray.c"
-
+#include "../../src/runtime/crt/memory.c"
diff --git a/apps/bundle_deploy/test.cc b/apps/bundle_deploy/test.cc

index 643f1ad..c92400d 100644 (file)
--- a/apps/bundle_deploy/test.cc
+++ b/apps/bundle_deploy/test.cc
@@ -122,11 +122,11 @@ int main(int argc, char **argv) {
  
    printf("timing: %.2f ms (create), %.2f ms (set_input), %.2f ms (run), "
           "%.2f ms (get_output), %.2f ms (destroy)\n",
-         (t1.tv_sec-t0.tv_sec)*1000000 + (t1.tv_usec-t0.tv_usec)/1000.f,
-         (t2.tv_sec-t1.tv_sec)*1000000 + (t2.tv_usec-t1.tv_usec)/1000.f,
-         (t3.tv_sec-t2.tv_sec)*1000000 + (t3.tv_usec-t2.tv_usec)/1000.f,
-         (t4.tv_sec-t3.tv_sec)*1000000 + (t4.tv_usec-t3.tv_usec)/1000.f,
-         (t5.tv_sec-t4.tv_sec)*1000000 + (t5.tv_usec-t4.tv_usec)/1000.f);
+         (t1.tv_sec-t0.tv_sec)*1000.0f + (t1.tv_usec-t0.tv_usec)/1000.f,
+         (t2.tv_sec-t1.tv_sec)*1000.0f + (t2.tv_usec-t1.tv_usec)/1000.f,
+         (t3.tv_sec-t2.tv_sec)*1000.0f + (t3.tv_usec-t2.tv_usec)/1000.f,
+         (t4.tv_sec-t3.tv_sec)*1000.0f + (t4.tv_usec-t3.tv_usec)/1000.f,
+         (t5.tv_sec-t4.tv_sec)*1000.0f + (t5.tv_usec-t4.tv_usec)/1000.f);
  
    free(json_data);
    free(params_data);
diff --git a/include/tvm/runtime/crt/memory.h b/include/tvm/runtime/crt/memory.h

new file mode 100644 (file)

index 0000000..3e47060
--- /dev/null
+++ b/include/tvm/runtime/crt/memory.h
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file tvm/runtime/crt/memory.h
+ * \brief The virtual memory manager for micro-controllers
+ */
+
+#ifndef TVM_RUNTIME_CRT_MEMORY_H_
+#define TVM_RUNTIME_CRT_MEMORY_H_
+
+#include <stddef.h>  // size_t
+
+/*!
+ * \brief Leak counter; TVMGraphRuntimeRelease checks that it is 0 after freeing a runtime.
+ *
+ * NOTE(review): a `static` object defined in a header gives every translation unit that
+ * includes it a private copy -- confirm that all users are built as one unit.
+ */
+static int vleak_size = 0;
+
+/*!
+ * \brief Allocate memory from manager
+ * \param size The size of memory in bytes
+ * \return The virtual address
+ */
+void * vmalloc(size_t size);
+
+/*!
+ * \brief Reallocate memory from manager
+ * \param ptr The pointer to the memory area to be reallocated
+ * \param size The size of memory in bytes
+ * \return The virtual address
+ */
+void * vrealloc(void * ptr, size_t size);
+
+/*!
+ * \brief Free the memory.
+ * \param ptr The pointer to the memory to deallocate
+ */
+void vfree(void * ptr);
+
+#endif  // TVM_RUNTIME_CRT_MEMORY_H_
diff --git a/src/runtime/crt/crt_backend_api.c b/src/runtime/crt/crt_backend_api.c

index e011e47..52cefaf 100644 (file)
--- a/src/runtime/crt/crt_backend_api.c
+++ b/src/runtime/crt/crt_backend_api.c
@@ -18,6 +18,7 @@
   */
  
  #include <tvm/runtime/c_backend_api.h>
+#include <tvm/runtime/crt/memory.h>
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -29,18 +30,12 @@ void* TVMBackendAllocWorkspace(int device_type, int device_id, uint64_t nbytes,
    void* ptr = 0;
    assert(nbytes > 0);
    unsigned int dtype_bytes = dtype_bits_hint / 8;
-#ifdef __ANDROID__
-  ptr = memalign(64, nbytes * dtype_bytes);
-#else
-  const int ret = posix_memalign(&ptr, 64, nbytes * dtype_bytes);
-  (void)ret;
-  assert(ret == 0);
-#endif
+  ptr = vmalloc(nbytes * dtype_bytes);
    return ptr;
  }
  
  int TVMBackendFreeWorkspace(int device_type, int device_id, void* ptr) {
-  free(ptr);
+  vfree(ptr);
    return 0;
  }
  
@@ -52,6 +47,8 @@ int TVMBackendParallelLaunch(FTVMParallelLambda flambda, void* cdata, int num_ta
  }
  
  int TVMBackendRegisterSystemLibSymbol(const char* name, void* ptr) {
+  g_fexecs = vrealloc(g_fexecs, sizeof(TVMPackedFunc) * (g_fexecs_count + 1));
-  snprintf(g_fexecs[g_fexecs_count].name, sizeof(g_fexecs[g_fexecs_count].name), name);
+  // Copy the name through "%s": the symbol must never be interpreted as a format string.
+  snprintf(g_fexecs[g_fexecs_count].name, sizeof(g_fexecs[g_fexecs_count].name), "%s", name);
    g_fexecs[g_fexecs_count].fexec = ptr;
    g_fexecs_count++;
diff --git a/src/runtime/crt/graph_runtime.c b/src/runtime/crt/graph_runtime.c

index 89c325a..b5ed3b7 100644 (file)
--- a/src/runtime/crt/graph_runtime.c
+++ b/src/runtime/crt/graph_runtime.c
@@ -21,6 +21,10 @@
   * \file graph_runtime.c
   * \brief implement graph runtime in pure C
   */
+
+#include <tvm/runtime/crt/memory.h>
+
+#include "logging.h"
  #include "graph_runtime.h"
  
  #ifndef MAX
@@ -105,13 +109,9 @@ int TVMGraphRuntimeNode_Load(TVMGraphRuntimeNode * node, JSONReader *reader) {
        bitmask |= 2;
      } else if (!strcmp(key, "inputs")) {
        size_t count = node->inputs_count;
-      if (count >= GRAPH_RUNTIME_NODE_MAX_INPUTS) {
-        fprintf(stderr, "The number of inputs in graph runtime node is greater than expected.\n");
-        status = -1;
-        break;
-      }
        reader->BeginArray(reader);
        while (reader->NextArrayItem(reader)) {
+        node->inputs = vrealloc(node->inputs, sizeof(TVMGraphRuntimeNodeEntry)*(count+1));
          TVMGraphRuntimeNodeEntry * inputs = node->inputs + count;
          reader->BeginArray(reader);
          if (!reader->NextArrayItem(reader)) {
@@ -169,6 +169,14 @@ TVMGraphRuntimeNode TVMGraphRuntimeNodeCreate() {
    return node;
  }
  
+/*! \brief Free the inputs array of a node and set it to 0; a null node is ignored. */
+void TVMGraphRuntimeNodeRelease(TVMGraphRuntimeNode * node) {
+  if (node && node->inputs) {
+    vfree(node->inputs);
+    node->inputs = 0;
+  }
+}
+
  int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr * attr, JSONReader *reader) {
    int status = 0;
    int bitmask = 0;
@@ -199,7 +207,8 @@ int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr * attr, JSONReader *r
        }
        reader->BeginArray(reader);
        while (reader->NextArrayItem(reader)) {
-        reader->ReadString(reader, attr->dltype[dltype_count]);
+        attr->dltype = vrealloc(attr->dltype, TVM_CRT_STRLEN_DLTYPE * (dltype_count + 1));
+        reader->ReadString(reader, attr->dltype + dltype_count * TVM_CRT_STRLEN_DLTYPE);
          dltype_count++;
        }
        attr->dltype_count = dltype_count;;
@@ -229,6 +238,7 @@ int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr * attr, JSONReader *r
        }
        reader->BeginArray(reader);
        while (reader->NextArrayItem(reader)) {
+        attr->storage_id = vrealloc(attr->storage_id, sizeof(uint32_t)*(storage_id_count+1));
          reader->ReadUnsignedInteger(reader, &(attr->storage_id[storage_id_count]));
          storage_id_count++;
        }
@@ -258,26 +268,24 @@ int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr * attr, JSONReader *r
        }
        reader->BeginArray(reader);
        while (reader->NextArrayItem(reader)) {
+        attr->shape =
+          vrealloc(attr->shape, sizeof(attr->shape[0])*(shape_count+1)*TVM_CRT_MAX_NDIM);
+        attr->ndim = vrealloc(attr->ndim, sizeof(attr->ndim[0])*(shape_count+1));
          reader->BeginArray(reader);
-        reader->ReadInteger(reader, &(attr->shape[shape_count][0]));
+        int64_t * attr_shape_ptr = attr->shape + shape_count*TVM_CRT_MAX_NDIM;
+        reader->ReadInteger(reader, attr_shape_ptr + 0);
          uint32_t ndim = 1;
          if (reader->NextArrayItem(reader)) {
-          if (reader->NextArrayItem(reader)) {
-            reader->ReadInteger(reader, &(attr->shape[shape_count][1])); ndim++;
+          for (ndim = 1; ndim < TVM_CRT_MAX_NDIM; ndim++) {
              if (reader->NextArrayItem(reader)) {
-              reader->ReadInteger(reader, &(attr->shape[shape_count][2])); ndim++;
-              if (reader->NextArrayItem(reader)) {
-                reader->ReadInteger(reader, &(attr->shape[shape_count][3])); ndim++;
-                if (reader->NextArrayItem(reader)) {
-                  reader->ReadInteger(reader, &(attr->shape[shape_count][4])); ndim++;
-                  if (reader->NextArrayItem(reader)) {
-                    reader->ReadInteger(reader, &(attr->shape[shape_count][5])); ndim++;
-                    reader->NextArrayItem(reader);
-                  }
-                }
-              }
+              reader->ReadInteger(reader, attr_shape_ptr + ndim);
+            } else {
+              break;
              }
            }
+          if (ndim == TVM_CRT_MAX_NDIM) {
+            reader->NextArrayItem(reader);
+          }
          }
          attr->ndim[shape_count] = ndim;
          shape_count++;
@@ -308,6 +316,7 @@ int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr * attr, JSONReader *r
          break;
        }
        while (reader->NextArrayItem(reader)) {
+        attr->device_index = vrealloc(attr->device_index, sizeof(uint32_t)*(device_index_count+1));
          reader->ReadUnsignedInteger(reader, &(attr->device_index[device_index_count]));
          device_index_count++;
        }
@@ -330,13 +339,18 @@ int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr * attr, JSONReader *r
            status = -1;
            break;
          }
-        uint32_t temp[GRAPH_RUNTIME_MAX_NODES];
+        uint32_t * temp = 0;
          uint32_t temp_count = 0;
          reader->BeginArray(reader);
          while (reader->NextArrayItem(reader)) {
+          temp = vrealloc(temp, sizeof(uint32_t) * (temp_count + 1));
            reader->ReadUnsignedInteger(reader, &(temp[temp_count]));
            temp_count++;
          }
+        if (temp) {
+          vfree(temp);
+          temp = 0;
+        }
        } else if (!strcmp(type, "size_t")) {
          if (!(reader->NextArrayItem(reader))) {
            fprintf(stderr, "Invalid json format\n");
@@ -364,6 +378,30 @@ int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr * attr, JSONReader *r
    return status;
  }
  
+/*!
+ * \brief Release the arrays held by a graph attribute record.
+ *
+ * Each non-null member array is passed to vfree, then its pointer is set to 0.
+ * The dltype_count and shape_count fields are left unchanged.
+ * \param attr The attribute record; a null pointer is ignored.
+ */
+void TVMGraphRuntimeGraphAttr_Release(TVMGraphRuntimeGraphAttr * attr) {
+  if (attr == 0) {
+    return;
+  }
+  // These five arrays are grown with vrealloc by TVMGraphRuntimeGraphAttr_Load.
+  if (attr->storage_id != 0) { vfree(attr->storage_id); }
+  attr->storage_id = 0;
+  if (attr->device_index != 0) { vfree(attr->device_index); }
+  attr->device_index = 0;
+  if (attr->dltype != 0) { vfree(attr->dltype); }
+  attr->dltype = 0;
+  if (attr->shape != 0) { vfree(attr->shape); }
+  attr->shape = 0;
+  if (attr->ndim != 0) { vfree(attr->ndim); }
+  attr->ndim = 0;
+}
+
  int TVMGraphRuntime_Load(TVMGraphRuntime * runtime, JSONReader *reader) {
      int status = 0;
      reader->BeginObject(reader);
@@ -373,6 +411,8 @@ int TVMGraphRuntime_Load(TVMGraphRuntime * runtime, JSONReader *reader) {
        if (!strcmp(key, "nodes")) {
          reader->BeginArray(reader);
          while (reader->NextArrayItem(reader)) {
+          runtime->nodes =
+            vrealloc(runtime->nodes, sizeof(TVMGraphRuntimeNode) * (runtime->nodes_count + 1));
            TVMGraphRuntimeNode * node = runtime->nodes + runtime->nodes_count;
            status = TVMGraphRuntimeNode_Load(node, reader);
            if (status != 0) {
@@ -380,7 +420,7 @@ int TVMGraphRuntime_Load(TVMGraphRuntime * runtime, JSONReader *reader) {
              break;
  #if TVM_CRT_DEBUG
            } else {
-            printf("layer %u: `%s` loaded.\n", runtime->nodes_count, node->name);
+            printf("loading: node (%u) %s loaded.\n", runtime->nodes_count, node->name);
  #endif  // TVM_CRT_DEBUG
            }
            runtime->nodes_count++;
@@ -389,6 +429,8 @@ int TVMGraphRuntime_Load(TVMGraphRuntime * runtime, JSONReader *reader) {
        } else if (!strcmp(key, "arg_nodes")) {
          reader->BeginArray(reader);
          while (reader->NextArrayItem(reader)) {
+          runtime->input_nodes =
+            vrealloc(runtime->input_nodes, sizeof(uint32_t) * (runtime->input_nodes_count + 1));
            uint32_t * node = runtime->input_nodes + runtime->input_nodes_count;
            reader->ReadUnsignedInteger(reader, node);
            runtime->input_nodes_count++;
@@ -397,6 +439,8 @@ int TVMGraphRuntime_Load(TVMGraphRuntime * runtime, JSONReader *reader) {
        } else if (!strcmp(key, "node_row_ptr")) {
          reader->BeginArray(reader);
          while (reader->NextArrayItem(reader)) {
+          runtime->node_row_ptr =
+            vrealloc(runtime->node_row_ptr, sizeof(uint32_t) * (runtime->node_row_ptr_count + 1));
            uint32_t count = runtime->node_row_ptr_count;
            uint32_t * node = runtime->node_row_ptr + count;
            reader->ReadUnsignedInteger(reader, node);
@@ -406,6 +450,9 @@ int TVMGraphRuntime_Load(TVMGraphRuntime * runtime, JSONReader *reader) {
        } else if (!strcmp(key, "heads")) {
          reader->BeginArray(reader);
          while (reader->NextArrayItem(reader)) {
+          runtime->outputs =
+            vrealloc(runtime->outputs,
+                     sizeof(TVMGraphRuntimeNodeEntry) * (runtime->outputs_count + 1));
            TVMGraphRuntimeNodeEntry * entry = runtime->outputs + runtime->outputs_count;
            status = NodeEntry_Load(entry, reader);
            if (status != 0) {
@@ -458,9 +505,7 @@ int TVMGraphRuntime_GetInputIndex(TVMGraphRuntime * runtime, const char * name)
        break;
      }
    }
-  if (rv < 0) {
-    fprintf(stderr, "cannot find \"%s\" among input\n", name);
-  }
+  CHECK_GE(rv, 0, "cannot find '%s' among input.", name);
    return rv;
  }
  
@@ -476,7 +521,7 @@ void TVMGraphRuntime_SetInput(TVMGraphRuntime * runtime, const char * name, DLTe
      fprintf(stderr, "given index is greater than num of input nodes.\n");
    }
    uint32_t eid = runtime->GetEntryId(runtime, runtime->input_nodes[index], 0);
-  runtime->data_entry[eid].dl_tensor = *data_in;
+  runtime->data_entry[eid].dl_tensor.data = data_in->data;
  }
  
  /*!
@@ -501,8 +546,8 @@ int TVMGraphRuntime_LoadParams(TVMGraphRuntime * runtime, const char * param_blo
    bptr += sizeof(reserved);
  
    // read names
-  char names[GRAPH_RUNTIME_MAX_NODES][80];
-  memset(names, 0, sizeof(names));
+  char * names = vmalloc(TVM_CRT_STRLEN_NAME * runtime->nodes_count);
+  memset(names, 0, TVM_CRT_STRLEN_NAME * runtime->nodes_count);
    uint64_t names_count;
    int idx;
    names_count = ((uint64_t*)bptr)[0];  // NOLINT(*)
@@ -515,7 +560,7 @@ int TVMGraphRuntime_LoadParams(TVMGraphRuntime * runtime, const char * param_blo
        fprintf(stderr, "Error: function name longer than expected.\n");
        status = -1;
      }
-    memcpy(names[idx], bptr, name_length);
+    memcpy(names + TVM_CRT_STRLEN_NAME * idx, bptr, name_length);
      bptr += name_length;
    }
  
@@ -530,11 +575,9 @@ int TVMGraphRuntime_LoadParams(TVMGraphRuntime * runtime, const char * param_blo
    }
  
    for (idx = 0; idx < size; idx++) {
-    int32_t in_idx = runtime->GetInputIndex(runtime, names[idx]);
-    if (!(in_idx >= 0)) {
-      fprintf(stderr, "Found param for non-existent input: %s\n", names[idx]);
-      status = -1;
-    }
+    int32_t in_idx = runtime->GetInputIndex(runtime, names + TVM_CRT_STRLEN_NAME * idx);
+    CHECK_GE(in_idx, 0,  /* index 0 is valid; only a negative value means "not found" */
+             "Found param for non-existent input: %s\n", names + TVM_CRT_STRLEN_NAME * idx);
      uint32_t eid = runtime->GetEntryId(runtime, runtime->input_nodes[in_idx], 0);
      if (!(eid < runtime->data_entry_count)) {
        fprintf(stderr, "`entry_id`=%d is greater than expected(%d).\n",
@@ -542,15 +585,26 @@ int TVMGraphRuntime_LoadParams(TVMGraphRuntime * runtime, const char * param_blo
        status = -1;
      }
  
+    if (runtime->data_entry[eid].dl_tensor.shape) {
+      vfree(runtime->data_entry[eid].dl_tensor.shape);
+      runtime->data_entry[eid].dl_tensor.shape = 0;
+    }
+    if (runtime->data_entry[eid].dl_tensor.data) {
+      vfree(runtime->data_entry[eid].dl_tensor.data);
+      runtime->data_entry[eid].dl_tensor.data = 0;
+    }
      status |= TVMNDArray_Load(&(runtime->data_entry[eid]), &bptr);
  #if TVM_CRT_DEBUG
      TVMNDArray * entry = &(runtime->data_entry[eid]);
-    printf("param %s loaded, in_idx=%d, eid=%d, ndim=%d, data[0]=%f\n",
-           names[idx], in_idx, eid, entry->dl_tensor.ndim,
+    printf("loading: param %s loaded, in_idx=%d, eid=%d, ndim=%d, data[0]=%f\n",
+           names + TVM_CRT_STRLEN_NAME * idx, in_idx, eid, entry->dl_tensor.ndim,
             ((float*)entry->dl_tensor.data)[0]);  // NOLINT(*)
  #endif  // TVM_CRT_DEBUG
    }
  
+  // Release memory
+  vfree(names);
+
    return status;
  }
  
@@ -564,7 +618,7 @@ void TVMGraphRuntime_Run(TVMGraphRuntime * runtime) {
    for (idx = 0; idx < runtime->op_execs_count; ++idx) {
      if (runtime->op_execs[idx].fexec) {
  #if TVM_CRT_DEBUG
-      printf("calling %s (%d)\n", runtime->op_execs[idx].name, idx);
+      printf("calling: %s (%d)\n", runtime->op_execs[idx].name, idx);
  #endif  // TVM_CRT_DEBUG
        runtime->op_execs[idx].Call(&(runtime->op_execs[idx]));
      }
@@ -581,33 +635,34 @@ int TVMGraphRuntime_GetOutput(TVMGraphRuntime * runtime, const int32_t idx, DLTe
    int32_t elem_bytes = out->dtype.bits / 8;
    int64_t size = Shape_Accumulate(out->shape, out->ndim);
    DLTensor * tensor = &(runtime->data_entry[eid].dl_tensor);
-  assert(out->ndim == tensor->ndim);
-  assert(out->dtype.bits == tensor->dtype.bits);
-  assert(Shape_Accumulate(out->shape, out->ndim) == Shape_Accumulate(tensor->shape, tensor->ndim));
+  CHECK(out->ndim == tensor->ndim);
+  CHECK(out->dtype.bits == tensor->dtype.bits);
+  CHECK(Shape_Accumulate(out->shape, out->ndim) == Shape_Accumulate(tensor->shape, tensor->ndim));
    memcpy(out->data, tensor->data, size * elem_bytes);
    return status;
  }
  
  void TVMGraphRuntime_SetupStorage(TVMGraphRuntime * runtime) {
-  uint32_t idx, dim;
+  uint32_t idx;
  
    // Grab saved optimization plan from graph.
-  DLDataType vtype[GRAPH_RUNTIME_MAX_NODES];
    TVMGraphRuntimeGraphAttr * attrs = &(runtime->attrs);
+  DLDataType * vtype = vmalloc(sizeof(DLDataType) * attrs->dltype_count);
    for (idx = 0; idx < attrs->dltype_count; idx++) {
-    vtype[idx] = String2DLDataType(attrs->dltype[idx]);
+    vtype[idx] = String2DLDataType(attrs->dltype + idx * TVM_CRT_STRLEN_DLTYPE);
    }
  
    // Size and device type of each storage pool entry.
-  TVMGraphRuntimePoolEntry pool_entry[GRAPH_RUNTIME_MAX_NODES];
-  memset(pool_entry, 0, sizeof(pool_entry));
+  TVMGraphRuntimePoolEntry * pool_entry =
+    vmalloc(sizeof(TVMGraphRuntimePoolEntry) * runtime->nodes_count);
+  memset(pool_entry, 0, sizeof(TVMGraphRuntimePoolEntry) * runtime->nodes_count);
    uint32_t  pool_entry_count = 0;
    // Find the maximum space size.
    for (idx = 0; idx < attrs->shape_count; idx++) {
      int storage_id = attrs->storage_id[idx];
      // Use the fallback device if no device index is available.
      int device_type = runtime->ctxs[0].device_type;
-    uint32_t size = Shape_Accumulate(attrs->shape[idx], attrs->ndim[idx]);
+    uint32_t size = Shape_Accumulate(attrs->shape+idx*TVM_CRT_MAX_NDIM, attrs->ndim[idx]);
      DLDataType t = vtype[idx];
      uint32_t bits = t.bits * t.lanes;
      size_t bytes = ((bits + 7U) / 8U) * size;
@@ -622,15 +677,16 @@ void TVMGraphRuntime_SetupStorage(TVMGraphRuntime * runtime) {
  
    // Allocate the space.
    for (idx = 0; idx < pool_entry_count; idx++) {
+    runtime->storage_pool =
+      vrealloc(runtime->storage_pool, sizeof(TVMNDArray) * (runtime->storage_pool_count + 1));
      TVMGraphRuntimePoolEntry pit = pool_entry[idx];
      int64_t shape[TVM_CRT_MAX_NDIM] = {0, };
      TVMContext ctx = runtime->ctxs[0];
      DLDataType dtype = {kDLFloat, 32, 1};
      shape[0] = (pit.size + 3) / 4;
      runtime->storage_pool[runtime->storage_pool_count] = TVMNDArray_Empty(1, shape, dtype, ctx);
-    if (runtime->storage_pool[runtime->storage_pool_count].dl_tensor.data == 0) {
-      fprintf(stderr, "fail to create storage_pool with idx=%d\n", idx);
-    }
+    CHECK_NE(runtime->storage_pool[runtime->storage_pool_count].dl_tensor.data, 0,
+             "fail to create storage_pool with idx=%d\n", idx);
      runtime->storage_pool_count++;
    }
  
@@ -638,26 +694,31 @@ void TVMGraphRuntime_SetupStorage(TVMGraphRuntime * runtime) {
    // memory assignment for each node entry. The allocated memory on each device
    // is mapped to this pool.
    runtime->data_entry_count = runtime->node_row_ptr[runtime->node_row_ptr_count - 1];
+  runtime->data_entry = vmalloc(sizeof(TVMNDArray) * runtime->data_entry_count);
    for (idx = 0; idx < runtime->data_entry_count; ++idx) {
      size_t storage_id = attrs->storage_id[idx];
-    assert(storage_id < runtime->storage_pool_count);
+    CHECK(storage_id < runtime->storage_pool_count);
      runtime->data_entry[idx] =
        TVMNDArray_CreateView(&(runtime->storage_pool[storage_id]),
-                         attrs->shape[idx], attrs->ndim[idx], vtype[idx]);
-    if (runtime->data_entry[idx].dl_tensor.data == 0) {
-      fprintf(stderr, "fail to create for node with idx=%d, storage_id=%d\n", idx, storage_id);
-    }
+                         attrs->shape+idx*TVM_CRT_MAX_NDIM, attrs->ndim[idx], vtype[idx]);
+    CHECK_NE(runtime->data_entry[idx].dl_tensor.data, 0,
+             "fail to create for node with idx=%d, storage_id=%lu\n", idx, storage_id);
    }
+
+  // Release memory
+  vfree(vtype);
+  vfree(pool_entry);
  }
  
  int TVMGraphRuntime_SetupOpExecs(TVMGraphRuntime * runtime) {
    int status = 0;
    uint32_t nid, idx;
    runtime->op_execs_count = runtime->nodes_count;
+  runtime->op_execs = vmalloc(sizeof(TVMPackedFunc) * runtime->op_execs_count);
    for (nid = 0; nid < runtime->nodes_count; nid++) {
      const TVMGraphRuntimeNode * inode = runtime->nodes + nid;
      if (strcmp(inode->op_type, "null")) {
-      DLTensorPtr args[GRAPH_RUNTIME_MAX_NODES];
+      DLTensorPtr args[TVM_CRT_MAX_ARGS];
        uint32_t args_count = 0;
        for (idx = 0; idx < inode->inputs_count; idx++) {
          const TVMGraphRuntimeNodeEntry * entry = inode->inputs + idx;
@@ -671,7 +732,7 @@ int TVMGraphRuntime_SetupOpExecs(TVMGraphRuntime * runtime) {
          args_count++;
        }
        if (strcmp(inode->op_type, "tvm_op")) {
-        fprintf(stderr, "Can only take tvm_op as op\n");
+        fprintf(stderr, "Can only take tvm_op as op, but \"%s\" is found.\n", inode->op_type);
          status = -1;
          break;
        }
@@ -682,7 +743,7 @@ int TVMGraphRuntime_SetupOpExecs(TVMGraphRuntime * runtime) {
          break;
        }
  #if TVM_CRT_DEBUG
-      printf("creating tvm_op: %s with node_id=%d\n", inode->param.func_name, nid);
+      printf("tvm_op: creating %s with node_id=%d\n", inode->param.func_name, nid);
  #endif  // TVM_CRT_DEBUG
        TVMPackedFunc pf;
        runtime->CreateTVMOp(runtime, &(inode->param), args, args_count, inode->inputs_count, &pf);
@@ -735,7 +796,7 @@ int32_t TVMGraphRuntime_CreateTVMOp(TVMGraphRuntime * runtime, const TVMOpParam
      status = -1;
    }
  
-  runtime->module.GetFunction(param->func_name, pf);
+  runtime->module.GetFunction(&(runtime->module), param->func_name, pf);
    TVMArgs targs = TVMArgs_Create(arg_ptr.arg_values, arg_ptr.arg_tcodes, arg_ptr.arg_values_count);
    pf->SetArgs(pf, &targs);
  
@@ -762,7 +823,7 @@ void TVMGraphRuntime_Init(TVMGraphRuntime * runtime, const char * graph_json,
  
  TVMGraphRuntime * TVMGraphRuntimeCreate(const char * sym_json,
                                          const TVMModule * m, const TVMContext * ctxs) {
-  TVMGraphRuntime * runtime = (TVMGraphRuntime*)malloc(sizeof(TVMGraphRuntime));  // NOLINT(*)
+  TVMGraphRuntime * runtime = (TVMGraphRuntime*)vmalloc(sizeof(TVMGraphRuntime));  // NOLINT(*)
    memset(runtime, 0, sizeof(TVMGraphRuntime));
    runtime->GetEntryId = TVMGraphRuntime_GetEntryId;
    runtime->GetInputIndex = TVMGraphRuntime_GetInputIndex;
@@ -784,8 +845,38 @@ TVMGraphRuntime * TVMGraphRuntimeCreate(const char * sym_json,
+/*!
+ * \brief Release a graph runtime together with the arrays it owns.
+ *
+ * Also frees the global g_fexecs table and finally checks vleak_size for leaks.
+ * \param pptr Address of the pointer to the runtime being released.
+ */
  void TVMGraphRuntimeRelease(TVMGraphRuntime ** pptr) {
    int32_t idx;
    TVMGraphRuntime * runtime = *pptr;
+  for (idx = 0; idx < runtime->nodes_count; ++idx) {
+    TVMGraphRuntimeNodeRelease(&(runtime->nodes[idx]));
+  }
+  vfree(runtime->nodes);
+  TVMGraphRuntimeGraphAttr_Release(&(runtime->attrs));
    for (idx = 0; idx < runtime->storage_pool_count; ++idx) {
      TVMNDArray_Release(&(runtime->storage_pool[idx]));
    }
-  free(*pptr);
+  for (idx = 0; idx < runtime->data_entry_count; ++idx) {
+    vfree(runtime->data_entry[idx].dl_tensor.shape);
+  }
+  vfree(runtime->input_nodes);
+  vfree(runtime->node_row_ptr);
+  vfree(runtime->outputs);
+  vfree(runtime->storage_pool);
+  vfree(runtime->data_entry);
+  vfree(runtime->op_execs);
+  vfree(*pptr);
+
+  if (g_fexecs) {
+    vfree(g_fexecs);
+    g_fexecs = 0;
+    // Reset the count together with the table: TVMBackendRegisterSystemLibSymbol
+    // indexes g_fexecs by g_fexecs_count, so a stale count must not survive the free.
+    g_fexecs_count = 0;
+  }
+
+  CHECK_EQ(vleak_size, 0, "found memory leak, leak size=%d", vleak_size);
  }
diff --git a/src/runtime/crt/graph_runtime.h b/src/runtime/crt/graph_runtime.h

index 5b6e905..3cb8ba9 100644 (file)
--- a/src/runtime/crt/graph_runtime.h
+++ b/src/runtime/crt/graph_runtime.h
@@ -63,10 +63,11 @@ typedef struct TVMGraphRuntimeNode {
    // parameters
    TVMOpParam param;
    // inputs
-  TVMGraphRuntimeNodeEntry inputs[GRAPH_RUNTIME_NODE_MAX_INPUTS];
-  size_t                   inputs_count;
+  TVMGraphRuntimeNodeEntry * inputs;
+  // number of inputs
+  size_t inputs_count;
    // control deps
-  uint32_t control_deps[200];
+  uint32_t control_deps[20];
    // JSON Loader
    void (*LoadAttrs)(struct TVMGraphRuntimeNode * node, JSONReader *reader, TVMOpParam* param);
    // JSON Loader
@@ -76,12 +77,12 @@ typedef struct TVMGraphRuntimeNode {
  // Graph attribute
  typedef struct TVMGraphRuntimeGraphAttr {
    uint32_t storage_num_not_alloctaed;
-  uint32_t storage_id[GRAPH_RUNTIME_MAX_NODES];
-  uint32_t device_index[GRAPH_RUNTIME_MAX_NODES];
-  char     dltype[GRAPH_RUNTIME_MAX_NODES][10];  // "int8", "int16", "float32"
+  uint32_t * storage_id;
+  uint32_t * device_index;
+  char * dltype;  // "int8", "int16", "float32"
    uint32_t dltype_count;
-  int64_t  shape[GRAPH_RUNTIME_MAX_NODES][TVM_CRT_MAX_NDIM];
-  uint32_t ndim[GRAPH_RUNTIME_MAX_NODES];
+  int64_t * shape;
+  uint32_t * ndim;
    uint32_t shape_count;
  } TVMGraphRuntimeGraphAttr;
  
@@ -169,33 +170,35 @@ typedef struct TVMGraphRuntime {
    // Get node entry index.
    uint32_t (*GetEntryId)(struct TVMGraphRuntime * runtime, uint32_t nid, uint32_t index);
  
-  // /*! \brief The graph nodes. */
-  TVMGraphRuntimeNode nodes[GRAPH_RUNTIME_MAX_NODES];
-  uint32_t           nodes_count;
+  /*! \brief The graph nodes. */
+  TVMGraphRuntimeNode * nodes;
+  /*! \brief The graph nodes counter. */
+  uint32_t nodes_count;
    /*! \brief The argument nodes. */
-  uint32_t input_nodes[GRAPH_RUNTIME_MAX_INPUT_NODES];
-  uint32_t   input_nodes_count;
+  uint32_t * input_nodes;
+  uint32_t input_nodes_count;
    /*! \brief Used for quick entry indexing. */
-  uint32_t node_row_ptr[GRAPH_RUNTIME_MAX_NODE_ROW_PTR];
+  uint32_t * node_row_ptr;
    uint32_t node_row_ptr_count;
    /*! \brief Output entries. */
-  TVMGraphRuntimeNodeEntry outputs[GRAPH_RUNTIME_MAX_OUTPUTS];
-  uint32_t              outputs_count;
+  TVMGraphRuntimeNodeEntry * outputs;
+  /*! \brief Output entries counter. */
+  uint32_t outputs_count;
    /*! \brief Additional graph attributes. */
    TVMGraphRuntimeGraphAttr attrs;
    /*! \brief The code module that contains both host and device code. */
    TVMModule module;
    /*! \brief Execution context of all devices including the host. */
-  TVMContext ctxs[GRAPH_RUNTIME_MAX_CONTEXTS];
+  TVMContext ctxs[1];
    uint32_t   ctxs_count;
    /*! \brief Common storage pool for all devices. */
-  TVMNDArray  storage_pool[GRAPH_RUNTIME_MAX_NODES];
+  TVMNDArray * storage_pool;
    uint32_t storage_pool_count;
    /*! \brief Data entry of each node. */
-  TVMNDArray  data_entry[GRAPH_RUNTIME_MAX_NODES];
+  TVMNDArray * data_entry;
    uint32_t data_entry_count;
    /*! \brief Operator on each node. */
-  TVMPackedFunc op_execs[GRAPH_RUNTIME_MAX_NODES];
+  TVMPackedFunc * op_execs;
    uint32_t op_execs_count;
  } TVMGraphRuntime;
  
diff --git a/src/runtime/crt/load_json.c b/src/runtime/crt/load_json.c

index 894ab89..cf9492b 100644 (file)
--- a/src/runtime/crt/load_json.c
+++ b/src/runtime/crt/load_json.c
@@ -21,6 +21,8 @@
   * \file load_json.c
   * \brief Load graph from JSON file.
   */
+#include <tvm/runtime/crt/memory.h>
+
  #include "load_json.h"
  
  // the node entry structure in serialized format
@@ -74,10 +76,10 @@ void SeqPop(Seq * seq) {
  }
  
  Seq * SeqCreate(uint64_t len) {
-  Seq * seq = (Seq*)malloc(sizeof(Seq));  // NOLINT(*)
+  Seq * seq = (Seq*)vmalloc(sizeof(Seq));  // NOLINT(*)
    memset(seq, 0, sizeof(Seq));
    seq->allocated = len;
-  seq->data = (uint32_t*)malloc(sizeof(uint32_t)*len);  // NOLINT(*)
+  seq->data = (uint32_t*)vmalloc(sizeof(uint32_t)*len);  // NOLINT(*)
    seq->push_back = SeqPush;
    seq->back = SeqBack;
    seq->pop_back = SeqPop;
@@ -85,8 +87,8 @@ Seq * SeqCreate(uint64_t len) {
  }
  
  void SeqRelease(Seq ** seq) {
-  free((*seq)->data);
-  free(*seq);
+  vfree((*seq)->data);
+  vfree(*seq);
  }
  
  
@@ -156,11 +158,11 @@ int JSONReader_ReadString(JSONReader * reader, char * out_str) {
      if (ch == '\\') {
        char sch = reader->NextChar(reader);
        switch (sch) {
-      case 'r': snprintf(output, sizeof(output), "%s\r", output); break;
-      case 'n': snprintf(output, sizeof(output), "%s\n", output); break;
-      case '\\': snprintf(output, sizeof(output), "%s\\", output); break;
-      case 't': snprintf(output, sizeof(output), "%s\t", output); break;
-      case '\"': snprintf(output, sizeof(output), "%s\"", output); break;
+      case 'r': snprintf(output + strlen(output), sizeof(output), "\r"); break;
+      case 'n': snprintf(output + strlen(output), sizeof(output), "\n"); break;
+      case '\\': snprintf(output + strlen(output), sizeof(output), "\\"); break;
+      case 't': snprintf(output + strlen(output), sizeof(output), "\t"); break;
+      case '\"': snprintf(output + strlen(output), sizeof(output), "\""); break;
        default: fprintf(stderr, "unknown string escape %c\n", sch);
        }
      } else {
@@ -346,7 +348,7 @@ JSONReader JSONReader_Create(const char * is) {
    reader.BeginObject = JSONReader_BeginObject;
    reader.NextArrayItem = JSONReader_NextArrayItem;
    reader.NextObjectItem = JSONReader_NextObjectItem;
-  reader.is_ = (char*)malloc(strlen(is)+1);  // NOLINT(*)
+  reader.is_ = (char*)vmalloc(strlen(is)+1);  // NOLINT(*)
    memset(reader.is_, 0, strlen(is)+1);
    snprintf(reader.is_, strlen(is)+1, "%s", is);
    reader.isptr = reader.is_;
@@ -355,5 +357,5 @@ JSONReader JSONReader_Create(const char * is) {
  
  void JSONReader_Release(JSONReader * reader) {
    SeqRelease(&(reader->scope_counter_));
-  free(reader->is_);
+  vfree(reader->is_);
  }
diff --git a/src/runtime/crt/logging.h b/src/runtime/crt/logging.h

new file mode 100644 (file)

index 0000000..2c58834
--- /dev/null
+++ b/src/runtime/crt/logging.h
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file runtime/crt/logging.h
+ * \brief A replacement of the dmlc logging system that avoids
+ *  the usage of GLOG and C++ headers
+ */
+
+#ifndef TVM_RUNTIME_CRT_LOGGING_H_
+#define TVM_RUNTIME_CRT_LOGGING_H_
+
+/*!
+ * \brief Terminate the program when condition x is false.
+ *  On failure the stringified condition is printed to stderr and exit(-1)
+ *  is called.  The definition is skipped if CHECK already exists.
+ */
+#ifndef CHECK
+#define CHECK(x)                                                        \
+  do {                                                                  \
+    if (!(x)) {                                                         \
+      fprintf(stderr, "Check failed: %s\n", #x);                        \
+      exit(-1);                                                         \
+    }                                                                   \
+  }while(0)
+#endif
+
+/*!
+ * \brief Terminate the program unless `x op y` holds.
+ *  On failure prints "Check failed: <x> <op> <y>: " followed by the
+ *  printf-style message built from fmt and the trailing arguments, then
+ *  calls exit(-1).
+ *
+ *  Both operands are parenthesized before the operator is applied so that
+ *  arguments containing lower-precedence operators (e.g. `a & b`) are
+ *  compared as a whole.
+ */
+#ifndef CHECK_BINARY_OP
+#define CHECK_BINARY_OP(op, x, y, fmt, ...)                             \
+  do {                                                                  \
+    if (!((x) op (y))) {                                                \
+      fprintf(stderr, "Check failed: %s %s %s: " fmt "\n", #x, #op, #y, ##__VA_ARGS__); \
+      exit(-1);                                                         \
+    }                                                                   \
+  }while(0)
+#endif
+
+/*
+ * Comparison shorthands: each one forwards to CHECK_BINARY_OP with the
+ * corresponding operator and is only defined when not already present.
+ */
+#ifndef CHECK_LT
+#define CHECK_LT(x, y, fmt, ...) CHECK_BINARY_OP(<,  x, y, fmt, ##__VA_ARGS__)
+#endif
+
+#ifndef CHECK_GT
+#define CHECK_GT(x, y, fmt, ...) CHECK_BINARY_OP(>,  x, y, fmt, ##__VA_ARGS__)
+#endif
+
+#ifndef CHECK_LE
+#define CHECK_LE(x, y, fmt, ...) CHECK_BINARY_OP(<=, x, y, fmt, ##__VA_ARGS__)
+#endif
+
+#ifndef CHECK_GE
+#define CHECK_GE(x, y, fmt, ...) CHECK_BINARY_OP(>=, x, y, fmt, ##__VA_ARGS__)
+#endif
+
+#ifndef CHECK_EQ
+#define CHECK_EQ(x, y, fmt, ...) CHECK_BINARY_OP(==, x, y, fmt, ##__VA_ARGS__)
+#endif
+
+#ifndef CHECK_NE
+#define CHECK_NE(x, y, fmt, ...) CHECK_BINARY_OP(!=, x, y, fmt, ##__VA_ARGS__)
+#endif
+
+#endif  // TVM_RUNTIME_CRT_LOGGING_H_
diff --git a/src/runtime/crt/memory.c b/src/runtime/crt/memory.c

new file mode 100644 (file)

index 0000000..24175f6
--- /dev/null
+++ b/src/runtime/crt/memory.c
@@ -0,0 +1,393 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file memory.c
+ * \brief Virtual memory manager
+ *
+ * To maximize portability, thread-safe feature has been dropped for now.
+ */
+
+#include <tvm/runtime/c_runtime_api.h>
+#include <tvm/runtime/crt/memory.h>
+
+#include <inttypes.h>
+
+#include "logging.h"
+
+/*! \brief Number of bits in a page (page size in bytes times 8) */
+#define TVM_CRT_PAGE_BITS (TVM_CRT_PAGE_BYTES << 3)
+
+/*! \brief Translate log memory size into bytes */
+#define TVM_CRT_VIRT_MEM_SIZE (1 << TVM_CRT_LOG_VIRT_MEM_SIZE)
+
+/*! \brief Number of possible page entries in total */
+#define TVM_CRT_MAX_PAGES (TVM_CRT_VIRT_MEM_SIZE / TVM_CRT_PAGE_BYTES)
+
+/*! \brief Physical address type */
+typedef uint32_t tvm_phy_addr_t;
+
+/*! \brief Number of bits in a page, as a typed constant */
+static const tvm_phy_addr_t kPageBits = TVM_CRT_PAGE_BITS;
+
+/*!
+ * \brief Page size in bytes.
+ *  Requests are rounded up to a whole number of pages of this size.
+ */
+static const tvm_phy_addr_t kPageSize = TVM_CRT_PAGE_BYTES;
+
+/**
+ * \brief Memory pool for virtual dynamic memory allocation.
+ *  Statically sized; PageCreate hands out pointers into this array.
+ */
+static char g_memory_pool[TVM_CRT_VIRT_MEM_SIZE];
+
+/*! \brief Descriptor of a run of one or more contiguous pages in the pool */
+typedef struct Page {
+  /*! \brief Start location in page table (index of the first page) */
+  tvm_index_t ptable_begin;
+  /*! \brief The total number of pages in this run */
+  tvm_index_t num_pages;
+  /*! \brief Address of the first byte of the run (set by PageCreate) */
+  char * data;
+} Page;
+
+/*!
+ * \brief Build the descriptor of a run of pages located in g_memory_pool.
+ * \param ptable_begin Index of the first page of the run
+ * \param num_pages Number of pages in the run
+ * \return Descriptor whose data pointer is ptable_begin * kPageSize bytes
+ *  past the start of the pool
+ */
+Page PageCreate(tvm_index_t ptable_begin, tvm_index_t num_pages) {
+  Page descriptor;
+  descriptor.data = g_memory_pool + ptable_begin * kPageSize;
+  descriptor.num_pages = num_pages;
+  descriptor.ptable_begin = ptable_begin;
+  return descriptor;
+}
+
+/*! \brief Fixed-capacity table with one descriptor slot per page in use */
+typedef struct PageTable {
+  /*! \brief Descriptor stored for each page slot */
+  Page page[TVM_CRT_MAX_PAGES];
+  /*! \brief Number of slots currently in use */
+  uint32_t count;
+  /*! \brief Grow the table to `size` slots, filling new slots with *page */
+  void (*resize)(struct PageTable * ptable, uint32_t size, Page * page);
+} PageTable;
+
+/*!
+ * \brief Grow the page table to new_size slots.
+ *  Every slot added is filled with a copy of *page.  Shrinking is rejected
+ *  by the CHECK below, which terminates the program.
+ */
+void PageTable_Resize(struct PageTable * ptable, uint32_t new_size, Page * page) {
+  CHECK_LE(ptable->count, new_size,
+           "size value (%d) is smaller than expected (%d).", new_size, ptable->count);
+  // append one slot at a time until the requested size is reached
+  while (ptable->count < new_size) {
+    ptable->page[ptable->count] = *page;
+    ptable->count++;
+  }
+}
+
+/*! \brief One address -> page association stored in the TLB */
+typedef struct PageEntry {
+  /*! \brief Address used as the lookup key */
+  char * addr;
+  /*! \brief Descriptor of the page run bound to addr */
+  Page page;
+} PageEntry;
+
+/*! \brief Fixed-capacity lookup table from allocation address to page run */
+typedef struct TLB {
+  /*! \brief Stored associations; only the first `count` are valid */
+  PageEntry entries[TVM_CRT_MAX_PAGES];
+  /*! \brief Number of valid entries */
+  uint32_t count;
+  /*! \brief Add or overwrite the association for `data` */
+  void (*set)(struct TLB * tlb, char * data, Page * page);
+  /*! \brief Look up the entry whose address equals `data`; 0 when absent */
+  PageEntry * (*find)(struct TLB * tlb, char * data);
+} TLB;
+
+/*!
+ * \brief Bind address `data` to the page run described by *page.
+ *  An existing entry for the same address is overwritten; otherwise a new
+ *  entry is appended.  Appending to a full table terminates the program
+ *  through CHECK_LT instead of writing past the end of `entries`.
+ * \param tlb The lookup table to update
+ * \param data The address used as key
+ * \param page The descriptor to store (copied)
+ */
+void TLB_Set(TLB * tlb, char * data, Page * page) {
+  PageEntry * entry = tlb->find(tlb, data);
+  if (entry == 0) {
+    // no entry for this address yet: claim the next free slot
+    CHECK_LT(tlb->count, TVM_CRT_MAX_PAGES, "translation look-aside buffer is full.");
+    entry = tlb->entries + tlb->count;
+    tlb->count++;
+  }
+  entry->addr = data;
+  entry->page = *page;
+}
+
+/*!
+ * \brief Linear search for the entry whose address equals `data`.
+ * \return Pointer to the first matching entry, or 0 when none matches
+ */
+PageEntry * TLB_Find(TLB * tlb, char * data) {
+  PageEntry * cursor = tlb->entries;
+  PageEntry * const last = tlb->entries + tlb->count;
+  for (; cursor != last; ++cursor) {
+    if (cursor->addr == data) {
+      return cursor;
+    }
+  }
+  return 0;
+}
+
+/*! \brief One free-map record: a page run keyed by its page count */
+typedef struct IndexedEntry {
+  /*! \brief Key; MultiMap_Insert stores the number of pages here */
+  tvm_index_t index;
+  /*! \brief Descriptor of the page run */
+  Page page;
+} IndexedEntry;
+
+/*!
+ * \brief Fixed-capacity list of page runs keyed by page count.
+ *  Entries are kept in insertion order (MultiMap_Insert appends).
+ */
+typedef struct MultiMap {
+  /*! \brief Stored records; only the first `count` are valid */
+  IndexedEntry entries[TVM_CRT_MAX_PAGES];
+  /*! \brief Number of valid records */
+  uint32_t count;
+  /*! \brief First record whose key is >= npage, or the end() value */
+  IndexedEntry * (*lower_bound)(struct MultiMap * map, uint32_t npage);
+  /*! \brief Sentinel returned by lower_bound when nothing matches */
+  IndexedEntry * (*end)(struct MultiMap * map);
+  /*! \brief Remove the given record */
+  void (*erase)(struct MultiMap * map, IndexedEntry * entry);
+  /*! \brief Append a record for page run *p with key npage */
+  void (*insert)(struct MultiMap * map, uint32_t npage, Page * p);
+} MultiMap;
+
+/*!
+ * \brief Find the first record, in storage order, whose key is >= npage.
+ *  The records are not sorted, so this is a first-fit search rather than
+ *  a best-fit one.
+ * \return Pointer to the record, or 0 when no record qualifies
+ */
+IndexedEntry * MultiMap_LowerBound(struct MultiMap * map, uint32_t npage) {
+  uint32_t pos = 0;
+  // skip every record that is too small
+  while (pos < map->count && map->entries[pos].index < npage) {
+    pos++;
+  }
+  return (pos < map->count) ? (map->entries + pos) : 0;
+}
+
+/*! \brief Sentinel meaning "no record"; always the null pointer. */
+IndexedEntry * MultiMap_End(struct MultiMap * map) {
+  return 0;
+}
+
+/*!
+ * \brief Remove one record from the map, keeping the others in order.
+ *  Nothing happens when `entry` does not point at a valid record of `map`.
+ * \param map The map to modify
+ * \param entry Pointer to the record to remove
+ */
+void MultiMap_Erase(struct MultiMap * map, IndexedEntry * entry) {
+  for (uint32_t idx = 0; idx < map->count; idx++) {
+    if ((map->entries + idx) == entry) {
+      // Close the gap by shifting the records that follow down by one slot.
+      // Source and destination overlap, so memmove is required, and only
+      // the (count - idx - 1) records after the erased one are moved so the
+      // copy never reads past the last valid record.
+      memmove(map->entries + idx, map->entries + (idx + 1),
+              sizeof(IndexedEntry) * (map->count - idx - 1));
+      map->count--;
+      break;
+    }
+  }
+}
+
+/*!
+ * \brief Append a record for page run *p keyed by its page count.
+ *  Exactly one record is written per call, including when npage is 0, so
+ *  the slot accounted for by `count` always holds the data passed in.
+ *  Exceeding the capacity terminates the program through CHECK_LE.
+ * \param map The map to modify
+ * \param npage Key of the record (number of pages in the run)
+ * \param p Descriptor of the page run (copied)
+ */
+void MultiMap_Insert(struct MultiMap * map, uint32_t npage, Page * p) {
+  CHECK_LE(map->count + 1, TVM_CRT_MAX_PAGES, "invalid number of free pages.");
+  map->entries[map->count].index = npage;
+  map->entries[map->count].page = *p;
+  map->count++;
+}
+
+/*!
+ * \brief DRAM memory manager
+ *  Implements simple paging to allow physical address translation.
+ */
+typedef struct MemoryManager {
+  /*!
+   * \brief Allocate memory from manager
+   * \param mgr The memory manager
+   * \param size The size of memory
+   * \return The virtual address
+   */
+  void* (*Alloc)(struct MemoryManager * mgr, tvm_index_t size);
+  /*!
+   * \brief Reallocate memory from manager
+   * \param mgr The memory manager
+   * \param ptr The pointer to the memory area to be reallocated
+   * \param size The size of memory
+   * \return The virtual address
+   */
+  void* (*Realloc)(struct MemoryManager * mgr, void * ptr, tvm_index_t size);
+  /*!
+   * \brief Free the memory.
+   * \param mgr The memory manager
+   * \param data The pointer to the memory to deallocate
+   */
+  void (*Free)(struct MemoryManager * mgr, void* data);
+
+  // Physical address -> page
+  PageTable ptable;
+  // Virtual address -> page
+  TLB pmap;
+  // Free map: page runs that were released and can be handed out again
+  MultiMap free_map;
+} MemoryManager;
+
+/*!
+ * \brief Allocate memory from manager
+ *
+ *  The request is rounded up to whole pages.  The first free run that is
+ *  large enough is reused as a whole (it is not split); when there is none,
+ *  new pages are appended to the page table and registered in the TLB.
+ *  Running out of pool space terminates the program through CHECK_LE.
+ *  vleak_size is incremented for every successful call.
+ *
+ * \param mgr The memory manager
+ * \param size The size of memory
+ * \return The virtual address
+ */
+void* MemoryManager_Alloc(MemoryManager * mgr, tvm_index_t size) {
+  char * data = 0;
+  tvm_index_t npage = (size + kPageSize - 1) / kPageSize;
+  MultiMap * free_map = &(mgr->free_map);
+  IndexedEntry * it = free_map->lower_bound(free_map, npage);
+  tvm_index_t start = 0;
+  if (it != free_map->end(free_map)) {
+    // copy the descriptor first: erase() shifts the records `it` points into
+    Page p = it->page;
+    free_map->erase(free_map, it);
+    data = p.data;
+    start = p.ptable_begin;
+    npage = p.num_pages;
+  } else {
+    PageTable * ptable = &(mgr->ptable);
+    start = ptable->count;
+    CHECK_LE((unsigned)(start + npage), (sizeof(g_memory_pool) / kPageSize),
+             "insufficient memory, start=%" PRId64 ", npage=%" PRId64 ", total=%" PRId64 "",
+             start, npage, start + npage);
+    /* insert page entry */
+    Page p = PageCreate(start, npage);
+    ptable->resize(ptable, start + npage, &p);
+    data = p.data;
+    TLB * pmap = &(mgr->pmap);
+    pmap->set(pmap, data, &p);
+  }
+  vleak_size++;
+#if TVM_CRT_DEBUG > 1
+  // start and npage are tvm_index_t, so they need the 64-bit format macro
+  printf("allocate: addr=%p, start=%" PRId64 "/%d, npage=%" PRId64 ", vleak=%d\n",
+         data, start, TVM_CRT_MAX_PAGES, npage, vleak_size);
+#endif  // TVM_CRT_DEBUG
+  return data;
+}
+
+/*!
+ * \brief Reallocate memory from manager
+ *
+ *  - ptr is null: behaves like an allocation and increments vleak_size.
+ *  - ptr is non-null and its page run already holds enough pages: ptr is
+ *    returned unchanged.
+ *  - ptr is non-null and its page run is too small: a free run that is
+ *    large enough is reused, or new pages are appended; the whole old run
+ *    is copied over and then placed on the free map.  vleak_size is not
+ *    changed.
+ *  Running out of pool space, or passing a pointer unknown to the TLB,
+ *  terminates the program through the CHECK macros.
+ *
+ * \param mgr The memory manager
+ * \param ptr The pointer to the memory area to be reallocated
+ * \param size The size of memory
+ * \return The virtual address
+ */
+void* MemoryManager_Realloc(MemoryManager * mgr, void * ptr, tvm_index_t size) {
+  char * data = (char*)ptr;  // NOLINT(*)
+  PageTable * ptable = &(mgr->ptable);
+  TLB * pmap = &(mgr->pmap);
+  MultiMap * free_map = &(mgr->free_map);
+  tvm_index_t start = 0;
+  tvm_index_t npage = (size + kPageSize - 1) / kPageSize;
+  if (ptr) {
+    // get page size for given pointer
+    CHECK_NE(pmap->count, 0, "invalid translation look-aside buffer.");
+    PageEntry * entry = pmap->find(pmap, (char*)ptr);  // NOLINT(*)
+    CHECK_NE(entry, 0, "no valid page entry found.");
+    Page * pptr = &(entry->page);
+    // if the page size is smaller than target page size,
+    // try allocate new space
+    if (pptr->num_pages < npage) {
+      // TODO(liangfu): found out whether we can extend current entry
+      //
+      // insert new page entry
+      IndexedEntry * it = free_map->lower_bound(free_map, npage);
+      if (it != free_map->end(free_map)) {
+        data = it->page.data;
+        start = it->page.ptable_begin;
+        npage = it->page.num_pages;
+        free_map->erase(free_map, it);
+      } else {
+        start = ptable->count;
+        CHECK_LE((unsigned)(start + npage), (sizeof(g_memory_pool) / kPageSize),
+                 "insufficient memory, start=%" PRId64 ", npage=%" PRId64 ", total=%" PRId64 "",
+                 start, npage, start + npage);
+        Page p = PageCreate(start, npage);
+        ptable->resize(ptable, start + npage, &p);
+        data = p.data;
+        pmap->set(pmap, data, &p);
+      }
+      // copy previous data to the new entry
+      memcpy(data, ptr, kPageSize * pptr->num_pages);
+      // release memory
+      free_map->insert(free_map, pptr->num_pages, pptr);
+    } else {
+      start = pptr->ptable_begin;
+    }
+  } else {
+    IndexedEntry * it = free_map->lower_bound(free_map, npage);
+    if (it != free_map->end(free_map)) {
+      // copy the descriptor first: erase() shifts the records `it` points into
+      Page p = it->page;
+      free_map->erase(free_map, it);
+      data = p.data;
+      start = p.ptable_begin;
+      npage = p.num_pages;
+    } else {
+      start = ptable->count;
+      CHECK_LE((unsigned)(start + npage), (sizeof(g_memory_pool) / kPageSize),
+               "insufficient memory, start=%" PRId64 ", npage=%" PRId64 ", total=%" PRId64 "",
+               start, npage, start + npage);
+      /* insert page entry */
+      Page p = PageCreate(start, npage);
+      ptable->resize(ptable, start + npage, &p);
+      data = p.data;
+      pmap->set(pmap, data, &p);
+    }
+    vleak_size++;
+  }
+#if TVM_CRT_DEBUG > 1
+  // start, npage and size are tvm_index_t, so they need the 64-bit format macro
+  printf("reallocate: addr=%p, start=%" PRId64 "/%d, npage=%" PRId64 ", vleak=%d, size=%" PRId64 "\n",
+         data, start, TVM_CRT_MAX_PAGES, npage, vleak_size, size);
+#endif  // TVM_CRT_DEBUG
+  return data;
+}
+
+/*!
+ * \brief Free the memory.
+ *
+ *  Looks up the page run registered for ptr, places it on the free map for
+ *  reuse and decrements vleak_size.  A pointer that is not registered in
+ *  the TLB (including a null pointer) terminates the program through
+ *  CHECK_NE.
+ *
+ * \param mgr The memory manager
+ * \param ptr The pointer to the memory to deallocate
+ */
+void MemoryManager_Free(MemoryManager * mgr, void* ptr) {
+  TLB * pmap = &(mgr->pmap);
+  CHECK_NE(pmap->count, 0, "invalid translation look-aside buffer.");
+  PageEntry * entry = pmap->find(pmap, (char*)ptr);  // NOLINT(*)
+  CHECK_NE(entry, 0, "no valid page entry found.");
+  Page * p = &(entry->page);
+  MultiMap * free_map = &(mgr->free_map);
+  free_map->insert(free_map, p->num_pages, p);
+  vleak_size--;
+#if TVM_CRT_DEBUG > 1
+  // ptable_begin and num_pages are tvm_index_t, so they need the 64-bit format macro
+  printf("release: addr=%p, start=%" PRId64 "/%d, npage=%" PRId64 ", vleak=%d\n",
+         ptr, entry->page.ptable_begin, TVM_CRT_MAX_PAGES, entry->page.num_pages, vleak_size);
+#endif  // TVM_CRT_DEBUG
+}
+
+/*!
+ * \brief Reset the single, statically stored manager and wire its callbacks.
+ *  Each call zeroes the same static object again before returning it.
+ * \return Pointer to the static manager
+ */
+MemoryManager * MemoryManagerCreate() {
+  static MemoryManager storage;
+  MemoryManager * const self = &storage;
+  memset(self, 0, sizeof(*self));
+  /* free map operations */
+  MultiMap * const free_list = &(self->free_map);
+  free_list->insert = MultiMap_Insert;
+  free_list->erase = MultiMap_Erase;
+  free_list->end = MultiMap_End;
+  free_list->lower_bound = MultiMap_LowerBound;
+  /* TLB operations */
+  TLB * const lookup = &(self->pmap);
+  lookup->find = TLB_Find;
+  lookup->set = TLB_Set;
+  /* page table operations */
+  self->ptable.resize = PageTable_Resize;
+  /* manager entry points */
+  self->Free = MemoryManager_Free;
+  self->Realloc = MemoryManager_Realloc;
+  self->Alloc = MemoryManager_Alloc;
+  return self;
+}
+
+/*!
+ * \brief Return the global manager, creating it on the first call.
+ *  The first call also zero-fills g_memory_pool.
+ */
+MemoryManager * TVMGetGlobalMemoryManager() {
+  static MemoryManager * instance;
+  static uint32_t ready = 0;
+  if (ready) {
+    return instance;
+  }
+  /* first use: build the manager and clear the pool */
+  instance = MemoryManagerCreate();
+  memset(g_memory_pool, 0, sizeof(g_memory_pool));
+  ready = 1;
+  return instance;
+}
+
+/**
+ * \brief Allocate `size` bytes through the global memory manager.
+ * \return Address of the allocated memory
+ */
+void * vmalloc(size_t size) {
+  MemoryManager * const manager = TVMGetGlobalMemoryManager();
+  void * const block = manager->Alloc(manager, size);
+  return block;
+}
+
+/**
+ * \brief Resize the allocation at `ptr` to `size` bytes through the global
+ *  memory manager.
+ * \return Address of the (possibly relocated) memory
+ */
+void * vrealloc(void * ptr, size_t size) {
+  MemoryManager * const manager = TVMGetGlobalMemoryManager();
+  void * const block = manager->Realloc(manager, ptr, size);
+  return block;
+}
+
+/** \brief Hand the allocation at `ptr` back to the global memory manager. */
+void vfree(void * ptr) {
+  MemoryManager * const manager = TVMGetGlobalMemoryManager();
+  manager->Free(manager, ptr);
+}
diff --git a/src/runtime/crt/module.h b/src/runtime/crt/module.h

index 8ff979b..9ef287d 100644 (file)
--- a/src/runtime/crt/module.h
+++ b/src/runtime/crt/module.h
@@ -24,11 +24,10 @@
  #ifndef TVM_RUNTIME_CRT_MODULE_H_
  #define TVM_RUNTIME_CRT_MODULE_H_
  
-#include <string.h>
  #include <tvm/runtime/c_runtime_api.h>
+#include <string.h>
  
  struct TVMPackedFunc;
-typedef struct TVMPackedFunc TVMPackedFunc;
  
  /*!
   * \brief Module container of TVM.
@@ -42,7 +41,7 @@ typedef struct TVMModule {
     *
     *  This function will return PackedFunc(nullptr) if function do not exist.
     */
-  void (*GetFunction)(const char * name, TVMPackedFunc * pf);
+  void (*GetFunction)(struct TVMModule * mod, const char * name, struct TVMPackedFunc * pf);
  } TVMModule;
  
  #endif  // TVM_RUNTIME_CRT_MODULE_H_
diff --git a/src/runtime/crt/ndarray.c b/src/runtime/crt/ndarray.c

index 016fdd5..4b4ab68 100644 (file)
--- a/src/runtime/crt/ndarray.c
+++ b/src/runtime/crt/ndarray.c
@@ -22,6 +22,8 @@
   * \brief NDArray container infratructure.
   */
  
+#include <tvm/runtime/crt/memory.h>
+
  #include "ndarray.h"
  
  TVMNDArray TVMNDArray_Create(uint32_t ndim, const tvm_index_t * shape,
@@ -29,7 +31,7 @@ TVMNDArray TVMNDArray_Create(uint32_t ndim, const tvm_index_t * shape,
    TVMNDArray ret;
    memset(&ret, 0, sizeof(TVMNDArray));
    ret.dl_tensor.ndim = ndim;
-  ret.dl_tensor.shape = (int64_t*)malloc(sizeof(int64_t)*ndim);  // NOLINT(*)
+  ret.dl_tensor.shape = (int64_t*)vmalloc(sizeof(int64_t)*ndim);  // NOLINT(*)
    memcpy(ret.dl_tensor.shape, shape, sizeof(int64_t)*ndim);
    ret.dl_tensor.dtype = dtype;
    ret.dl_tensor.ctx = ctx;
@@ -109,7 +111,9 @@ TVMNDArray TVMNDArray_CreateView(TVMNDArray * arr, const tvm_index_t * shape,
  }
  
  int TVMNDArray_Release(TVMNDArray * arr) {
-  free(arr->dl_tensor.data);
-  free(arr->dl_tensor.shape);
+  vfree(arr->dl_tensor.data);
+  arr->dl_tensor.data = 0;
+  vfree(arr->dl_tensor.shape);
+  arr->dl_tensor.shape = 0;
    return 0;
  }
diff --git a/src/runtime/crt/packed_func.h b/src/runtime/crt/packed_func.h

index 21370b6..93898a4 100644 (file)
--- a/src/runtime/crt/packed_func.h
+++ b/src/runtime/crt/packed_func.h
@@ -112,14 +112,12 @@ static inline void TVMPackedFunc_SetArgs(TVMPackedFunc * pf, const TVMArgs * arg
    memcpy(&(pf->args), args, sizeof(TVMArgs));
  }
  
-TVMPackedFunc g_fexecs[GRAPH_RUNTIME_MAX_NODES];
+TVMPackedFunc * g_fexecs = 0;
  uint32_t g_fexecs_count = 0;
  
-void TVMPackedFunc_SetupExecs();
-
  // Implement TVMModule::GetFunction
  // Put implementation in this file so we have seen the TVMPackedFunc
-static inline void TVMModule_GetFunction(const char * name, TVMPackedFunc * pf) {
+static inline void TVMModule_GetFunction(TVMModule * mod, const char * name, TVMPackedFunc * pf) {
    int idx;
    memset(pf, 0, sizeof(TVMPackedFunc));
    assert(strlen(name) <= sizeof(pf->name));
@@ -127,13 +125,13 @@ static inline void TVMModule_GetFunction(const char * name, TVMPackedFunc * pf)
    pf->Call = TVMPackedFunc_Call;
    pf->SetArgs = TVMPackedFunc_SetArgs;
    pf->fexec = &TVMNoOperation;
-  for (idx = 0; idx < GRAPH_RUNTIME_MAX_NODES; idx++) {
+  for (idx = 0; idx < g_fexecs_count; idx++) {
      if (!strcmp(g_fexecs[idx].name, name)) {
        pf->fexec = g_fexecs[idx].fexec;
        break;
      }
    }
-  if (idx == GRAPH_RUNTIME_MAX_NODES) {
+  if (idx == g_fexecs_count) {
      fprintf(stderr, "function handle for %s not found\n", name);
    }
  }
diff --git a/tests/cpp/crt_memory_test.cc b/tests/cpp/crt_memory_test.cc

new file mode 100644 (file)

index 0000000..1c12916
--- /dev/null
+++ b/tests/cpp/crt_memory_test.cc
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#define TVM_CRT_LOG_VIRT_MEM_SIZE 16
+#define TVM_CRT_PAGE_BYTES 4096
+
+#include <gtest/gtest.h>
+#include <tvm/runtime/crt/memory.h>
+
+#include "../../src/runtime/crt/memory.c"
+
+// Repeatedly allocates and frees a one-byte block and checks that the
+// outstanding-allocation counter goes 0 -> 1 -> 0 on every round.
+TEST(CRTMemory, Alloc) {
+  for (int idx = 0; idx < 65536; idx++) {
+    void * a = vmalloc(1);
+    EXPECT_EQ(vleak_size, 1);
+    vfree(a);
+    EXPECT_EQ(vleak_size, 0);
+  }
+}
+
+// Checks vrealloc in both modes: with a null pointer it acts as an
+// allocation, and with an existing block of sufficient size it returns the
+// same address without changing the outstanding-allocation counter.
+TEST(CRTMemory, Realloc) {
+  for (int idx = 0; idx < 65536; idx++) {
+    void * a = vrealloc(0, 1);
+    EXPECT_EQ(vleak_size, 1);
+    void * b = vrealloc(a, 1);
+    EXPECT_EQ(a, b);
+    EXPECT_EQ(vleak_size, 1);
+    vfree(a);
+    EXPECT_EQ(vleak_size, 0);
+  }
+}
+
+// Test entry point: lets googletest consume its command-line flags, then
+// runs every registered test and returns the overall result.
+int main(int argc, char ** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  // NOTE(review): no death tests are visible in this file — confirm this flag is still needed
+  testing::FLAGS_gtest_death_test_style = "threadsafe";
+  return RUN_ALL_TESTS();
+}
author	Liangfu Chen <liangfu.chen@icloud.com>
	Tue, 7 Apr 2020 21:33:05 +0000 (05:33 +0800)
committer	GitHub <noreply@github.com>
	Tue, 7 Apr 2020 21:33:05 +0000 (14:33 -0700)
apps/bundle_deploy/Makefile		patch \| blob \| history
apps/bundle_deploy/demo.cc		patch \| blob \| history
apps/bundle_deploy/runtime.c		patch \| blob \| history
apps/bundle_deploy/test.cc		patch \| blob \| history
include/tvm/runtime/crt/memory.h	[new file with mode: 0644]	patch \| blob
src/runtime/crt/crt_backend_api.c		patch \| blob \| history
src/runtime/crt/graph_runtime.c		patch \| blob \| history
src/runtime/crt/graph_runtime.h		patch \| blob \| history
src/runtime/crt/load_json.c		patch \| blob \| history
src/runtime/crt/logging.h	[new file with mode: 0644]	patch \| blob
src/runtime/crt/memory.c	[new file with mode: 0644]	patch \| blob
src/runtime/crt/module.h		patch \| blob \| history
src/runtime/crt/ndarray.c		patch \| blob \| history
src/runtime/crt/packed_func.h		patch \| blob \| history
tests/cpp/crt_memory_test.cc	[new file with mode: 0644]	patch \| blob