Clean up the allocation logic in the interpreter.

author A. Unique TensorFlower <gardener@tensorflow.org>

Sun, 14 Jan 2018 00:21:55 +0000 (16:21 -0800)

committer TensorFlower Gardener <gardener@tensorflow.org>

Sun, 14 Jan 2018 00:25:51 +0000 (16:25 -0800)
author A. Unique TensorFlower <gardener@tensorflow.org>
Sun, 14 Jan 2018 00:21:55 +0000 (16:21 -0800)
committer TensorFlower Gardener <gardener@tensorflow.org>
Sun, 14 Jan 2018 00:25:51 +0000 (16:25 -0800)
diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD

index 3f1b0be1a73a3ff1da3452f4ee1a9125f9e26178..13350c5a438b75fe14e8753e5bb1bb77ec8f655b 100644 (file)
--- a/tensorflow/contrib/lite/BUILD
+++ b/tensorflow/contrib/lite/BUILD
@@ -35,6 +35,28 @@ cc_library(
      hdrs = ["version.h"],
  )
  
+cc_library(
+    name = "arena_planner",
+    srcs = ["arena_planner.cc"],
+    hdrs = ["arena_planner.h"],
+    deps = [
+        ":context",
+        ":graph_info",
+        ":memory_planner",
+        ":simple_memory_arena",
+    ],
+)
+
+cc_test(
+    name = "arena_planner_test",
+    size = "small",
+    srcs = ["arena_planner_test.cc"],
+    deps = [
+        ":arena_planner",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
  # Main library. No ops are included here.
  # TODO(aselle): Resolve problems preventing C99 usage.
  cc_library(
@@ -43,6 +65,25 @@ cc_library(
      hdrs = ["context.h"],
  )
  
+cc_library(
+    name = "graph_info",
+    hdrs = ["graph_info.h"],
+    deps = [":context"],
+)
+
+cc_library(
+    name = "memory_planner",
+    hdrs = ["memory_planner.h"],
+    deps = [":context"],
+)
+
+cc_library(
+    name = "simple_memory_arena",
+    srcs = ["simple_memory_arena.cc"],
+    hdrs = ["simple_memory_arena.h"],
+    deps = [":context"],
+)
+
  cc_library(
      name = "builtin_op_data",
      hdrs = [
@@ -70,7 +111,6 @@ cc_library(
          "model.cc",
          "nnapi_delegate.cc",
          "optional_debug_tools.cc",
-        "simple_memory_arena.cc",
      ],
      hdrs = [
          "allocation.h",
@@ -80,13 +120,16 @@ cc_library(
          "model.h",
          "nnapi_delegate.h",
          "optional_debug_tools.h",
-        "simple_memory_arena.h",
      ],
      copts = tflite_copts(),
      deps = [
+        ":arena_planner",
          ":builtin_op_data",
          ":context",
+        ":graph_info",
+        ":memory_planner",
          ":schema_fbs_version",
+        ":simple_memory_arena",
          "//tensorflow/contrib/lite/kernels:gemm_support",
          "//tensorflow/contrib/lite/nnapi:nnapi_lib",
          "//tensorflow/contrib/lite/schema:schema_fbs",
@@ -134,7 +177,7 @@ cc_test(
      size = "small",
      srcs = ["simple_memory_arena_test.cc"],
      deps = [
-        ":framework",
+        ":simple_memory_arena",
          "//tensorflow/contrib/lite/testing:util",
          "@com_google_googletest//:gtest",
      ],
diff --git a/tensorflow/contrib/lite/arena_planner.cc b/tensorflow/contrib/lite/arena_planner.cc

new file mode 100644 (file)

index 0000000..bf1bcdd
--- /dev/null
+++ b/tensorflow/contrib/lite/arena_planner.cc
@@ -0,0 +1,247 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/arena_planner.h"
+
+namespace tflite {
+
+namespace {
+
+// Memory allocation tuning
+constexpr const int kDefaultArenaAlignment = 64;
+constexpr const int kDefaultTensorAlignment = 4;
+
+}  // namespace
+
+struct AllocationInfo {
+  // The node index requesting this allocation.
+  int node;
+  // The tensor index to be allocated or deallocated.
+  int tensor;
+  // Whether to allocate or deallocate
+  enum { ALLOC, DEALLOC } type;
+};
+
+ArenaPlanner::ArenaPlanner(TfLiteContext* context,
+                           std::unique_ptr<GraphInfo> graph_info)
+    : context_(context),
+      graph_info_(std::move(graph_info)),
+      arena_(kDefaultArenaAlignment),
+      persistent_arena_(kDefaultArenaAlignment) {}
+
+ArenaPlanner::~ArenaPlanner() {}
+
+int64_t ArenaPlanner::BasePointer(TfLiteAllocationType type) {
+  if (type == kTfLiteArenaRwPersistent) {
+    return persistent_arena_.BasePointer();
+  }
+  if (type == kTfLiteArenaRw) {
+    return arena_.BasePointer();
+  }
+  return 0;
+}
+
+TfLiteStatus ArenaPlanner::ResetAllocations() {
+  TF_LITE_ENSURE_STATUS(arena_.Clear());
+  TF_LITE_ENSURE_STATUS(persistent_arena_.Clear());
+  allocs_.clear();
+  allocs_.resize(graph_info_->num_tensors());
+  return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::PlanAllocations() {
+  // Invalidate any existing data.
+  TF_LITE_ENSURE_STATUS(ResetAllocations());
+
+  // Keeps track of references to each tensor.
+  std::vector<int> refcounts(graph_info_->num_tensors(), 0);
+
+  // There will be an entry in alloc_queue_ for the allocation of each tensor
+  // and another for their deallocation.
+  alloc_queue_.reserve(2 * graph_info_->num_tensors());
+
+  // We must make sure the output tensors are never overwritten. We do that by
+  // artificially adding one to their ref-counts so they are never selected
+  // for deallocation.
+  for (int tensor_index : graph_info_->outputs()) {
+    refcounts[tensor_index]++;
+  }
+
+  // Count references to node input tensors.
+  for (int i = 0; i < graph_info_->num_nodes(); ++i) {
+    const TfLiteNode& node = graph_info_->node(i);
+    TfLiteIntArray* node_inputs = node.inputs;
+    for (int j = 0; j < node_inputs->size; ++j) {
+      int tensor_index = node_inputs->data[j];
+      if (tensor_index != kOptionalTensor) {
+        refcounts[tensor_index]++;
+      }
+    }
+  }
+
+  // Queue all graph inputs for allocation.
+  for (int tensor_index : graph_info_->inputs()) {
+    if (tensor_index != kOptionalTensor) {
+      alloc_queue_.push_back({0, tensor_index, AllocationInfo::ALLOC});
+    }
+  }
+
+  // Go through the graph in execution order.
+  for (int i = 0; i < graph_info_->num_nodes(); ++i) {
+    const TfLiteNode& node = graph_info_->node(i);
+
+    // First queue output tensors for allocation.
+    TfLiteIntArray* node_outputs = node.outputs;
+    for (int j = 0; j < node_outputs->size; ++j) {
+      int tensor_index = node_outputs->data[j];
+      alloc_queue_.push_back({i, tensor_index, AllocationInfo::ALLOC});
+    }
+
+    // Then update the ref-counts of the node's inputs, and if necessary queue
+    // them for deallocation.
+    TfLiteIntArray* node_inputs = node.inputs;
+    for (int j = 0; j < node_inputs->size; ++j) {
+      int tensor_index = node_inputs->data[j];
+      if (tensor_index != kOptionalTensor) {
+        refcounts[tensor_index]--;
+        if (refcounts[tensor_index] == 0) {
+          alloc_queue_.push_back({i, tensor_index, AllocationInfo::DEALLOC});
+        }
+      }
+    }
+  }
+
+  // Note that graph outputs will never be scheduled for deallocation. We
+  // could do that here for completeness, but it won't have any effect.
+  return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::ExecuteAllocations(int first_node, int last_node) {
+  TF_LITE_ENSURE_STATUS(CalculateAllocations(first_node, last_node));
+  TF_LITE_ENSURE_STATUS(Commit());
+
+  for (int i = 0; i < graph_info_->num_tensors(); ++i) {
+    // TODO(ahentz): we could do this only for the tensors that were modified
+    // in CalculateAllocations(), instead of redoing it for tensors that
+    // already had proper pointers. However we must be very careful, because
+    // SimpleMemoryArena::Commit() could move the base pointer.
+    TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(i));
+  }
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::Commit() {
+  TF_LITE_ENSURE_STATUS(arena_.Commit(context_));
+  TF_LITE_ENSURE_STATUS(persistent_arena_.Commit(context_));
+  return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::CalculateAllocations(int first_node, int last_node) {
+  int active_node = first_node;
+  // When dynamic tensors are present this method is called multiple times.
+  // The items in the alloc_queue_ referring to nodes before first_node were
+  // processed previously and should be skipped. Entries after last_node are
+  // not yet ready to be handled.
+  for (const auto& alloc_info : alloc_queue_) {
+    if (alloc_info.node < first_node) continue;
+    if (alloc_info.node > last_node) break;
+    if (alloc_info.node == active_node) {
+      // This is the first allocation/deallocation for a given node.  It is
+      // time to deallocate the previous temporaries and allocate new ones.
+      if (active_node != first_node) {
+        TF_LITE_ENSURE_STATUS(
+            CalculateDeallocationOfInternalTensors(active_node - 1));
+      }
+      TF_LITE_ENSURE_STATUS(CalculateAllocationOfInternalTensors(active_node));
+      ++active_node;
+    }
+    // Handle the current item.
+    if (alloc_info.type == AllocationInfo::ALLOC) {
+      TF_LITE_ENSURE_STATUS(CalculateTensorAllocation(alloc_info.tensor));
+    } else {
+      TF_LITE_ENSURE_STATUS(CalculateTensorDeallocation(alloc_info.tensor));
+    }
+  }
+
+  // Don't forget to deallocate temporaries of last node.
+  TF_LITE_ENSURE_STATUS(
+      CalculateDeallocationOfInternalTensors(active_node - 1));
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::ResolveTensorAllocation(int tensor_index) {
+  TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
+  if (tensor.allocation_type == kTfLiteArenaRw) {
+    TF_LITE_ENSURE_STATUS(
+        arena_.ResolveAlloc(context_, allocs_[tensor_index], &tensor.data.raw));
+  }
+  if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
+    TF_LITE_ENSURE_STATUS(persistent_arena_.ResolveAlloc(
+        context_, allocs_[tensor_index], &tensor.data.raw));
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::CalculateTensorAllocation(int tensor_index) {
+  TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
+  if (tensor.allocation_type == kTfLiteArenaRw) {
+    TF_LITE_ENSURE_STATUS(arena_.Allocate(context_, kDefaultTensorAlignment,
+                                          tensor.bytes,
+                                          &allocs_[tensor_index]));
+  }
+  if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
+    TF_LITE_ENSURE_STATUS(
+        persistent_arena_.Allocate(context_, kDefaultTensorAlignment,
+                                   tensor.bytes, &allocs_[tensor_index]));
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::CalculateTensorDeallocation(int tensor_index) {
+  TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
+  if (tensor.allocation_type == kTfLiteArenaRw) {
+    TF_LITE_ENSURE_STATUS(arena_.Deallocate(context_, allocs_[tensor_index]));
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::CalculateAllocationOfInternalTensors(
+    int node_index) {
+  if (node_index < graph_info_->num_nodes()) {
+    const TfLiteNode& node = graph_info_->node(node_index);
+    TfLiteIntArray* node_temporaries = node.temporaries;
+    for (int i = 0; i < node_temporaries->size; ++i) {
+      int tensor_index = node_temporaries->data[i];
+      TF_LITE_ENSURE_STATUS(CalculateTensorAllocation(tensor_index));
+    }
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::CalculateDeallocationOfInternalTensors(
+    int node_index) {
+  if (node_index < graph_info_->num_nodes()) {
+    const TfLiteNode& node = graph_info_->node(node_index);
+    TfLiteIntArray* node_temporaries = node.temporaries;
+    for (int i = 0; i < node_temporaries->size; ++i) {
+      int tensor_index = node_temporaries->data[i];
+      TF_LITE_ENSURE_STATUS(CalculateTensorDeallocation(tensor_index));
+    }
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/arena_planner.h b/tensorflow/contrib/lite/arena_planner.h

new file mode 100644 (file)

index 0000000..bd87414
--- /dev/null
+++ b/tensorflow/contrib/lite/arena_planner.h
@@ -0,0 +1,107 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_ARENA_PLANNER_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_ARENA_PLANNER_H_
+
+#include <memory>
+#include <vector>
+
+#include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/graph_info.h"
+#include "tensorflow/contrib/lite/memory_planner.h"
+#include "tensorflow/contrib/lite/simple_memory_arena.h"
+
+namespace tflite {
+
+class AllocationInfo;
+
+// A memory planner that makes all the allocations using arenas.
+//
+// Before a model is executed by the interpreter, this class determines when
+// each tensor needs to be allocated and deallocated, and preallocates all the
+// necessary memory (the PlanAllocations phase). It then assigns portions of
+// this memory buffer to each tensor (the ExecuteAllocations phase). Tensors may
+// share some of the bufer if a tensor B is to be allocated after another tensor
+// A has been deallocated.
+//
+// If dynamic tensors are used the planning steps can be repeated during model
+// execution. Since dynamic tensors don't have sizes until after the
+// corresponding operation is executed, this class supports incremental
+// planning.
+class ArenaPlanner : public MemoryPlanner {
+ public:
+  // Ownership of 'context' is not taken and it must remain util the
+  // ArenaPlanner is destroyed.
+  ArenaPlanner(TfLiteContext* context, std::unique_ptr<GraphInfo> graph_info);
+  ~ArenaPlanner() override;
+  ArenaPlanner(const ArenaPlanner&) = delete;
+  ArenaPlanner& operator=(const ArenaPlanner&) = delete;
+
+  TfLiteStatus ResetAllocations() override;
+  TfLiteStatus PlanAllocations() override;
+  TfLiteStatus ExecuteAllocations(int first_node, int last_node) override;
+
+  // Returns the base arena location for a given allocation type.
+  int64_t BasePointer(TfLiteAllocationType type);
+
+ private:
+  // Make sure all the arenas have reserved enough memory to store all their
+  // tensors.
+  TfLiteStatus Commit();
+
+  // Traverse the allocation queue and reserve space in the appropriate arena
+  // for all tensors affected by ops in the interval [first_node, last_node].
+  TfLiteStatus CalculateAllocations(int first_node, int last_node);
+
+  // Assign absolute memory location to a tensor, based on its relative
+  // position inside the corresponding arena buffer.
+  TfLiteStatus ResolveTensorAllocation(int tensor_index);
+
+  // Register an allocation for the given tensor.
+  TfLiteStatus CalculateTensorAllocation(int tensor_index);
+
+  // Register a deallocation for the given tensor.
+  TfLiteStatus CalculateTensorDeallocation(int tensor_index);
+
+  // Register an allocation for all internal (temporary) tensors of
+  // 'node_index'.
+  TfLiteStatus CalculateAllocationOfInternalTensors(int node_index);
+
+  // Register a deallocation for all internal (temporary) tensors of
+  // 'node_index'.
+  TfLiteStatus CalculateDeallocationOfInternalTensors(int node_index);
+
+  TfLiteContext* context_;
+  std::unique_ptr<GraphInfo> graph_info_;
+
+  // Stores allocation data for all tensors.
+  std::vector<ArenaAlloc> allocs_;
+
+  // A chronological list of instructions to allocated and deallocate tensors,
+  // reflecting the way they are used in the graph.
+  std::vector<AllocationInfo> alloc_queue_;
+
+  // Raw memory buffer that is allocated for all temporary and graph outputs.
+  // that are declared kTfLiteArenaRw.
+  SimpleMemoryArena arena_;
+
+  // Raw memory buffer that is allocated for persistent tensors that are
+  // declared as kTfLiteArenaRwPersistent.
+  SimpleMemoryArena persistent_arena_;
+};
+
+}  // namespace tflite
+
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_ARENA_PLANNER_H_
diff --git a/tensorflow/contrib/lite/arena_planner_test.cc b/tensorflow/contrib/lite/arena_planner_test.cc

new file mode 100644 (file)

index 0000000..c27c327
--- /dev/null
+++ b/tensorflow/contrib/lite/arena_planner_test.cc
@@ -0,0 +1,472 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/arena_planner.h"
+
+#include <cstdarg>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+namespace tflite {
+namespace {
+
+// A simple op to be used in tests, as syntactic sugar.
+class TestOp {
+ public:
+  TestOp(std::initializer_list<int> inputs, std::initializer_list<int> outputs,
+         std::initializer_list<int> temporaries)
+      : inputs_(inputs), outputs_(outputs), temporaries_(temporaries) {}
+
+  const std::vector<int>& inputs() const { return inputs_; }
+  const std::vector<int>& outputs() const { return outputs_; }
+  const std::vector<int>& temporaries() const { return temporaries_; }
+
+ private:
+  std::vector<int> inputs_;
+  std::vector<int> outputs_;
+  std::vector<int> temporaries_;
+};
+
+// A test graph where inputs are processed by the given nodes to produce
+// outputs.
+class TestGraph {
+ public:
+  TestGraph(std::initializer_list<int> inputs,
+            std::initializer_list<TestOp> nodes,
+            std::initializer_list<int> outputs)
+      : inputs_(inputs), outputs_(outputs) {
+    int max_tensor_index = 0;
+
+    for (int t : inputs) {
+      max_tensor_index = std::max(max_tensor_index, t);
+    }
+    for (int t : outputs) {
+      max_tensor_index = std::max(max_tensor_index, t);
+    }
+    for (const auto& node : nodes) {
+      auto int_array = [](const std::vector<int>& x) {
+        TfLiteIntArray* lite = TfLiteIntArrayCreate(x.size());
+        for (size_t i = 0; i < x.size(); i++) lite->data[i] = x[i];
+        return lite;
+      };
+
+      nodes_.push_back(TfLiteNode());
+      nodes_.back().inputs = int_array(node.inputs());
+      for (int t : node.inputs()) {
+        max_tensor_index = std::max(max_tensor_index, t);
+      }
+      nodes_.back().outputs = int_array(node.outputs());
+      for (int t : node.outputs()) {
+        max_tensor_index = std::max(max_tensor_index, t);
+      }
+      nodes_.back().temporaries = int_array(node.temporaries());
+      for (int t : node.temporaries()) {
+        max_tensor_index = std::max(max_tensor_index, t);
+      }
+    }
+
+    for (int i = 0; i <= max_tensor_index; ++i) {
+      tensors_.push_back(TfLiteTensor());
+      // Set some default values for allocation_type and bytes, which are the
+      // only fields used by the arena planner.
+      tensors_.back().allocation_type = kTfLiteArenaRw;
+      tensors_.back().bytes = (i + 1) * 3;
+    }
+  }
+
+  ~TestGraph() {
+    for (auto node : nodes_) {
+      TfLiteIntArrayFree(node.inputs);
+      TfLiteIntArrayFree(node.outputs);
+      TfLiteIntArrayFree(node.temporaries);
+    }
+  }
+
+  const std::vector<TfLiteNode>& nodes() { return nodes_; }
+  std::vector<TfLiteTensor>* tensors() { return &tensors_; }
+  const std::vector<int>& inputs() { return inputs_; }
+  const std::vector<int>& outputs() { return outputs_; }
+
+ private:
+  std::vector<TfLiteNode> nodes_;
+  std::vector<TfLiteTensor> tensors_;
+  std::vector<int> inputs_;
+  std::vector<int> outputs_;
+};
+
+// The GraphInfo for a TestGraph.
+class TestGraphInfo : public GraphInfo {
+ public:
+  explicit TestGraphInfo(TestGraph* graph) : graph_(graph) {}
+
+  size_t num_tensors() const override { return graph_->tensors()->size(); }
+  TfLiteTensor* tensor(size_t index) override {
+    return &graph_->tensors()->at(index);
+  }
+  size_t num_nodes() const override { return graph_->nodes().size(); }
+  const TfLiteNode& node(size_t index) const override {
+    return graph_->nodes()[index];
+  }
+  const std::vector<int>& inputs() const override { return graph_->inputs(); }
+  const std::vector<int>& outputs() const override { return graph_->outputs(); }
+
+ private:
+  TestGraph* graph_;
+};
+
+void ReportError(TfLiteContext* context, const char* format, ...) {
+  const size_t kBufferSize = 1024;
+  char temp_buffer[kBufferSize];
+
+  va_list args;
+  va_start(args, format);
+  vsnprintf(temp_buffer, kBufferSize, format, args);
+  va_end(args);
+
+  LOG(INFO) << temp_buffer;
+}
+
+class ArenaPlannerTest : public ::testing::Test {
+ protected:
+  void SetGraph(TestGraph* graph) {
+    graph_ = graph;
+    context_.ReportError = ReportError;
+    planner_.reset(new ArenaPlanner(
+        &context_, std::unique_ptr<GraphInfo>(new TestGraphInfo(graph))));
+    CHECK(planner_->ResetAllocations() == kTfLiteOk);
+    CHECK(planner_->PlanAllocations() == kTfLiteOk);
+  }
+
+  void Execute(int start, int end) {
+    CHECK(planner_->ExecuteAllocations(start, end) == kTfLiteOk);
+  }
+
+  // Returns the actual offset of a given tensor, relative to the start of its
+  // arena.
+  int64_t GetOffset(int tensor_index) {
+    const TfLiteTensor& tensor = (*graph_->tensors())[tensor_index];
+    return reinterpret_cast<int64_t>(tensor.data.raw) -
+           planner_->BasePointer(tensor.allocation_type);
+  }
+
+  // Returns the first aligned offset after a given tensor.
+  int64_t GetOffsetAfter(int tensor_index) {
+    const TfLiteTensor& tensor = (*graph_->tensors())[tensor_index];
+    int64_t offset = GetOffset(tensor_index) + tensor.bytes;
+    // We must make sure the offset is aligned to kDefaultArenaAlignment.
+    if (offset % 4 != 0) {
+      offset += 4 - offset % 4;
+    }
+    return offset;
+  };
+
+  TfLiteContext context_;
+  TestGraph* graph_;
+  std::unique_ptr<ArenaPlanner> planner_;
+};
+
+TEST_F(ArenaPlannerTest, EmptyGraph) {
+  TestGraph graph({}, {}, {});
+  SetGraph(&graph);
+  Execute(0, 10);
+}
+
+TEST_F(ArenaPlannerTest, GraphWithNoOps) {
+  TestGraph graph({0, 10}, {}, {5, 11});
+  SetGraph(&graph);
+  Execute(0, 10);
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(10), GetOffsetAfter(0));
+  // The outputs are never allocated because they are not connected to any
+  // inputs.
+  EXPECT_EQ(GetOffset(5), 0);
+  EXPECT_EQ(GetOffset(11), 0);
+}
+
+TEST_F(ArenaPlannerTest, GraphWithOneOp) {
+  TestGraph graph({1}, {{{1}, {2}, {}}}, {2});
+  SetGraph(&graph);
+  Execute(0, 10);
+  EXPECT_EQ(GetOffset(1), 0);
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+}
+
+TEST_F(ArenaPlannerTest, ZeroSizedTensors) {
+  TestGraph graph({1}, {{{1}, {2}, {}}}, {2});
+  (*graph.tensors())[1].bytes = 0;
+  SetGraph(&graph);
+  // TODO(ahentz): this is currently broken because the arena finds two
+  // allocations with the same offset and returns an error.
+  ASSERT_FALSE(planner_->ExecuteAllocations(0, 10) == kTfLiteOk);
+  // EXPECT_EQ(GetOffset(1), 0);
+  // EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+}
+
+TEST_F(ArenaPlannerTest, SimpleGraph) {
+  TestGraph graph({0, 1},
+                  {
+                      /* in, out, tmp */
+                      {{0, 1}, {2}, {}},     // First op
+                      {{2, 0}, {4, 5}, {}},  // Second op
+                      {{4, 5}, {3}, {}}      // Third op
+                  },
+                  {3});
+  SetGraph(&graph);
+  Execute(0, 10);
+
+  // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +4 +5 -2 -0 +3 -4 -5
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
+  EXPECT_EQ(GetOffset(3), 0);
+}
+
+TEST_F(ArenaPlannerTest, SimpleGraphWithTemporary) {
+  TestGraph graph({0, 1},
+                  {
+                      /* in, out, tmp */
+                      {{0, 1}, {2}, {}},   // First op
+                      {{2, 0}, {4}, {5}},  // Second op, with temporary
+                      {{4}, {3}, {}}       // Third op
+                  },
+                  {3});
+  SetGraph(&graph);
+  Execute(0, 10);
+
+  // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
+  EXPECT_EQ(GetOffset(3), 0);
+}
+
+TEST_F(ArenaPlannerTest, SimpleGraphWithOptionals) {
+  TestGraph graph({0, -1, 1},
+                  {
+                      /* in, out, tmp */
+                      {{0, 1}, {2}, {}},     // First op
+                      {{2, 0}, {4, 5}, {}},  // Second op
+                      {{4, -1, 5}, {3}, {}}  // Third op, with optional
+                  },
+                  {3});
+  SetGraph(&graph);
+  Execute(0, 10);
+
+  // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +4 +5 -2 -0 +3 -4 -5
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
+  EXPECT_EQ(GetOffset(3), 0);
+}
+
+TEST_F(ArenaPlannerTest, SimpleGraphWithLargeTensor) {
+  TestGraph graph({0, -1, 1},
+                  {
+                      /* in, out, tmp */
+                      {{0, 1}, {2}, {}},   // First op
+                      {{2, 0}, {4}, {5}},  // Second op, with temporary
+                      {{4, -1}, {3}, {}}   // Third op, with optional
+                  },
+                  {3});
+
+  // Make #1 very large so its vacancy can be filled with #5 and #4.
+  (*graph.tensors())[1].bytes = 40;
+
+  SetGraph(&graph);
+  Execute(0, 10);
+
+  // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
+  EXPECT_EQ(GetOffset(3), 0);
+}
+
+TEST_F(ArenaPlannerTest, SimpleGraphWithPersistentTensor) {
+  TestGraph graph({0, -1, 1},
+                  {
+                      /* in, out, tmp */
+                      {{0, 1}, {2}, {}},   // First op
+                      {{2, 0}, {4}, {5}},  // Second op, with temporary
+                      {{4, -1}, {3}, {}}   // Third op, with optional
+                  },
+                  {3});
+
+  // Make #1 persistent so it goes into its own arena.
+  (*graph.tensors())[1].allocation_type = kTfLiteArenaRwPersistent;
+
+  SetGraph(&graph);
+  Execute(0, 10);
+
+  // Make sure #0 and #1 were given different memory locations (because they
+  // will both have offset=0, in different arenas.)
+  EXPECT_NE((*graph.tensors())[0].data.raw, (*graph.tensors())[1].data.raw);
+
+  // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(1), 0);
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
+  EXPECT_EQ(GetOffset(3), 0);
+}
+
+TEST_F(ArenaPlannerTest, SimpleGraphWithDynamicTensor) {
+  TestGraph graph({0, -1, 1},
+                  {
+                      /* in, out, tmp */
+                      {{0, 1}, {2}, {}},   // First op
+                      {{2, 0}, {4}, {5}},  // Second op, with temporary
+                      {{4, -1}, {3}, {}}   // Third op, with optional
+                  },
+                  {3});
+
+  // Make #1 dynaic so it does not get allocated.
+  (*graph.tensors())[1].allocation_type = kTfLiteDynamic;
+
+  SetGraph(&graph);
+  Execute(0, 10);
+
+  EXPECT_EQ((*graph.tensors())[1].data.raw, nullptr);
+
+  // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
+  EXPECT_EQ(GetOffset(3), 0);
+}
+
+TEST_F(ArenaPlannerTest, LargerGraphAndStepwiseAllocation) {
+  TestGraph graph({0, 1},
+                  {
+                      /* in, out, tmp */
+                      {{0, 1}, {2, 3}, {}},
+                      {{2, 0}, {4, 5}, {6}},
+                      {{1, -1}, {7}, {}},
+                      {{7, 3}, {8}, {9}},
+                      {{4, 5, 8}, {10}, {}},
+                  },
+                  {10});
+  SetGraph(&graph);
+
+  auto is_unallocated = [&](int tensor_index) {
+    // TODO(ahentz): We'd to use nullptr to represent unallocated tensors, but
+    // the current code still points them all to the beginning fo the alloc
+    // (that is, zero offset).
+    // return (*graph.tensors())[tensor_index].data.raw == nullptr;
+    return GetOffset(tensor_index) == 0;
+  };
+
+  // The allocation plan is made at the beginning and is independent of
+  // the execution steps. Here's the allocation order:
+  //   Op0: +0 +1 +2 +3
+  //   Op1: +6 +4 +5 -6 -0 -2
+  //   Op2: +7 -1
+  //   Op3: +9 +8 -9 -3 -7
+  //   Op4: +10 -4 -5 -8
+
+  Execute(0, 0);
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
+  EXPECT_TRUE(is_unallocated(6));
+  EXPECT_TRUE(is_unallocated(4));
+  EXPECT_TRUE(is_unallocated(5));
+  EXPECT_TRUE(is_unallocated(7));
+  EXPECT_TRUE(is_unallocated(9));
+  EXPECT_TRUE(is_unallocated(8));
+  EXPECT_TRUE(is_unallocated(10));
+
+  Execute(1, 1);
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(6), GetOffsetAfter(3));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(6));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
+  EXPECT_TRUE(is_unallocated(7));
+  EXPECT_TRUE(is_unallocated(9));
+  EXPECT_TRUE(is_unallocated(8));
+  EXPECT_TRUE(is_unallocated(10));
+
+  Execute(2, 2);
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(6), GetOffsetAfter(3));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(6));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
+  // Here's an interesting allocation. Even though #6 requires only 21 bytes,
+  // its deallocation freed up 24 bytes due to the alignment requirements in
+  // the arena. That means we can fit #7 in the same space!
+  EXPECT_EQ(GetOffset(7), GetOffsetAfter(3));
+  EXPECT_TRUE(is_unallocated(9));
+  EXPECT_TRUE(is_unallocated(8));
+  EXPECT_TRUE(is_unallocated(10));
+
+  Execute(3, 3);
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(6), GetOffsetAfter(3));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(6));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
+  EXPECT_EQ(GetOffset(7), GetOffsetAfter(3));
+  // The deallocation of #0, #1 and #2 freed up 24 bytes but that's not enough
+  // for #9, so it goes at the end.
+  EXPECT_EQ(GetOffset(9), GetOffsetAfter(5));
+  EXPECT_EQ(GetOffset(8), GetOffsetAfter(9));
+  EXPECT_TRUE(is_unallocated(10));
+
+  Execute(4, 4);
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(6), GetOffsetAfter(3));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(6));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
+  EXPECT_EQ(GetOffset(7), GetOffsetAfter(3));
+  EXPECT_EQ(GetOffset(9), GetOffsetAfter(5));
+  EXPECT_EQ(GetOffset(8), GetOffsetAfter(9));
+  // There's just enough space at the beginning for #10 due to the
+  // deallocation of #0, #1, #2 and #3 (total 36 bytes, #10 needs
+  // only 33.)
+  EXPECT_EQ(GetOffset(10), 0);
+}
+
+}  // namespace
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  // ::tflite::LogToStderr();
+  FLAGS_logtostderr = true;
+
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/contrib/lite/graph_info.h b/tensorflow/contrib/lite/graph_info.h

new file mode 100644 (file)

index 0000000..5481aed
--- /dev/null
+++ b/tensorflow/contrib/lite/graph_info.h
@@ -0,0 +1,53 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_GRAPH_INFO_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_GRAPH_INFO_H_
+
+#include <vector>
+
+#include "tensorflow/contrib/lite/context.h"
+
+namespace tflite {
+
+// Basic information about an inference graph, where execution nodes
+// are connected via tensors.
+class GraphInfo {
+ public:
+  virtual ~GraphInfo() {}
+
+  // Total number of tensors in the graph.
+  virtual size_t num_tensors() const = 0;
+
+  // Returns a tensor given its index which is expected to be between 0 and
+  // num_tensors().
+  virtual TfLiteTensor* tensor(size_t index) = 0;
+
+  // Total number of nodes in the graph.
+  virtual size_t num_nodes() const = 0;
+
+  // Returns a node given its index which is expected to be between 0 and
+  // num_nodes().
+  virtual const TfLiteNode& node(size_t index) const = 0;
+
+  // Returns the indices of the input tensors.
+  virtual const std::vector<int>& inputs() const = 0;
+
+  // Returns the indices of the output tensors.
+  virtual const std::vector<int>& outputs() const = 0;
+};
+
+}  // namespace tflite
+
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_GRAPH_INFO_H_
diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc

index 954e236ac8f0c8c59a9d20d62e66b3aa1164ecc1..5f5981e45a20a2c79ea1a2ba08345e831ce194da 100644 (file)
--- a/tensorflow/contrib/lite/interpreter.cc
+++ b/tensorflow/contrib/lite/interpreter.cc
@@ -18,16 +18,16 @@ limitations under the License.
  #include <cstdarg>
  #include <cstdint>
  #include <cstring>
+#include "tensorflow/contrib/lite/arena_planner.h"
  #include "tensorflow/contrib/lite/context.h"
  #include "tensorflow/contrib/lite/error_reporter.h"
+#include "tensorflow/contrib/lite/graph_info.h"
  #include "tensorflow/contrib/lite/kernels/gemm_support.h"
+#include "tensorflow/contrib/lite/memory_planner.h"
  #include "tensorflow/contrib/lite/nnapi_delegate.h"
  
  namespace {
  
-// Memory allocation tuning
-constexpr const int kDefaultArenaAlignment = 64;
-constexpr const int kDefaultTensorAlignment = 4;
  // std::vector preallocation tuning.
  constexpr const int kSlotsToReserve = 128;
  
@@ -35,10 +35,33 @@ constexpr const int kSlotsToReserve = 128;
  
  namespace tflite {
  
+// A trivial implementation of GraphInfo around the Interpreter.
+class InterpreterInfo : public GraphInfo {
+ public:
+  explicit InterpreterInfo(Interpreter* interpreter)
+      : interpreter_(interpreter) {}
+
+  size_t num_tensors() const override { return interpreter_->tensors_size(); }
+  TfLiteTensor* tensor(size_t index) override {
+    return interpreter_->tensor(index);
+  }
+  size_t num_nodes() const override { return interpreter_->nodes_size(); }
+  const TfLiteNode& node(size_t index) const override {
+    return interpreter_->node_and_registration(index)->first;
+  }
+  const std::vector<int>& inputs() const override {
+    return interpreter_->inputs();
+  }
+  const std::vector<int>& outputs() const override {
+    return interpreter_->outputs();
+  }
+
+ public:
+  Interpreter* interpreter_;
+};
+
  Interpreter::Interpreter(ErrorReporter* error_reporter)
-    : arena_(kDefaultArenaAlignment),
-      persistent_arena_(kDefaultArenaAlignment),
-      error_reporter_(error_reporter ? error_reporter
+    : error_reporter_(error_reporter ? error_reporter
                                       : DefaultErrorReporter()) {
    context_.impl_ = static_cast<void*>(this);
    context_.ResizeTensor = ResizeTensor;
@@ -50,7 +73,7 @@ Interpreter::Interpreter(ErrorReporter* error_reporter)
    // Reserve some space for the tensors to avoid excessive resizing.
    tensors_.reserve(kSlotsToReserve);
    nodes_and_registration_.reserve(kSlotsToReserve);
-  next_allocate_node_id_ = 0;
+  next_node_to_prepare_ = 0;
    UseNNAPI(false);
  }
  
@@ -128,181 +151,6 @@ TfLiteStatus Interpreter::BytesRequired(TfLiteType type, const int* dims,
    return kTfLiteOk;
  }
  
-TfLiteStatus Interpreter::AllocateTensorsWhoseSizesAreKnown() {
-  if (!consistent_) {
-    ReportError(&context_, "AllocateTensors() called on inconsistent model.");
-    return kTfLiteError;
-  }
-  if (next_allocate_node_id_ == nodes_and_registration_.size() && invokable_) {
-    return kTfLiteOk;
-  }
-  allocs_and_refcounts_.resize(context_.tensors_size);
-
-  int new_next_allocate_node_id = next_allocate_node_id_;
-  invokable_ = false;
-
-  // Allocate graph input nodes.
-  if (next_allocate_node_id_ == 0) {
-    for (int i = 0; i < inputs_.size(); ++i) {
-      int tensor_index = inputs_[i];
-      if (tensor_index == kOptionalTensor) {
-        continue;
-      }
-      TfLiteTensor& tensor = context_.tensors[tensor_index];
-      if (tensor.allocation_type == kTfLiteArenaRw) {
-        TF_LITE_ENSURE_OK(
-            &context_,
-            arena_.Allocate(&context_, kDefaultTensorAlignment, tensor.bytes,
-                            &allocs_and_refcounts_[tensor_index].alloc));
-      }
-    }
-    // Add 1 to output tensors, so they will not get overwritten.
-    for (int i = 0; i < outputs_.size(); ++i) {
-      allocs_and_refcounts_[outputs_[i]].count++;
-    }
-  }
-
-  // Count references to node input tensors, and resize node-referenced tensors
-  // until we encounter a node that has a dynamic output tensor.
-  for (int k = next_allocate_node_id_; k < nodes_and_registration_.size();
-       k++) {
-    new_next_allocate_node_id++;
-    TfLiteNode& node = nodes_and_registration_[k].first;
-    const TfLiteRegistration& registration = nodes_and_registration_[k].second;
-    if (OpPrepare(registration, &node) == kTfLiteError) {
-      return kTfLiteError;
-    }
-
-    TfLiteIntArray* node_inputs = node.inputs;
-    for (int i = 0; i < node_inputs->size; ++i) {
-      int tensor_index = node_inputs->data[i];
-      if (tensor_index != kOptionalTensor) {
-        allocs_and_refcounts_[node_inputs->data[i]].count++;
-      }
-    }
-
-    // Discontinue if the node has dynamic outputs.
-    bool has_unallocated_dynamic_tensor = false;
-    TfLiteIntArray* node_outputs = node.outputs;
-    for (int i = 0; i < node_outputs->size; ++i) {
-      TfLiteTensor& tensor = context_.tensors[node_outputs->data[i]];
-      if (tensor.allocation_type == kTfLiteDynamic) {
-        has_unallocated_dynamic_tensor = true;
-        break;
-      }
-    }
-    if (has_unallocated_dynamic_tensor) {
-      break;
-    }
-  }
-
-  // Allocate graph persistent outputs, e.g. RNN cell states, etc.
-  for (int k = next_allocate_node_id_; k < new_next_allocate_node_id; k++) {
-    TfLiteNode& node = nodes_and_registration_[k].first;
-
-    // Go through output tensors and allocate the persistent ones first.
-    TfLiteIntArray* node_outputs = node.outputs;
-    for (int i = 0; i < node_outputs->size; ++i) {
-      int tensor_index = node_outputs->data[i];
-      TfLiteTensor& tensor = context_.tensors[tensor_index];
-      if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
-        TF_LITE_ENSURE_OK(&context_,
-                          persistent_arena_.Allocate(
-                              &context_, kDefaultTensorAlignment, tensor.bytes,
-                              &allocs_and_refcounts_[tensor_index].alloc));
-      }
-    }
-  }
-
-  // Go through the graph in execution order.
-  for (int k = next_allocate_node_id_; k < new_next_allocate_node_id; k++) {
-    TfLiteNode& node = nodes_and_registration_[k].first;
-
-    // First allocate output tensors.
-    TfLiteIntArray* node_outputs = node.outputs;
-    for (int i = 0; i < node_outputs->size; ++i) {
-      int tensor_index = node_outputs->data[i];
-      TfLiteTensor& tensor = context_.tensors[tensor_index];
-      if (tensor.allocation_type == kTfLiteArenaRw) {
-        TF_LITE_ENSURE_OK(
-            &context_,
-            arena_.Allocate(&context_, kDefaultTensorAlignment, tensor.bytes,
-                            &allocs_and_refcounts_[tensor_index].alloc));
-      }
-    }
-    // Then the temporaries, in two passes. First allocate them all, them
-    // deallocate them.
-    TfLiteIntArray* node_temporaries = node.temporaries;
-    for (int i = 0; i < node_temporaries->size; ++i) {
-      int tensor_index = node_temporaries->data[i];
-      TfLiteTensor& tensor = context_.tensors[tensor_index];
-      if (tensor.allocation_type == kTfLiteArenaRw) {
-        TF_LITE_ENSURE_OK(
-            &context_,
-            arena_.Allocate(&context_, kDefaultTensorAlignment, tensor.bytes,
-                            &allocs_and_refcounts_[tensor_index].alloc));
-      }
-    }
-    for (int i = 0; i < node_temporaries->size; ++i) {
-      int tensor_index = node_temporaries->data[i];
-      TfLiteTensor& tensor = context_.tensors[tensor_index];
-      allocs_and_refcounts_[tensor_index].count--;
-      if (tensor.allocation_type == kTfLiteArenaRw &&
-          allocs_and_refcounts_[tensor_index].count == 0) {
-        TF_LITE_ENSURE_OK(
-            &context_,
-            arena_.Deallocate(&context_,
-                              allocs_and_refcounts_[tensor_index].alloc));
-      }
-    }
-
-    // Then process the node's inputs.
-    TfLiteIntArray* node_inputs = node.inputs;
-    for (int i = 0; i < node_inputs->size; ++i) {
-      int tensor_index = node_inputs->data[i];
-      if (tensor_index == kOptionalTensor) {
-        continue;
-      }
-      TfLiteTensor& tensor = context_.tensors[tensor_index];
-
-      // Decrease reference count and deallocate if not needed anymore.
-      allocs_and_refcounts_[tensor_index].count--;
-      if (tensor.allocation_type == kTfLiteArenaRw &&
-          allocs_and_refcounts_[tensor_index].count == 0) {
-        TF_LITE_ENSURE_OK(
-            &context_,
-            arena_.Deallocate(&context_,
-                              allocs_and_refcounts_[tensor_index].alloc));
-      }
-    }
-  }
-
-  // Resize the buffer and commit the arena.
-  TF_LITE_ENSURE_OK(&context_, arena_.Commit(&context_));
-  TF_LITE_ENSURE_OK(&context_, persistent_arena_.Commit(&context_));
-
-  // Rewire the tensors to use the underlying arena buffer.
-  for (int i = 0; i < context_.tensors_size; ++i) {
-    TfLiteTensor& tensor = context_.tensors[i];
-    if (tensor.allocation_type == kTfLiteArenaRw) {
-      TF_LITE_ENSURE_OK(
-          &context_,
-          arena_.ResolveAlloc(&context_, allocs_and_refcounts_[i].alloc,
-                              &tensor.data.raw));
-    }
-    if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
-      TF_LITE_ENSURE_OK(
-          &context_,
-          persistent_arena_.ResolveAlloc(
-              &context_, allocs_and_refcounts_[i].alloc, &tensor.data.raw));
-    }
-  }
-
-  invokable_ = true;
-  next_allocate_node_id_ = new_next_allocate_node_id;
-  return kTfLiteOk;
-}
-
  namespace {
  TfLiteIntArray* convertVectorToTfLiteIntArray(const std::vector<int>& x) {
    TfLiteIntArray* lite = TfLiteIntArrayCreate(x.size());
@@ -312,11 +160,19 @@ TfLiteIntArray* convertVectorToTfLiteIntArray(const std::vector<int>& x) {
  }  // namespace
  
  TfLiteStatus Interpreter::AllocateTensors() {
-  next_allocate_node_id_ = 0;
-  TF_LITE_ENSURE_OK(&context_, arena_.Clear());
-  TF_LITE_ENSURE_OK(&context_, persistent_arena_.Clear());
-  allocs_and_refcounts_.clear();
-  return AllocateTensorsWhoseSizesAreKnown();
+  next_node_to_prepare_ = 0;
+  if (memory_planner_) {
+    TF_LITE_ENSURE_STATUS(memory_planner_->ResetAllocations());
+  }
+
+  if (!consistent_) {
+    ReportError(&context_, "AllocateTensors() called on inconsistent model.");
+    return kTfLiteError;
+  }
+
+  TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors());
+  invokable_ = true;
+  return kTfLiteOk;
  }
  
  TfLiteStatus Interpreter::AddNodeWithParameters(
@@ -372,6 +228,57 @@ TfLiteStatus Interpreter::ResizeInputTensor(int tensor_index,
    return ResizeTensorImpl(&context_.tensors[tensor_index], dims_lite);
  }
  
+// Returns true if at least one tensor in the given list is kTfLiteDynamic.
+bool HasDynamicTensor(const TfLiteContext& context,
+                      const TfLiteIntArray* tensors) {
+  for (int i = 0; i < tensors->size; ++i) {
+    const TfLiteTensor& tensor = context.tensors[tensors->data[i]];
+    if (tensor.allocation_type == kTfLiteDynamic) {
+      return true;
+    }
+  }
+  return false;
+}
+
+TfLiteStatus Interpreter::PrepareOpsStartingAt(int first_node,
+                                               int* last_node_prepared) {
+  for (int i = first_node; i < nodes_and_registration_.size(); i++) {
+    TfLiteNode& node = nodes_and_registration_[i].first;
+    const TfLiteRegistration& registration = nodes_and_registration_[i].second;
+    if (OpPrepare(registration, &node) == kTfLiteError) {
+      return kTfLiteError;
+    }
+
+    *last_node_prepared = i;
+
+    // Discontinue if the node has dynamic outputs. Note that we don't
+    // stop for dynamic temporary tensors since they won't affect the
+    // sizes of other tensors in the graph.
+    if (HasDynamicTensor(context_, node.outputs)) {
+      break;
+    }
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus Interpreter::PrepareOpsAndTensors() {
+  if (!memory_planner_) {
+    memory_planner_.reset(new ArenaPlanner(
+        &context_, std::unique_ptr<GraphInfo>(new InterpreterInfo(this))));
+    memory_planner_->PlanAllocations();
+  }
+
+  int last_node_prepared = 0;
+
+  TF_LITE_ENSURE_STATUS(
+      PrepareOpsStartingAt(next_node_to_prepare_, &last_node_prepared));
+  TF_LITE_ENSURE_STATUS(memory_planner_->ExecuteAllocations(
+      next_node_to_prepare_, last_node_prepared));
+
+  next_node_to_prepare_ = last_node_prepared + 1;
+  return kTfLiteOk;
+}
+
  TfLiteStatus Interpreter::Invoke() {
    if (!consistent_) {
      ReportError(&context_, "Invoke called on model that is not consistent.");
@@ -384,10 +291,8 @@ TfLiteStatus Interpreter::Invoke() {
  
    TfLiteStatus status = kTfLiteOk;
    if (nnapi_delegate_) {
-    if (AllocateTensorsWhoseSizesAreKnown() == kTfLiteError) {
-      return kTfLiteError;
-    }
-    if (next_allocate_node_id_ == nodes_and_registration_.size()) {
+    TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors());
+    if (next_node_to_prepare_ == nodes_and_registration_.size()) {
        TF_LITE_ENSURE_OK(&context_, nnapi_delegate_->Invoke(this));
        return kTfLiteOk;
      } else {
@@ -400,14 +305,17 @@ TfLiteStatus Interpreter::Invoke() {
      }
    }
  
+  // Invocations are always done in node order.
+  // Note that calling Invoke repeatedly will cause the original memory plan to
+  // be reused, unless either ResizeInputTensor() or AllocateTensors() has been
+  // called.
+  // TODO(b/71913981): we should force recalculation in the presence of dynamic
+  // tensors, because they may have new value which in turn may affect shapes
+  // and allocations.
    for (int i = 0; i < nodes_and_registration_.size(); i++) {
-    // Ensure we have allocated up to this node. The point of this is to
-    // allocate as much as possible before running any evaluation, but
-    // dynamic shapes can prevent this from being possible.
-    if (i >= next_allocate_node_id_) {
-      if (AllocateTensorsWhoseSizesAreKnown() == kTfLiteError) {
-        return kTfLiteError;
-      }
+    if (i == next_node_to_prepare_) {
+      TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors());
+      TF_LITE_ENSURE(&context_, next_node_to_prepare_ >= i);
      }
      TfLiteNode& node = nodes_and_registration_[i].first;
      const TfLiteRegistration& registration = nodes_and_registration_[i].second;
diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h

index 65c61e44bee48535f884a3afaddc691972f5e04b..38dd402e8a971fd0aab51e98610ad12131441862 100644 (file)
--- a/tensorflow/contrib/lite/interpreter.h
+++ b/tensorflow/contrib/lite/interpreter.h
@@ -23,7 +23,7 @@ limitations under the License.
  #include "tensorflow/contrib/lite/allocation.h"
  #include "tensorflow/contrib/lite/context.h"
  #include "tensorflow/contrib/lite/error_reporter.h"
-#include "tensorflow/contrib/lite/simple_memory_arena.h"
+#include "tensorflow/contrib/lite/memory_planner.h"
  
  namespace tflite {
  
@@ -49,13 +49,6 @@ constexpr TfLiteType typeToTfLiteType<unsigned char>() {
    return kTfLiteUInt8;
  }
  
-struct ArenaAllocRefCount {
-  ArenaAllocRefCount() : alloc(), count(0) {}
-
-  ArenaAlloc alloc;
-  int count;
-};
-
  // Forward declare since NNAPIDelegate uses Interpreter.
  class NNAPIDelegate;
  
@@ -276,9 +269,17 @@ class Interpreter {
      return op_reg.invoke(&context_, node);
    }
  
-  // Allocate tensors whose sizes are known in order of nodes. Discontinue when
-  // we encounter a node that has a dynamic output tensor.
-  TfLiteStatus AllocateTensorsWhoseSizesAreKnown();
+  // Call OpPrepare() for as many ops as possible, allocating memory for their
+  // tensors. If an op containing dynamic tensors is found, preparation will be
+  // postponed until this function is called again. This allows the interpreter
+  // to wait until Invoke() to resolve the sizes of dynamic tensors.
+  TfLiteStatus PrepareOpsAndTensors();
+
+  // Call OpPrepare() for all ops starting at 'first_node'. Stop when a
+  // dynamic tensors is found or all ops have been prepared. Fill
+  // 'last_node_prepared' with the id of the op containing dynamic tensors, or
+  // the last in the graph.
+  TfLiteStatus PrepareOpsStartingAt(int first_node, int* last_node_prepared);
  
    // Tensors needed by the interpreter. Use `AddTensors` to add more blank
    // tensor entries. Note, `tensors_.data()` needs to be synchronized to the
@@ -325,17 +326,6 @@ class Interpreter {
    std::vector<std::pair<TfLiteNode, TfLiteRegistration>>
        nodes_and_registration_;
  
-  // Raw memory buffer that is allocated for all temporary and graph outputs.
-  // that are declared kTfLiteArenaRw.
-  SimpleMemoryArena arena_;
-
-  // Raw memory buffer that is allocated for persistent tensors that are
-  // declared as kTfLiteArenaRwPersistent.
-  SimpleMemoryArena persistent_arena_;
-
-  // Stores allocation and reference counts of all tensors.
-  std::vector<ArenaAllocRefCount> allocs_and_refcounts_;
-
    // Whether the model is consistent. That is to say if the inputs and outputs
    // of every node and the global inputs and outputs are valid indexes into
    // the tensor array.
@@ -356,7 +346,7 @@ class Interpreter {
    // The error reporter delegate that tflite will forward queries errors to.
    ErrorReporter* error_reporter_;
  
-  // Next node to allocate output tensors.
+  // Index of the next node to prepare.
    // During Invoke(), Interpreter will allocate input tensors first, which are
    // known to be fixed size. Then it will allocate outputs from nodes as many
    // as possible. When there is a node that produces dynamic sized tensor.
@@ -364,10 +354,12 @@ class Interpreter {
    // node id, and execute the node to generate the output tensor before continue
    // to allocate successors. This process repeats until all nodes are executed.
    // NOTE: this relies on the order of nodes that is in topological order.
-  int next_allocate_node_id_;
+  int next_node_to_prepare_;
  
    // Whether to delegate to NN API
    std::unique_ptr<NNAPIDelegate> nnapi_delegate_;
+
+  std::unique_ptr<MemoryPlanner> memory_planner_;
  };
  
  }  // namespace tflite
diff --git a/tensorflow/contrib/lite/memory_planner.h b/tensorflow/contrib/lite/memory_planner.h

new file mode 100644 (file)

index 0000000..b11d86c
--- /dev/null
+++ b/tensorflow/contrib/lite/memory_planner.h
@@ -0,0 +1,45 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_MEMORY_PLANNER_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_MEMORY_PLANNER_H_
+
+#include "tensorflow/contrib/lite/context.h"
+
+namespace tflite {
+
+// A MemoryPlanner is responsible for planning and executing a number of
+// memory-related operations that are necessary in TF Lite.
+class MemoryPlanner {
+ public:
+  virtual ~MemoryPlanner() {}
+
+  // Plans the necessary memory allocations. This is the MemoryPlanner's
+  // pre-processing step and is called when the graph structure is known but
+  // actual size of the tensors is not.
+  virtual TfLiteStatus PlanAllocations() = 0;
+
+  // Allocates the necessary memory to execute all nodes in the interval
+  // [first_node, last_node].
+  virtual TfLiteStatus ExecuteAllocations(int first_node, int last_node) = 0;
+
+  // Invalidates allocations made earliers. This is called when tensors sizes
+  // have change. All planned allocations remain, but can't be used until
+  // ExecuteAllocations() is called.
+  virtual TfLiteStatus ResetAllocations() = 0;
+};
+
+}  // namespace tflite
+
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_MEMORY_PLANNER_H_
diff --git a/tensorflow/contrib/lite/simple_memory_arena.h b/tensorflow/contrib/lite/simple_memory_arena.h

index 0d0b7f9ff79bf9fd8a60dbc057d63f44eeaa6396..07a38c42436655d307c89a987ebba4db38eba442 100644 (file)
--- a/tensorflow/contrib/lite/simple_memory_arena.h
+++ b/tensorflow/contrib/lite/simple_memory_arena.h
@@ -68,6 +68,10 @@ class SimpleMemoryArena {
  
    TfLiteStatus Clear();
  
+  int64_t BasePointer() const {
+    return reinterpret_cast<int64_t>(underlying_buffer_aligned_ptr_);
+  }
+
   private:
    bool commited_;
    size_t arena_alignment_;
author	A. Unique TensorFlower <gardener@tensorflow.org>
	Sun, 14 Jan 2018 00:21:55 +0000 (16:21 -0800)
committer	TensorFlower Gardener <gardener@tensorflow.org>
	Sun, 14 Jan 2018 00:25:51 +0000 (16:25 -0800)
tensorflow/contrib/lite/BUILD		patch \| blob \| history
tensorflow/contrib/lite/arena_planner.cc	[new file with mode: 0644]	patch \| blob
tensorflow/contrib/lite/arena_planner.h	[new file with mode: 0644]	patch \| blob
tensorflow/contrib/lite/arena_planner_test.cc	[new file with mode: 0644]	patch \| blob
tensorflow/contrib/lite/graph_info.h	[new file with mode: 0644]	patch \| blob
tensorflow/contrib/lite/interpreter.cc		patch \| blob \| history
tensorflow/contrib/lite/interpreter.h		patch \| blob \| history
tensorflow/contrib/lite/memory_planner.h	[new file with mode: 0644]	patch \| blob
tensorflow/contrib/lite/simple_memory_arena.h		patch \| blob \| history