[Static Runtime] Use F14FastMap/F14FastSet (#63999)

author Hao Lu <hlu@fb.com>

Fri, 27 Aug 2021 08:39:14 +0000 (01:39 -0700)

committer Facebook GitHub Bot <facebook-github-bot@users.noreply.github.com>

Fri, 27 Aug 2021 08:40:41 +0000 (01:40 -0700)
author Hao Lu <hlu@fb.com>
Fri, 27 Aug 2021 08:39:14 +0000 (01:39 -0700)
committer Facebook GitHub Bot <facebook-github-bot@users.noreply.github.com>
Fri, 27 Aug 2021 08:40:41 +0000 (01:40 -0700)
diff --git a/torch/csrc/jit/runtime/static/impl.cpp b/torch/csrc/jit/runtime/static/impl.cpp

index cb9342b..b3e1eb1 100644 (file)
--- a/torch/csrc/jit/runtime/static/impl.cpp
+++ b/torch/csrc/jit/runtime/static/impl.cpp
@@ -104,8 +104,8 @@ bool mayContainAlias(AliasDb& db, const Value* a, const Value* b) {
  
  bool mayContainAlias(
      AliasDb& db,
-    const std::unordered_set<const Value*>& a,
-    const std::unordered_set<const Value*>& b) {
+    const FastSet<const Value*>& a,
+    const FastSet<const Value*>& b) {
    std::vector<Value*> as;
    std::vector<Value*> bs;
    as.reserve(a.size());
@@ -122,11 +122,11 @@ bool mayContainAlias(
  }
  
  // Get set of all inputs/outputs/constants (always alive) and their aliases
-std::unordered_set<const Value*> GetAlwaysAliveValues(
+FastSet<const Value*> GetAlwaysAliveValues(
      const std::shared_ptr<torch::jit::Graph>& graph,
      AliasDb& db) {
    // a set of Values whose live-range exceed current inference
-  std::unordered_set<const Value*> always_alive;
+  FastSet<const Value*> always_alive;
  
    // mark inputs, constants, outputs as always_alive
    for (const auto* input : graph->inputs()) {
@@ -148,7 +148,7 @@ std::unordered_set<const Value*> GetAlwaysAliveValues(
      // constants are already in the always_alive set
      if (node->kind() != prim::Constant) {
        for (const auto* v : node->outputs()) {
-        if (mayContainAlias(db, ValueSet{v}, always_alive)) {
+        if (mayContainAlias(db, {v}, always_alive)) {
            always_alive.insert(v);
          }
        }
@@ -158,22 +158,22 @@ std::unordered_set<const Value*> GetAlwaysAliveValues(
  }
  
  //  Map each value to all values that are alive at the same time.
-using LivenessMap = std::unordered_map<const Value*, std::set<const Value*>>;
+using LivenessMap = FastMap<const Value*, std::set<const Value*>>;
  
  //  The algorithm does a traversal of the execution graph
  //  while keeping track of the live values.
  LivenessMap GetLivenessMap(
      const std::shared_ptr<torch::jit::Graph>& graph,
-    const std::unordered_set<const Value*>& always_alive,
+    const FastSet<const Value*>& always_alive,
      AliasDb& db) {
    // map a Value to a set of Values that overlap live-ranges with the Value's
-  std::unordered_map<const Value*, std::set<const Value*>> liveness_map;
+  FastMap<const Value*, std::set<const Value*>> liveness_map;
  
    // map Values to its creation order in graph (Note: only traverse top-level
    // nodes such that nodes under control-flows are represented by top-level
    // block nodes)
    std::vector<const Value*> values_in_creation_order;
-  std::unordered_map<const Value*, size_t> values_to_idx_in_creation_order;
+  FastMap<const Value*, size_t> values_to_idx_in_creation_order;
    for (const auto* node : graph->nodes()) {
      for (const auto* v : node->outputs()) {
        values_to_idx_in_creation_order[v] = values_in_creation_order.size();
@@ -184,10 +184,10 @@ LivenessMap GetLivenessMap(
    // presence of a Value in live_values_use_chain means the Value alive
    // Value mapped to set of Nodes that may use the Value (i.e., use-chain of
    // Value)
-  std::unordered_map<const Value*, std::set<const Node*>> live_values_use_chain;
+  FastMap<const Value*, std::set<const Node*>> live_values_use_chain;
    // Node mapped to set of Values that the Node may use (i.e., def-chain of node
    // inputs)
-  std::unordered_map<const Node*, std::set<const Value*>> live_nodes_def_chain;
+  FastMap<const Node*, std::set<const Value*>> live_nodes_def_chain;
  
    // add v to the current liveness_map
    std::function<void(const Value* v)> add_live_value_fn = [&](const Value* v) {
@@ -320,12 +320,12 @@ LivenessMap GetLivenessMap(
  std::pair<std::vector<const Value*>, std::vector<const Value*>>
  GetMemoryPlanningCandidates(const std::shared_ptr<torch::jit::Graph>& graph) {
    // for determinism
-  std::unordered_set<const Value*> seen_values;
+  FastSet<const Value*> seen_values;
    std::vector<const Value*> all_values;
-  std::unordered_set<const Value*> can_reuse;
+  FastSet<const Value*> can_reuse;
    // values used by unsupported ops (as either inputs or outputs)
    // these need to be removed from "can_reuse" after analyzing all nodes
-  std::unordered_set<const Value*> cannot_reuse;
+  FastSet<const Value*> cannot_reuse;
    for (auto* n : graph->nodes()) {
      bool can_reuse_inputs_outputs = canReuseInputsOutputs(n);
      for (const auto* v : n->inputs()) {
@@ -388,10 +388,9 @@ GetMemoryPlanningCandidates(const std::shared_ptr<torch::jit::Graph>& graph) {
  //
  // NB: This is a deterministic implementation, which makes it easier to tune
  // and debug.
-std::unordered_map<const Value*, std::vector<const Value*>>
-GenerateSameStorageValues(
+FastMap<const Value*, std::vector<const Value*>> GenerateSameStorageValues(
      const LivenessMap& alive_during,
-    const std::unordered_set<const Value*>& always_alive,
+    const FastSet<const Value*>& always_alive,
      const std::pair<std::vector<const Value*>, std::vector<const Value*>>&
          optimizable,
      AliasDb& db) {
@@ -399,8 +398,7 @@ GenerateSameStorageValues(
    const auto& all_values = optimizable.second;
  
    // map Value* to a set Value* that can share the same storage with it
-  std::unordered_map<const Value*, std::vector<const Value*>>
-      same_storage_values;
+  FastMap<const Value*, std::vector<const Value*>> same_storage_values;
  
    // make new_v and old_v map to the same storage (i.e., add to each other's
    // same_storage_values set)
@@ -589,9 +587,9 @@ StaticModule::StaticModule(
    }
  
    // map Value* to IValue (from inputs or prim::Constant) or null
-  std::unordered_map<Value*, IValue*> value_to_ivalue;
+  FastMap<Value*, IValue*> value_to_ivalue;
    // map Value* to its SSA definition IR
-  std::unordered_map<Value*, DefInfo> value_to_ssa_def;
+  FastMap<Value*, DefInfo> value_to_ssa_def;
  
    // N inputs map to the first N entries in storage
    for (const auto i : c10::irange(graph_->inputs().size())) {
@@ -1165,8 +1163,7 @@ void StaticRuntime::check_for_memory_leak(bool output_returned) {
      TORCH_CHECK(inputs_[i].isNone(), "Input ", i, " was not cleaned up");
    }
  
-  std::unordered_set<const IValue*> output_ivalues(
-      outputs_.begin(), outputs_.end());
+  FastSet<const IValue*> output_ivalues(outputs_.begin(), outputs_.end());
    for (const auto n : c10::irange(nodes_.size())) {
      auto& pnode = nodes_[n];
      for (const auto i : c10::irange(pnode.outputs().size())) {
@@ -1202,13 +1199,13 @@ void StaticRuntime::check_for_memory_leak(bool output_returned) {
  
  static void assign_storage_to_managed_tensors(
      StaticRuntime* runtime,
-    const std::unordered_set<const Value*>& managed_tensor_values,
-    const std::unordered_map<const Value*, std::vector<const Value*>>&
+    const FastSet<const Value*>& managed_tensor_values,
+    const FastMap<const Value*, std::vector<const Value*>>&
          value_to_same_storage_values,
      std::vector<std::pair<size_t, std::vector<at::Tensor*>>>& managed_tensors) {
    // map Value to index to managed_storage, where multiple values can
    // map to the same index (i.e., sharing the same storage)
-  std::unordered_map<const Value*, size_t> value_to_storage_idx;
+  FastMap<const Value*, size_t> value_to_storage_idx;
  
    // Snapshot of the current memory state
    for (auto& pnode : runtime->nodes()) {
@@ -1218,19 +1215,21 @@ static void assign_storage_to_managed_tensors(
        if (managed_tensor_values.count(val)) {
          TORCH_CHECK(ival.isTensor());
          at::Tensor* tensor = &ival.toTensor();
-
-        if (value_to_storage_idx.count(val)) {
-          managed_tensors[value_to_storage_idx[val]].second.emplace_back(
-              tensor);
+        auto f = value_to_storage_idx.find(val);
+        if (f != value_to_storage_idx.end()) {
+          auto storage_idx = f->second;
+          managed_tensors[storage_idx].second.emplace_back(tensor);
          } else {
            auto p =
                std::make_pair<size_t, std::vector<at::Tensor*>>(0, {tensor});
            managed_tensors.emplace_back(std::move(p));
            // first of a group, update the value_to_storage_idx map with the
            // index
-          if (value_to_same_storage_values.count(val)) {
+          auto f = value_to_same_storage_values.find(val);
+          if (f != value_to_same_storage_values.end()) {
              auto storage_idx = managed_tensors.size() - 1;
-            for (const auto* v : value_to_same_storage_values.at(val)) {
+            const auto& same_storage_values = f->second;
+            for (const auto* v : same_storage_values) {
                value_to_storage_idx[v] = storage_idx;
              }
            }
@@ -1242,14 +1241,14 @@ static void assign_storage_to_managed_tensors(
  
  MemoryPlanner::MemoryPlanner(
      StaticRuntime* runtime,
-    const std::unordered_map<const Value*, std::vector<const Value*>>&
+    const FastMap<const Value*, std::vector<const Value*>>&
          value_to_same_storage_values,
-    const std::unordered_set<const Value*>& external_values,
+    const FastSet<const Value*>& external_values,
      bool enable_out_variant,
      bool manage_graph_output_memory) {
    // collect register indices of outputs of ops with out variant
-  std::unordered_set<const Value*> managed_tensor_values;
-  std::unordered_set<const Value*> leaked_values;
+  FastSet<const Value*> managed_tensor_values;
+  FastSet<const Value*> leaked_values;
    if (enable_out_variant) {
      for (ProcessedNode& pnode : runtime->nodes()) {
        if (pnode.has_out_variant()) {
@@ -1260,7 +1259,7 @@ MemoryPlanner::MemoryPlanner(
            }
            // Types are stored in the underlying TorchScript IR
            const auto& type = out_v->type();
-          if (type->cast<TensorType>()) {
+          if (type->castRaw<TensorType>()) {
              managed_tensor_values.insert(out_v);
            } else if (isOptimizableContainerType(pnode.node())) {
              // We "leak" certain container types because their allocations take
@@ -1273,7 +1272,7 @@ MemoryPlanner::MemoryPlanner(
    }
  
    // collect unmanaged output ivalues
-  std::unordered_set<IValue*> unmanaged_ivalues;
+  FastSet<IValue*> unmanaged_ivalues;
    for (ProcessedNode& pnode : runtime->nodes()) {
      for (const auto i : c10::irange(pnode.outputs().size())) {
        // Types are stored in the underlying TorchScript IR
@@ -1295,9 +1294,11 @@ MemoryPlanner::MemoryPlanner(
    }
  
    // copy to unmanaged_ivalues_
-  for (IValue* out : unmanaged_ivalues) {
-    unmanaged_ivalues_.emplace_back(out);
-  }
+  unmanaged_ivalues_.reserve(unmanaged_ivalues.size());
+  unmanaged_ivalues_.insert(
+      unmanaged_ivalues_.begin(),
+      unmanaged_ivalues.begin(),
+      unmanaged_ivalues.end());
  
    if (enable_out_variant) {
      ::torch::jit::assign_storage_to_managed_tensors(
diff --git a/torch/csrc/jit/runtime/static/impl.h b/torch/csrc/jit/runtime/static/impl.h

index b16cfef..6cff047 100644 (file)
--- a/torch/csrc/jit/runtime/static/impl.h
+++ b/torch/csrc/jit/runtime/static/impl.h
@@ -9,9 +9,26 @@
  #include <torch/csrc/jit/passes/freeze_module.h>
  #include <torch/csrc/jit/passes/inliner.h>
  
+#ifdef FBCODE_CAFFE2
+#include <folly/container/F14Map.h>
+#include <folly/container/F14Set.h>
+#endif
+
  namespace torch {
  namespace jit {
  
+#ifdef FBCODE_CAFFE2
+template <typename Key, typename Value>
+using FastMap = folly::F14FastMap<Key, Value>;
+template <typename Key>
+using FastSet = folly::F14FastSet<Key>;
+#else
+template <typename Key, typename Value>
+using FastMap = std::unordered_map<Key, Value>;
+template <typename Key>
+using FastSet = std::unordered_set<Key>;
+#endif
+
  TORCH_API bool canEnableStaticRuntime(
      const std::shared_ptr<torch::jit::Graph>& graph);
  
@@ -127,7 +144,7 @@ class TORCH_API StaticModule {
    size_t num_inputs() const;
    size_t num_outputs() const;
  
-  const std::unordered_map<int, std::vector<DefInfo>>& index_map() const {
+  const FastMap<int, std::vector<DefInfo>>& index_map() const {
      return node_inputs_ssa_def_map_;
    }
  
@@ -147,12 +164,12 @@ class TORCH_API StaticModule {
      return schema_;
    }
  
-  const std::unordered_map<const Value*, std::vector<const Value*>>&
+  const FastMap<const Value*, std::vector<const Value*>>&
    values_share_same_storage() const {
      return value_to_same_storage_values_;
    }
  
-  const std::unordered_set<const Value*>& external_values() const {
+  const FastSet<const Value*>& external_values() const {
      return external_values_;
    }
  
@@ -178,14 +195,14 @@ class TORCH_API StaticModule {
    // a vector of ssa_defs corresponding to graph->outputs()
    std::vector<DefInfo> output_ssa_defs_;
    // map a node idx (in graph order) to a vector of ssa_defs for node inputs
-  std::unordered_map<int, std::vector<DefInfo>> node_inputs_ssa_def_map_;
+  FastMap<int, std::vector<DefInfo>> node_inputs_ssa_def_map_;
  
    // Bookkeeping for MemoryPlanner in StaticRuntime
    // values whose live-time exceeds that of running one inference (e.g., input,
    // output, prim::Constants, and their aliases)
-  std::unordered_set<const Value*> external_values_;
+  FastSet<const Value*> external_values_;
    // map a value to the set of values that may share the same storage with it
-  std::unordered_map<const Value*, std::vector<const Value*>>
+  FastMap<const Value*, std::vector<const Value*>>
        value_to_same_storage_values_;
  };
  
@@ -323,8 +340,8 @@ class MemoryPlanner {
   public:
    explicit MemoryPlanner(
        StaticRuntime* runtime,
-      const std::unordered_map<const Value*, std::vector<const Value*>>&,
-      const std::unordered_set<const Value*>& external_values,
+      const FastMap<const Value*, std::vector<const Value*>>&,
+      const FastSet<const Value*>& external_values,
        bool enable_out_variant,
        bool manage_graph_output_memory);
    // disable copying and moving
diff --git a/torch/csrc/jit/runtime/static/ops.cpp b/torch/csrc/jit/runtime/static/ops.cpp

index 484c4b0..54c0456 100644 (file)
--- a/torch/csrc/jit/runtime/static/ops.cpp
+++ b/torch/csrc/jit/runtime/static/ops.cpp
@@ -16,6 +16,7 @@
  #include <ATen/native/quantized/cpu/qembeddingbag.h>
  #include <c10/util/irange.h>
  #include <torch/csrc/jit/ir/ir.h>
+#include <torch/csrc/jit/runtime/static/impl.h>
  #include <torch/csrc/jit/runtime/static/te_wrapper.h>
  #include <torch/csrc/jit/runtime/vararg_functions.h>
  #include <torch/csrc/jit/tensorexpr/ir.h>
@@ -288,7 +289,7 @@ bool disableUnsafeMathOp(const char* op_name) {
    // not guarantee bit exactness vs the jit interpreter. Note aten::relu is not
    // included even though it uses NNC because the results of relu should always
    // match.
-  static const std::unordered_set<std::string> fast_ops{
+  static const FastSet<std::string> fast_ops{
        "aten::add", "aten::tanh", "aten::sigmoid", "aten::logit"};
    return fast_ops.count(op_name) > 0;
  }
diff --git a/torch/csrc/jit/runtime/static/te_wrapper.cpp b/torch/csrc/jit/runtime/static/te_wrapper.cpp

index d8b494c..acd1fb7 100644 (file)
--- a/torch/csrc/jit/runtime/static/te_wrapper.cpp
+++ b/torch/csrc/jit/runtime/static/te_wrapper.cpp
@@ -2,6 +2,7 @@
  
  #include <ATen/CPUFunctions.h>
  #include <torch/csrc/jit/ir/ir.h>
+#include <torch/csrc/jit/runtime/static/impl.h>
  
  namespace torch {
  namespace jit {
@@ -79,8 +80,8 @@ std::mutex& getNNCCacheMutex() {
    return nncCacheMutex;
  }
  
-std::unordered_map<NodeKind, std::shared_ptr<TEWrapper>>& getNNCCache() {
-  static std::unordered_map<NodeKind, std::shared_ptr<TEWrapper>> nncCache;
+FastMap<NodeKind, std::shared_ptr<TEWrapper>>& getNNCCache() {
+  static FastMap<NodeKind, std::shared_ptr<TEWrapper>> nncCache;
    return nncCache;
  }
author	Hao Lu <hlu@fb.com>
	Fri, 27 Aug 2021 08:39:14 +0000 (01:39 -0700)
committer	Facebook GitHub Bot <facebook-github-bot@users.noreply.github.com>
	Fri, 27 Aug 2021 08:40:41 +0000 (01:40 -0700)
torch/csrc/jit/runtime/static/impl.cpp		patch | blob | history
torch/csrc/jit/runtime/static/impl.h		patch | blob | history
torch/csrc/jit/runtime/static/ops.cpp		patch | blob | history
torch/csrc/jit/runtime/static/te_wrapper.cpp		patch | blob | history