bool mayContainAlias(
AliasDb& db,
- const std::unordered_set<const Value*>& a,
- const std::unordered_set<const Value*>& b) {
+ const FastSet<const Value*>& a,
+ const FastSet<const Value*>& b) {
std::vector<Value*> as;
std::vector<Value*> bs;
as.reserve(a.size());
}
// Get set of all inputs/outputs/constants (always alive) and their aliases
-std::unordered_set<const Value*> GetAlwaysAliveValues(
+FastSet<const Value*> GetAlwaysAliveValues(
const std::shared_ptr<torch::jit::Graph>& graph,
AliasDb& db) {
// a set of Values whose live-range exceed current inference
- std::unordered_set<const Value*> always_alive;
+ FastSet<const Value*> always_alive;
// mark inputs, constants, outputs as always_alive
for (const auto* input : graph->inputs()) {
// constants are already in the always_alive set
if (node->kind() != prim::Constant) {
for (const auto* v : node->outputs()) {
- if (mayContainAlias(db, ValueSet{v}, always_alive)) {
+ if (mayContainAlias(db, {v}, always_alive)) {
always_alive.insert(v);
}
}
}
// Map each value to all values that are alive at the same time.
-using LivenessMap = std::unordered_map<const Value*, std::set<const Value*>>;
+using LivenessMap = FastMap<const Value*, std::set<const Value*>>;
// The algorithm does a traversal of the execution graph
// while keeping track of the live values.
LivenessMap GetLivenessMap(
const std::shared_ptr<torch::jit::Graph>& graph,
- const std::unordered_set<const Value*>& always_alive,
+ const FastSet<const Value*>& always_alive,
AliasDb& db) {
// map a Value to a set of Values that overlap live-ranges with the Value's
- std::unordered_map<const Value*, std::set<const Value*>> liveness_map;
+ FastMap<const Value*, std::set<const Value*>> liveness_map;
// map Values to its creation order in graph (Note: only traverse top-level
// nodes such that nodes under control-flows are represented by top-level
// block nodes)
std::vector<const Value*> values_in_creation_order;
- std::unordered_map<const Value*, size_t> values_to_idx_in_creation_order;
+ FastMap<const Value*, size_t> values_to_idx_in_creation_order;
for (const auto* node : graph->nodes()) {
for (const auto* v : node->outputs()) {
values_to_idx_in_creation_order[v] = values_in_creation_order.size();
// presence of a Value in live_values_use_chain means the Value alive
// Value mapped to set of Nodes that may use the Value (i.e., use-chain of
// Value)
- std::unordered_map<const Value*, std::set<const Node*>> live_values_use_chain;
+ FastMap<const Value*, std::set<const Node*>> live_values_use_chain;
// Node mapped to set of Values that the Node may use (i.e., def-chain of node
// inputs)
- std::unordered_map<const Node*, std::set<const Value*>> live_nodes_def_chain;
+ FastMap<const Node*, std::set<const Value*>> live_nodes_def_chain;
// add v to the current liveness_map
std::function<void(const Value* v)> add_live_value_fn = [&](const Value* v) {
std::pair<std::vector<const Value*>, std::vector<const Value*>>
GetMemoryPlanningCandidates(const std::shared_ptr<torch::jit::Graph>& graph) {
// for determinism
- std::unordered_set<const Value*> seen_values;
+ FastSet<const Value*> seen_values;
std::vector<const Value*> all_values;
- std::unordered_set<const Value*> can_reuse;
+ FastSet<const Value*> can_reuse;
// values used by unsupported ops (as either inputs or outputs)
// these need to be removed from "can_reuse" after analyzing all nodes
- std::unordered_set<const Value*> cannot_reuse;
+ FastSet<const Value*> cannot_reuse;
for (auto* n : graph->nodes()) {
bool can_reuse_inputs_outputs = canReuseInputsOutputs(n);
for (const auto* v : n->inputs()) {
//
// NB: This is a deterministic implementation, which makes it easier to tune
// and debug.
-std::unordered_map<const Value*, std::vector<const Value*>>
-GenerateSameStorageValues(
+FastMap<const Value*, std::vector<const Value*>> GenerateSameStorageValues(
const LivenessMap& alive_during,
- const std::unordered_set<const Value*>& always_alive,
+ const FastSet<const Value*>& always_alive,
const std::pair<std::vector<const Value*>, std::vector<const Value*>>&
optimizable,
AliasDb& db) {
const auto& all_values = optimizable.second;
// map Value* to a set Value* that can share the same storage with it
- std::unordered_map<const Value*, std::vector<const Value*>>
- same_storage_values;
+ FastMap<const Value*, std::vector<const Value*>> same_storage_values;
// make new_v and old_v map to the same storage (i.e., add to each other's
// same_storage_values set)
}
// map Value* to IValue (from inputs or prim::Constant) or null
- std::unordered_map<Value*, IValue*> value_to_ivalue;
+ FastMap<Value*, IValue*> value_to_ivalue;
// map Value* to its SSA definition IR
- std::unordered_map<Value*, DefInfo> value_to_ssa_def;
+ FastMap<Value*, DefInfo> value_to_ssa_def;
// N inputs map to the first N entries in storage
for (const auto i : c10::irange(graph_->inputs().size())) {
TORCH_CHECK(inputs_[i].isNone(), "Input ", i, " was not cleaned up");
}
- std::unordered_set<const IValue*> output_ivalues(
- outputs_.begin(), outputs_.end());
+ FastSet<const IValue*> output_ivalues(outputs_.begin(), outputs_.end());
for (const auto n : c10::irange(nodes_.size())) {
auto& pnode = nodes_[n];
for (const auto i : c10::irange(pnode.outputs().size())) {
static void assign_storage_to_managed_tensors(
StaticRuntime* runtime,
- const std::unordered_set<const Value*>& managed_tensor_values,
- const std::unordered_map<const Value*, std::vector<const Value*>>&
+ const FastSet<const Value*>& managed_tensor_values,
+ const FastMap<const Value*, std::vector<const Value*>>&
value_to_same_storage_values,
std::vector<std::pair<size_t, std::vector<at::Tensor*>>>& managed_tensors) {
// map Value to index to managed_storage, where multiple values can
// map to the same index (i.e., sharing the same storage)
- std::unordered_map<const Value*, size_t> value_to_storage_idx;
+ FastMap<const Value*, size_t> value_to_storage_idx;
// Snapshot of the current memory state
for (auto& pnode : runtime->nodes()) {
if (managed_tensor_values.count(val)) {
TORCH_CHECK(ival.isTensor());
at::Tensor* tensor = &ival.toTensor();
-
- if (value_to_storage_idx.count(val)) {
- managed_tensors[value_to_storage_idx[val]].second.emplace_back(
- tensor);
+ auto f = value_to_storage_idx.find(val);
+ if (f != value_to_storage_idx.end()) {
+ auto storage_idx = f->second;
+ managed_tensors[storage_idx].second.emplace_back(tensor);
} else {
auto p =
std::make_pair<size_t, std::vector<at::Tensor*>>(0, {tensor});
managed_tensors.emplace_back(std::move(p));
// first of a group, update the value_to_storage_idx map with the
// index
- if (value_to_same_storage_values.count(val)) {
+ auto f = value_to_same_storage_values.find(val);
+ if (f != value_to_same_storage_values.end()) {
auto storage_idx = managed_tensors.size() - 1;
- for (const auto* v : value_to_same_storage_values.at(val)) {
+ const auto& same_storage_values = f->second;
+ for (const auto* v : same_storage_values) {
value_to_storage_idx[v] = storage_idx;
}
}
MemoryPlanner::MemoryPlanner(
StaticRuntime* runtime,
- const std::unordered_map<const Value*, std::vector<const Value*>>&
+ const FastMap<const Value*, std::vector<const Value*>>&
value_to_same_storage_values,
- const std::unordered_set<const Value*>& external_values,
+ const FastSet<const Value*>& external_values,
bool enable_out_variant,
bool manage_graph_output_memory) {
// collect register indices of outputs of ops with out variant
- std::unordered_set<const Value*> managed_tensor_values;
- std::unordered_set<const Value*> leaked_values;
+ FastSet<const Value*> managed_tensor_values;
+ FastSet<const Value*> leaked_values;
if (enable_out_variant) {
for (ProcessedNode& pnode : runtime->nodes()) {
if (pnode.has_out_variant()) {
}
// Types are stored in the underlying TorchScript IR
const auto& type = out_v->type();
- if (type->cast<TensorType>()) {
+ if (type->castRaw<TensorType>()) {
managed_tensor_values.insert(out_v);
} else if (isOptimizableContainerType(pnode.node())) {
// We "leak" certain container types because their allocations take
}
// collect unmanaged output ivalues
- std::unordered_set<IValue*> unmanaged_ivalues;
+ FastSet<IValue*> unmanaged_ivalues;
for (ProcessedNode& pnode : runtime->nodes()) {
for (const auto i : c10::irange(pnode.outputs().size())) {
// Types are stored in the underlying TorchScript IR
}
// copy to unmanaged_ivalues_
- for (IValue* out : unmanaged_ivalues) {
- unmanaged_ivalues_.emplace_back(out);
- }
+ unmanaged_ivalues_.reserve(unmanaged_ivalues.size());
+ unmanaged_ivalues_.insert(
+ unmanaged_ivalues_.begin(),
+ unmanaged_ivalues.begin(),
+ unmanaged_ivalues.end());
if (enable_out_variant) {
::torch::jit::assign_storage_to_managed_tensors(
#include <torch/csrc/jit/passes/freeze_module.h>
#include <torch/csrc/jit/passes/inliner.h>
+// F14 containers are folly's flat hash map/set; pulled in only for internal
+// (FBCODE) builds where folly is guaranteed to be available.
+#ifdef FBCODE_CAFFE2
+#include <folly/container/F14Map.h>
+#include <folly/container/F14Set.h>
+#endif
+
namespace torch {
namespace jit {
+// FastMap/FastSet: hash-container aliases used throughout static runtime.
+// In FBCODE builds they resolve to folly::F14FastMap/F14FastSet (flat,
+// cache-friendly layout); in OSS builds they fall back to the standard
+// node-based std::unordered_map/std::unordered_set, so both variants share
+// the same lookup/insert API surface used by the code below.
+// NOTE(review): the #else branch assumes <unordered_map>/<unordered_set> are
+// already reachable transitively through the existing includes — confirm.
+#ifdef FBCODE_CAFFE2
+template <typename Key, typename Value>
+using FastMap = folly::F14FastMap<Key, Value>;
+template <typename Key>
+using FastSet = folly::F14FastSet<Key>;
+#else
+template <typename Key, typename Value>
+using FastMap = std::unordered_map<Key, Value>;
+template <typename Key>
+using FastSet = std::unordered_set<Key>;
+#endif
+
+
TORCH_API bool canEnableStaticRuntime(
const std::shared_ptr<torch::jit::Graph>& graph);
size_t num_inputs() const;
size_t num_outputs() const;
- const std::unordered_map<int, std::vector<DefInfo>>& index_map() const {
+ const FastMap<int, std::vector<DefInfo>>& index_map() const {
return node_inputs_ssa_def_map_;
}
return schema_;
}
- const std::unordered_map<const Value*, std::vector<const Value*>>&
+ const FastMap<const Value*, std::vector<const Value*>>&
values_share_same_storage() const {
return value_to_same_storage_values_;
}
- const std::unordered_set<const Value*>& external_values() const {
+ const FastSet<const Value*>& external_values() const {
return external_values_;
}
// a vector of ssa_defs corresponding to graph->outputs()
std::vector<DefInfo> output_ssa_defs_;
// map a node idx (in graph order) to a vector of ssa_defs for node inputs
- std::unordered_map<int, std::vector<DefInfo>> node_inputs_ssa_def_map_;
+ FastMap<int, std::vector<DefInfo>> node_inputs_ssa_def_map_;
// Bookkeeping for MemoryPlanner in StaticRuntime
// values whose live-time exceeds that of running one inference (e.g., input,
// output, prim::Constants, and their aliases)
- std::unordered_set<const Value*> external_values_;
+ FastSet<const Value*> external_values_;
// map a value to the set of values that may share the same storage with it
- std::unordered_map<const Value*, std::vector<const Value*>>
+ FastMap<const Value*, std::vector<const Value*>>
value_to_same_storage_values_;
};
public:
explicit MemoryPlanner(
StaticRuntime* runtime,
- const std::unordered_map<const Value*, std::vector<const Value*>>&,
- const std::unordered_set<const Value*>& external_values,
+ const FastMap<const Value*, std::vector<const Value*>>&,
+ const FastSet<const Value*>& external_values,
bool enable_out_variant,
bool manage_graph_output_memory);
// disable copying and moving