Symbol name() const {
return typename_;
}
-
+ const std::vector<IValue>& slots() const {
+ return slots_;
+ }
private:
const Symbol typename_;
std::vector<IValue> slots_;
attributeTypes_.push_back(type);
}
+ at::ArrayRef<std::string> attributeNames() const {
+ return attributeNames_;
+ }
+
at::ArrayRef<TypePtr> containedTypes() const override {
return attributeTypes_;
}
+ // generate a refined version of this class.
+ // It has the same name, but the slot types are subtypes of
+ // the original slots. It is only valid to refine a class type in a context
+ // where it is known that there are no assignments to the object's slots
+ // that would invalidate the refinement.
+ // These variants are not registered in the global class table.
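+ // Illustrative example: for a class Foo with slots (Tensor, Tensor),
+ // refine() with two DimensionedTensorType subtypes yields an unregistered
+ // copy of Foo whose slots carry those refined types.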
+ ClassTypePtr refine(at::ArrayRef<TypePtr> refined_slots) const;
static const TypeKind Kind = TypeKind::ClassType;
private:
return ptr;
}
+ClassTypePtr ClassType::refine(at::ArrayRef<TypePtr> refined_slots) const {
+ auto ptr = ClassTypePtr(new ClassType(typename_, module_));
+ AT_ASSERT(numAttributes() == refined_slots.size());
+ for (size_t i = 0; i < attributeNames_.size(); ++i) {
+ AT_ASSERT(refined_slots[i]->isSubtypeOf(attributeTypes_[i]));
+ ptr->addAttribute(attributeNames_[i], refined_slots[i]);
+ }
+ return ptr;
+}
+
ClassTypePtr ClassType::get(const std::string& name) {
return getRegistry().getType(name);
}
at::empty_strided(2, 2, at::CPU(at::kFloat).options()), true);
auto b_var = autograd::make_variable(
at::empty_strided(2, 2, at::CPU(at::kFloat).options()), false);
- setInputTypes(*graph, ArgumentSpec(true, {a_var, b_var}, 2));
+
+ ArgumentSpecCreator asc(*graph);
+ asc.setInputTypes(*graph, asc.create(true, {a_var, b_var}));
+
PropagateInputShapes(graph);
PropagateRequiresGrad(graph);
def test_tuple_specialization(self):
@torch.jit.script
- def f(t):
- # type: (Tuple[Tensor, Tensor]) -> Tensor
- x, y = t
+ def f(t, s):
+ # type: (Tuple[Tensor, Tuple[int, Tensor]], str) -> Tensor
+ x, t2 = t
+ _, y = t2
return x + y
- t = torch.randn(2, 2), torch.randn(2, 2)
- f(t)
- graph = f.graph_for(t)
+ t = torch.randn(2, 2), (1, torch.randn(2, 2))
+ f(t, "hi")
+ graph = f.graph_for(t, "hi")
input_types = list(next(graph.inputs()).type().elements())
- for t in input_types:
- self.assertEqual(t.kind(), 'DimensionedTensorType')
+ self.assertEqual(input_types[0].kind(), 'DimensionedTensorType')
+ self.assertEqual(input_types[1].elements()[1].kind(), 'DimensionedTensorType')
def test_constant_prop_simple(self):
@torch.jit.script
# test that shape analysis is written correctly for sum with IntArrayRef[1] dim argument
self.run_pass('constant_propagation', func.graph)
self.run_pass('constant_propagation', func2.graph)
- torch._C._jit_pass_shape_analysis(
- func.graph, (torch.zeros(1, 1, 1, 1, 4),), False)
- torch._C._jit_pass_shape_analysis(
- func2.graph, (torch.zeros(1, 1, 1, 1, 4),), False)
- self.assertTrue(func.graph.findNode("aten::sum").output().type().kind()
+ g = func._get_method('forward').propagate_shapes((torch.zeros(1, 1, 1, 1, 4),), False)
+ g2 = func2._get_method('forward').propagate_shapes((torch.zeros(1, 1, 1, 1, 4),), False)
+ self.assertTrue(g.findNode("aten::sum").output().type().kind()
== "DimensionedTensorType")
- self.assertTrue(func2.graph.findNode("aten::sum").output().type().kind()
+ self.assertTrue(g2.findNode("aten::sum").output().type().kind()
== "DimensionedTensorType")
def test_cat(self):
torch.mul(x, y, out=z)
return z
- torch._C._jit_pass_shape_analysis(
- test.graph, (torch.zeros(2, 1), torch.zeros(1, 2), torch.zeros(1, 1, 1)), False)
- self.assertTrue(next(test.graph.outputs()).type() == TensorType.get())
+ graph = test._get_method('forward').propagate_shapes(
+ (torch.zeros(2, 1), torch.zeros(1, 2), torch.zeros(1, 1, 1)), False)
+ self.assertTrue(next(graph.outputs()).type() == TensorType.get())
out_op_graph_input()
def test_resize():
after_resize_alias = b.add_(1)
return after_resize_alias
- g = test.graph
- self.run_pass('constant_propagation', g)
- torch._C._jit_pass_shape_analysis(
- g, (torch.zeros(1, 1),), False)
+ self.run_pass('constant_propagation', test.graph)
+ g = test._get_method('forward').propagate_shapes((torch.zeros(1, 1),), False)
resize_node = g.findNode("aten::resize_")
# first input and output of b.resize_ is b
self.assertTrue(next(resize_node.inputs()).type() == TensorType.get())
g = test.graph
self.run_pass('constant_propagation', g)
- torch._C._jit_pass_shape_analysis(
- g, (torch.zeros(1, 1),), False)
+ g = test._get_method('forward').propagate_shapes((torch.zeros(1, 1),), False)
# x doesn't alias a resized op so it shouldn't be set to base Tensor type
self.assertTrue(next(g.inputs()).type() != TensorType.get())
return x.view(T, B, C)
x = torch.randn(3, 1, 5, requires_grad=True)
- graph = torch.jit.script(fn).graph
- torch._C._jit_pass_shape_analysis(graph, (x,), False)
+ fn = torch.jit.script(fn)
+ graph = fn._get_method('forward').propagate_shapes((x,), False)
a = next(graph.outputs()).type().kind()
self.assertTrue(next(graph.outputs()).type().kind() != 'TensorType')
return torch.cat(c)
b = torch.zeros(2, 4)
- test_list.graph.propagate_shapes((b,), False)
+ test_list._get_method('forward').propagate_shapes((b,), False)
def test_if_supertype(self):
@torch.jit.script
b = torch.zeros(2, 4, dtype=torch.long)
c = torch.zeros(2, 4, dtype=torch.float)
- tensor_unifying.graph.propagate_shapes((a, b, c), False)
- if_outputs = list(tensor_unifying.graph.findNode("prim::If").outputs())
+ graph = tensor_unifying._get_method('forward').propagate_shapes((a, b, c), False)
+ if_outputs = list(graph.findNode("prim::If").outputs())
self.assertTrue(if_outputs[0].type().str() == "Float(*, *)")
self.assertTrue(if_outputs[1].type().str() == "Tensor")
self.assertTrue(if_outputs[2].type().str() == "Tensor")
self.assertEqual(x, f2.x)
self.assertEqual(y, f2.y)
+ def test_class_specialization(self):
+ @torch.jit.script # noqa: B903
+ class Foo(object):
+ def __init__(self, x, y):
+ self.x = x
+ self.y = y
+
+ def use_foo(foo, foo2, tup):
+ # type: (Foo, Foo, Tuple[Foo, Foo]) -> Tensor
+ a, b = tup
+ return foo.x + foo2.y + a.x + b.y
+
+ # create from python
+ x = torch.ones(2, 3)
+ y = torch.zeros(2, 3)
+ f = Foo(x, y)
+ f2 = Foo(x * 2, y * 3)
+ f3 = Foo(x * 4, y * 4)
+
+ input = (f, f2, (f, f3))
+ sfoo = self.checkScript(use_foo, input)
+ graphstr = str(sfoo.graph_for(*input))
+ FileCheck().check_count("Double(*, *) = prim::GetAttr", 4).run(graphstr)
+
class TestLogging(JitTestCase):
def test_bump_numeric_counter(self):
"torch/csrc/Exceptions.cpp",
"torch/csrc/jit/autodiff.cpp",
"torch/csrc/jit/attributes.cpp",
+ "torch/csrc/jit/argument_spec.cpp",
"torch/csrc/jit/constants.cpp",
"torch/csrc/jit/node_hashing.cpp",
"torch/csrc/jit/export.cpp",
${TORCH_SRC_DIR}/csrc/autograd/VariableTypeManual.cpp
${TORCH_SRC_DIR}/csrc/jit/autodiff.cpp
${TORCH_SRC_DIR}/csrc/jit/attributes.cpp
+ ${TORCH_SRC_DIR}/csrc/jit/argument_spec.cpp
${TORCH_SRC_DIR}/csrc/jit/export.cpp
${TORCH_SRC_DIR}/csrc/jit/pickler.cpp
${TORCH_SRC_DIR}/csrc/jit/generated/register_aten_ops_0.cpp
--- /dev/null
+
+#include <torch/csrc/jit/argument_spec.h>
+
+namespace torch {
+namespace jit {
+
+void ArgumentSpecCreator::scan(
+ const TypePtr& typ,
+ size_t depth,
+ const WrittenSlots& written_slots) {
+ auto finishAggregate = [&](size_t pos) {
+ // it is possible that, after all the work we did scanning this aggregate,
+ // we found no tensors to specialize. In this case, just generate
+ // a skip for the whole aggregate.
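+ // (e.g. Tuple[int, int] first scans to ENTER_TUPLE SKIP SKIP, which is
+ // collapsed here into a single SKIP)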
+ bool any_spec = std::any_of(
+ instructions_.begin() + pos, instructions_.end(), [](Inst i) {
+ return i == SPECIALIZE_TENSOR;
+ });
+ if (!any_spec) {
+ instructions_[pos] = SKIP;
+ instructions_.resize(pos + 1);
+ } else {
+ instructions_.emplace_back(LEAVE);
+ }
+ };
+ // the simple vm that runs instructions_ has a limited stack depth;
+ // this check prevents descending any deeper: the element is consumed
+ // with a single SKIP instead
+ if (depth >= DEPTH_LIMIT) {
+ instructions_.emplace_back(SKIP);
+ return;
+ }
+ if (typ->isSubtypeOf(TensorType::get())) {
+ num_tensors_++;
+ instructions_.emplace_back(SPECIALIZE_TENSOR);
+ } else if (auto tup = typ->cast<TupleType>()) {
+ size_t pos = instructions_.size();
+ instructions_.emplace_back(ENTER_TUPLE);
+ for (const auto& elem : tup->containedTypes()) {
+ scan(elem, depth + 1, written_slots);
+ }
+ finishAggregate(pos);
+ } else if (auto cls = typ->cast<ClassType>()) {
+ size_t pos = instructions_.size();
+ instructions_.emplace_back(ENTER_OBJECT);
+ for (size_t i = 0; i < cls->numAttributes(); ++i) {
+ auto key = cls->name() + cls->attributeNames().at(i);
+ // it is only safe to specialize a slot that is never written to,
+ // since a write could change the runtime type and invalidate the refinement
+ if (!written_slots.count(key)) {
+ scan(cls->containedTypes().at(i), depth + 1, written_slots);
+ } else {
+ instructions_.emplace_back(SKIP);
+ }
+ }
+ finishAggregate(pos);
+ } else {
+ instructions_.emplace_back(SKIP);
+ }
+}
+
+// this is a coarse-grained guarantee that the slots of a class will not be
+// modified by the function. It works fine for things that used to be
+// read-only modules, but will be overly conservative when some classes are
+// written to. Doing alias analysis and looking for writes to the class would
+// be more accurate.
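+// Illustrative example: a method body containing `self.counter = x` records
+// the key "<class name>counter" here, so scan() will emit SKIP for that slot
+// instead of SPECIALIZE_TENSOR.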
+static void scanWrittenSlots(
+ Block* block,
+ ArgumentSpecCreator::WrittenSlots& written_slots) {
+ for (Node* n : block->nodes()) {
+ if (n->kind() == prim::SetAttr) {
+ if (auto cls = n->inputs().at(0)->type()->cast<ClassType>()) {
+ written_slots.insert(cls->name() + n->s(attr::name));
+ }
+ }
+ for (Block* subblock : n->blocks()) {
+ scanWrittenSlots(subblock, written_slots);
+ }
+ if (n->hasAttribute(attr::Subgraph)) {
+ scanWrittenSlots(n->g(attr::Subgraph)->block(), written_slots);
+ }
+ }
+}
+
+ArgumentSpecCreator::ArgumentSpecCreator(Graph& graph)
+ : num_inputs_(graph.inputs().size()) {
+ WrittenSlots written_slots;
+ scanWrittenSlots(graph.block(), written_slots);
+ for (Value* input : graph.inputs()) {
+ scan(input->type(), 0, written_slots);
+ }
+}
+
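+// prints the instruction program in a compact form; e.g. the program
+// SPECIALIZE_TENSOR ENTER_TUPLE SPECIALIZE_TENSOR SKIP LEAVE prints as
+// "SpecializeTensor Tuple[SpecializeTensor Skip ] ".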
+void ArgumentSpecCreator::dump() const {
+ for (Inst inst : instructions_) {
+ switch (inst) {
+ case LEAVE:
+ std::cout << "] ";
+ break;
+ case ENTER_TUPLE:
+ std::cout << "Tuple[";
+ break;
+ case ENTER_OBJECT:
+ std::cout << "Object[";
+ break;
+ case SKIP:
+ std::cout << "Skip ";
+ break;
+ case SPECIALIZE_TENSOR:
+ std::cout << "SpecializeTensor ";
+ break;
+ }
+ }
+ std::cout << "\n";
+}
+
+ArgumentSpec ArgumentSpecCreator::create(bool with_grad, const Stack& input)
+ const {
+ ArgumentSpec spec(num_tensors_);
+ const IValue* stack[DEPTH_LIMIT]; // The stack of IValue lists
+ // The stack gets initialized with the input list
+ stack[0] = last(input, num_inputs_).begin();
+ size_t stack_top = 0; // offset to the top of the stack
+ for (Inst inst : instructions_) {
+ switch (inst) {
+ case SPECIALIZE_TENSOR:
+ // consume a tensor and add to the argspec
+ spec.addTensor(*stack[stack_top]++, with_grad);
+ break;
+ case ENTER_TUPLE: {
+ // consume tuple
+ const IValue* iv = stack[stack_top]++;
+ AT_ASSERT(iv->isTuple());
+ // see [argspec refcounting]
+ auto p = *reinterpret_cast<const at::ivalue::Tuple* const*>(iv);
+ auto tup_ptr = &p->elements()[0];
+ // push list of tuple elements to the stack
+ stack[++stack_top] = tup_ptr;
+ } break;
+ case ENTER_OBJECT: {
+ // consume object
+ const IValue* iv = stack[stack_top]++;
+ AT_ASSERT(iv->isObject());
+ // see [argspec refcounting]
+ auto p = *reinterpret_cast<const at::ivalue::Object* const*>(iv);
+ auto obj_ptr = &p->slots()[0];
+ // push list of object elements to the stack
+ stack[++stack_top] = obj_ptr;
+ } break;
+ case SKIP:
+ // consume and skip an element
+ stack[stack_top]++;
+ break;
+ case LEAVE:
+ --stack_top;
+ break;
+ }
+ }
+ return spec;
+}
+
+// For every input of a given graph, returns the most detailed type that can be
+// inferred for it based on this ArgumentSpec.
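+// Illustrative example: an input declared Tuple[Tensor, int], for which the
+// spec recorded a defined 2-dim float tensor, becomes Tuple[Float(*, *), int]
+// (a DimensionedTensorType alongside the untouched int).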
+std::vector<TypePtr> ArgumentSpecCreator::getSpecializedTypes(
+ Graph& graph,
+ const ArgumentSpec& spec) const {
+ auto input_types =
+ fmap(graph.inputs(), [](Value* input) { return input->type(); });
+ std::vector<std::vector<TypePtr>> result_stack;
+ result_stack.emplace_back();
+ std::vector<const TypePtr*> input_stack = {input_types.data()};
+ std::vector<std::function<TypePtr()>> aggregate_creators;
+
+ size_t arg_spec_offset = 0; // number of specialized tensors seen so far
+
+ for (Inst inst : instructions_) {
+ switch (inst) {
+ case SPECIALIZE_TENSOR: {
+ input_stack.back()++;
+ auto& arg = spec.at(arg_spec_offset++);
+ if (!arg.defined()) {
+ result_stack.back().emplace_back(AutogradZeroTensorType::get());
+ } else {
+ result_stack.back().emplace_back(DimensionedTensorType::create(
+ arg.type(),
+ ConvertIntToCPUOrCUDA(arg.device()),
+ arg.dim(),
+ arg.requires_grad()));
+ }
+ } break;
+ case ENTER_TUPLE: {
+ auto tup = (*input_stack.back()++)->expect<TupleType>();
+ input_stack.emplace_back(tup->elements().data());
+ result_stack.emplace_back();
+ aggregate_creators.emplace_back(
+ [&] { return TupleType::create(result_stack.back()); });
+ } break;
+ case ENTER_OBJECT: {
+ auto cls = (*input_stack.back()++)->expect<ClassType>();
+ input_stack.emplace_back(cls->containedTypes().data());
+ result_stack.emplace_back();
+ aggregate_creators.emplace_back(
+ [&result_stack, cls] { return cls->refine(result_stack.back()); });
+ } break;
+ case SKIP:
+ result_stack.back().emplace_back(*input_stack.back()++);
+ break;
+ case LEAVE: {
+ TypePtr result = aggregate_creators.back()();
+ result_stack.pop_back();
+ aggregate_creators.pop_back();
+ input_stack.pop_back();
+ result_stack.back().emplace_back(std::move(result));
+ } break;
+ }
+ }
+ AT_ASSERT(result_stack.size() == 1);
+ return result_stack.back();
+}
+
+void ArgumentSpecCreator::setInputTypes(Graph& g, const ArgumentSpec& spec)
+ const {
+ auto input_types = getSpecializedTypes(g, spec);
+ auto inputs = g.inputs();
+ for (size_t i = 0; i < inputs.size(); ++i) {
+ inputs[i]->setType(input_types[i]);
+ }
+}
+
+} // namespace jit
+} // namespace torch
#pragma once
+#include <ATen/core/jit_type.h>
+#include <ATen/core/stack.h>
#include <torch/csrc/autograd/variable.h>
#include <torch/csrc/jit/ir.h>
-#include <ATen/core/stack.h>
-#include <ATen/core/jit_type.h>
#include <torch/csrc/jit/variable_tensor_list.h>
#include <torch/csrc/utils/hash.h>
#include <iostream>
friend struct ArgumentSpec;
using plain_data_type = uint32_t;
- bool isTensor() const {
- return is_tensor_;
- }
bool defined() const {
return defined_;
}
operator TypePtr() const {
if (!defined())
return TensorType::get();
- return DimensionedTensorType::create(type(), ConvertIntToCPUOrCUDA(device()), dim());
+ return DimensionedTensorType::create(
+ type(), ConvertIntToCPUOrCUDA(device()), dim());
}
private:
- unsigned is_tensor_ : 1;
unsigned defined_ : 1;
unsigned requires_grad_ : 1;
unsigned : 5;
"ArgumentInfo is expected to be a 32-bit struct");
struct ArgumentSpec {
- ArgumentSpec(
- bool with_grad,
- at::ArrayRef<IValue> inputs,
- size_t num_flat_inputs) {
+ ArgumentSpec(size_t num_flat_inputs) {
hash_code = num_flat_inputs;
- args.resize(num_flat_inputs);
- size_t offset = 0;
- for (const auto& i : inputs) {
- addInput(i, offset, with_grad);
- }
- AT_ASSERT(offset <= num_flat_inputs);
+ args.reserve(num_flat_inputs);
}
- void addInput(const IValue& input, size_t& offset, bool with_grad) {
- auto& arg = args.at(offset);
+ void addTensor(const IValue& input, bool with_grad) {
+ AT_ASSERT(input.isTensor());
+ args.emplace_back();
+ auto& arg = args.back();
// Initialize all fields to 0. This is convenient, because e.g.
// requires_grad() can be checked even on tensors AND will make
// padding bits all 0s.
std::memset(&arg, 0, sizeof(ArgumentInfo));
- if (input.isTensor()) {
- at::Tensor t = input.toTensor();
- if ((arg.defined_ = t.defined())) {
- arg.requires_grad_ = with_grad && autograd::Variable(t).requires_grad();
- arg.dim_ = t.dim();
- arg.device_ = t.is_cuda() ? t.get_device() : -1;
- arg.type_ = static_cast<unsigned>(t.scalar_type());
- }
-
- arg.is_tensor_ = true;
- combineHash(arg);
- offset++;
- } else if (input.isTuple()) {
- for (const IValue& elem : input.toTuple()->elements()) {
- addInput(elem, offset, with_grad);
- }
- } else {
- // NB: no need to set is_tensor to false, because we memset the struct to
- // 0 above
- combineHash(arg);
- offset++;
+ // [argspec refcounting] reinterpret the IValue to avoid having to refcount
+ // the Tensor; microbenchmarks at
+ // https://github.com/zdevito/pytorch/commit/21e7200a0a0fc456bea2f10e95b1781f83933d10
+ // show overhead from the extra refcounting along this path
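+ // (assumption: the cast is valid because a Tensor-holding IValue stores the
+ // same TensorImpl pointer that an at::Tensor wraps as its first word)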
+ const at::Tensor* t = reinterpret_cast<const at::Tensor*>(&input);
+ if ((arg.defined_ = t->defined())) {
+ arg.requires_grad_ = with_grad && autograd::Variable(*t).requires_grad();
+ arg.dim_ = t->dim();
+ arg.device_ = t->is_cuda() ? t->get_device() : -1;
+ arg.type_ = static_cast<unsigned>(t->scalar_type());
}
+ combineHash(arg);
}
void combineHash(const ArgumentInfo& arg) {
size_t hashCode() const {
return hash_code;
}
- // For every input of a given graph, returns a most detailed type that can be
- // inferred for it based on this ArgumentSpec.
- std::vector<TypePtr> getTypes(Graph& graph) const {
- size_t offset = 0;
- return fmap(
- graph.inputs(), [&](Value* v) { return fillType(v->type(), offset); });
- }
private:
- TypePtr fillType(TypePtr original, size_t& offset) const {
- if (original->isSubtypeOf(TensorType::get())) {
- auto& arg = args.at(offset++);
- if (!arg.defined())
- return AutogradZeroTensorType::get();
- return DimensionedTensorType::create(
- arg.type(),
- ConvertIntToCPUOrCUDA(arg.device()),
- arg.dim(),
- arg.requires_grad());
- } else if (auto tuple_type = original->cast<TupleType>()) {
- return TupleType::create(fmap(
- tuple_type->elements(),
- [&](const TypePtr& subtype) { return fillType(subtype, offset); }));
- } else {
- offset++;
- return original;
- }
- }
size_t hash_code; // precomputed on construction
std::vector<ArgumentInfo> args;
};
+// ArgumentSpecCreator takes an initial graph and comes up with a set
+// of simple instructions to compute the ArgumentSpec given a set of
+// input tensors.
+struct ArgumentSpecCreator {
+ // instructions act on a stack of lists of input IValues.
+ // At the beginning the stack contains a single list of the inputs to the
+ // function. The ENTER_ instructions descend into sub-objects and push new
+ // lists onto the stack.
+ enum Inst : char {
+ ENTER_TUPLE, // consume a tuple ivalue from the top-most list, and push the
+ // list of its elements onto the stack as a new list
+ ENTER_OBJECT, // same as ENTER_TUPLE, but the input is a class
+ LEAVE, // pop the top-most list from the stack
+ SKIP, // consume an element from the top-most list, and discard it
+ SPECIALIZE_TENSOR, // consume a tensor from the top-most list, and
+ // add it to the ArgSpec key being created
+ };
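+ // A worked example (illustrative): for input types (Tensor, Tuple[Tensor, int]),
+ // scan() generates the program
+ // SPECIALIZE_TENSOR ENTER_TUPLE SPECIALIZE_TENSOR SKIP LEAVE
+ // whereas an aggregate containing no tensors, e.g. Tuple[int, int],
+ // collapses into a single SKIP.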
+ ArgumentSpecCreator(Graph& graph);
+ ArgumentSpec create(bool with_grad, const Stack& stack) const;
+ void setInputTypes(Graph& g, const ArgumentSpec& spec) const;
+ std::vector<TypePtr> getSpecializedTypes(
+ Graph& graph,
+ const ArgumentSpec& spec) const;
+ void dump() const;
+ using WrittenSlots = std::unordered_set<std::string>;
+
+ private:
+ static constexpr size_t DEPTH_LIMIT = 128;
+ void scan(
+ const TypePtr& typ,
+ size_t depth,
+ const WrittenSlots& written_slots);
+ size_t num_inputs_;
+ size_t num_tensors_ = 0;
+ std::vector<Inst> instructions_;
+};
+
// CompleteArgumentSpec represents one particular specialization.
// It is designed so that it can be created, hashed, and compared quickly
// since it is used along the hot-path of the JIT to check if the code
return CompleteArgumentInfo(*this, i);
}
-inline void setInputTypes(Graph& g, const ArgumentSpec& spec) {
- auto input_types = spec.getTypes(g);
- auto inputs = g.inputs();
- for (size_t i = 0; i < inputs.size(); ++i) {
- inputs[i]->setType(input_types[i]);
- }
-}
-
} // namespace jit
} // namespace torch
return copy;
}
- static size_t countFlatInputs(const TypePtr& ptr) {
- if (auto optional_type = ptr->cast<OptionalType>()) {
- return countFlatInputs(optional_type->getElementType());
- }
- if (auto tuple_type = ptr->cast<TupleType>()) {
- size_t total = 0;
- for (auto& elem : tuple_type->elements()) {
- total += countFlatInputs(elem);
- }
- return total;
- }
- return 1;
- }
-
- static size_t countFlatInputs(const std::shared_ptr<Graph>& graph) {
- size_t total = 0;
- for (Value* input : graph->inputs()) {
- total += countFlatInputs(input->type());
- }
- return total;
- }
-
inline bool hasMutableOperators(Block* block) {
for (auto n : block->nodes()) {
if (n->kind().is_aten() && n->schema().is_mutable())
// disables all optimization
optimize(optimize),
num_inputs(this->graph->inputs().size()),
- num_flat_inputs(countFlatInputs(graph)),
+ arg_spec_creator_(*graph),
num_outputs(this->graph->outputs().size()) {
- logging::getLogger()->addStatValue(
- logging::runtime_counters::GRAPH_EXECUTORS_CONSTRUCTED, 1.0);
- }
+ logging::getLogger()->addStatValue(
+ logging::runtime_counters::GRAPH_EXECUTORS_CONSTRUCTED, 1.0);
+ }
// entry point where execution begins
void run(Stack& stack) {
std::shared_ptr<Graph> graphFor(const Stack& stack) const {
AT_ASSERT(stack.size() >= num_inputs);
- auto inputs = last(stack, num_inputs);
- ArgumentSpec spec(
- autograd::GradMode::is_enabled(), inputs, num_flat_inputs);
+
+ ArgumentSpec spec =
+ arg_spec_creator_.create(autograd::GradMode::is_enabled(), stack);
if (!optimize) {
AT_CHECK(fallback, "No graph found for given inputs");
const ExecutionPlan& getOrCompile(const Stack& stack) {
// outside lock guard, to minimize the time holding the lock on the fast
// path ArgumentSpec even computes its hashCode here.
- ArgumentSpec spec(
- autograd::GradMode::is_enabled(),
- last(stack, num_inputs),
- num_flat_inputs);
+ ArgumentSpec spec =
+ arg_spec_creator_.create(autograd::GradMode::is_enabled(), stack);
{
std::lock_guard<std::mutex> lock(compile_mutex);
auto it = plan_cache.find(spec);
ExecutionPlan compileSpec(const ArgumentSpec& spec) {
auto opt_graph = graph->copy();
- setInputTypes(*opt_graph, spec);
+ arg_spec_creator_.setInputTypes(*opt_graph, spec);
// Phase 1. Specialize to input definedness (this is very important for
// gradient graphs), and run required passes to bring the graph
auto input_values = fmap(
inputs, [](const IValue& v) { return tracer::getNestedValueTrace(v); });
- ArgumentSpec spec(
- autograd::GradMode::is_enabled(), inputs, num_flat_inputs);
+ ArgumentSpec spec =
+ arg_spec_creator_.create(autograd::GradMode::is_enabled(), stack);
// NB: we could just run the fallback in here and call it a day, but that
// would lose all the control flow information we have in the graph. Thus,
// we run the fallback to get the correct output values, but we will
// tracing and so we only do the type propagation if no concrete types have
// been set.
auto local_graph = this->graph->copy();
- setInputTypes(*local_graph, spec);
+ arg_spec_creator_.setInputTypes(*local_graph, spec);
PropagateInputShapes(local_graph);
auto output_values =
inlineCallTo(*state->graph, *local_graph, input_values);
// Useful for debugging.
const bool optimize;
const size_t num_inputs;
- const size_t num_flat_inputs; // Number of inputs, assuming all tuples would
- // be flattened.
+ ArgumentSpecCreator arg_spec_creator_;
const size_t num_outputs;
// Populated only when optimize is false (and in that case plan_cache will be
[](const std::shared_ptr<Graph>& g) { return Canonicalize(g); })
.def("_jit_pass_lint", LintGraph)
.def(
- "_jit_pass_shape_analysis",
- [](std::shared_ptr<Graph> graph,
- std::vector<at::Tensor> inputs,
- bool with_grad) {
- setInputTypes(
- *graph,
- ArgumentSpec(with_grad, fmap<IValue>(inputs), inputs.size()));
- PropagateInputShapes(graph);
- })
- .def(
"_jit_pass_complete_shape_analysis",
[](std::shared_ptr<Graph> graph, py::tuple inputs, bool with_grad) {
CompleteArgumentSpec spec(
setUnshapedType(node);
return;
}
+ case prim::GetAttr: {
+ auto cls = node->input()->type()->expect<ClassType>();
+ // propagate any type specializations encoded in the type of the class
+ node->output()->setType(cls->getAttribute(node->s(attr::name)));
+ return;
+ }
case aten::_unwrap_optional: {
auto input_ivalue = toIValue(node->input());
if (input_ivalue && input_ivalue->isNone()) {
};
// Requirements:
- // dims : 0 if dim is None, otherwise preserved if keepdim == false or 1 smaller otherwise
- // scalar type : preserved
- // device : preserved
- // tensor inputs : 1
- // tensor outputs : 1
+ // dims : 0 if dim is None; otherwise preserved if keepdim == false,
+ // or 1 smaller otherwise
+ // scalar type : preserved
+ // device : preserved
+ // tensor inputs : 1
+ // tensor outputs : 1
// Additionally:
// - First input should be the only tensor input
// - Has a bool keepdim argument
[](Node* node) -> type_vec_t {
if (auto dim = node->get<std::vector<int64_t>>(attr::dim)) {
return multidim_reduce_with_postprocess(
- node, /*num_reduced_dim=*/dim->size(), /*upcast_integer=*/false);
+ node,
+ /*num_reduced_dim=*/dim->size(),
+ /*upcast_integer=*/false);
}
return {};
}};
db.dump();
})
.def(
- "propagate_shapes",
- [](std::shared_ptr<Graph> g,
- std::vector<at::Tensor> inputs,
- bool with_grad) {
- setInputTypes(
- *g,
- ArgumentSpec(with_grad, fmap<IValue>(inputs), inputs.size()));
- PropagateInputShapes(g);
- })
- .def(
"_export_onnx",
[](const std::shared_ptr<Graph> g,
const std::map<std::string, at::Tensor>& initializers,
#pragma once
-#include <torch/csrc/autograd/variable.h>
+#include <c10/util/Exception.h>
#include <torch/csrc/autograd/generated/variable_factories.h>
+#include <torch/csrc/autograd/variable.h>
#include <torch/csrc/jit/argument_spec.h>
-#include <c10/util/Exception.h>
#include <torch/csrc/jit/graph_executor.h>
#include <torch/csrc/jit/ir.h>
#include <torch/csrc/jit/named_value.h>
#include <torch/csrc/jit/passes/shape_analysis.h>
-#include <torch/csrc/jit/source_range.h>
#include <torch/csrc/jit/script/slot.h>
-
+#include <torch/csrc/jit/source_range.h>
#include <torch/csrc/WindowsTorchApiMacro.h>
#include <torch/csrc/api/include/torch/ordered_dict.h>
struct Module;
-using ModuleLookup = std::function<std::shared_ptr<Module>(
- const std::vector<std::string>&)>;
+using ModuleLookup =
+ std::function<std::shared_ptr<Module>(const std::vector<std::string>&)>;
struct Method {
Method(
return graph()->addInput()->setType(type);
}
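+ // note: unlike ArgumentSpecCreator, this helper assumes every graph input
+ // is a Tensor; it backs the shape-propagation entry points below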
+ static void setInputTensorTypes(Graph& g, const Stack& stack) {
+ AT_ASSERT(stack.size() == g.inputs().size());
+ for (size_t i = 0; i < stack.size(); ++i) {
+ g.inputs().at(i)->setType(
+ DimensionedTensorType::create(stack.at(i).toTensor()));
+ }
+ }
+
std::shared_ptr<Graph> propagate_shapes(
std::vector<at::Tensor> inputs,
bool with_grad = false) {
for (const Slot& inp : initial_ivalues_) {
stack.push_back(*inp);
}
- const auto size = stack.size();
- setInputTypes(*retval, ArgumentSpec(with_grad, stack, size));
+ setInputTensorTypes(*retval, stack);
PropagateInputShapes(retval);
return retval;
}
}
}
if (propagate) {
- setInputTypes(
- *retval,
- ArgumentSpec(with_grad, fmap<IValue>(inputs), inputs.size()));
+ setInputTensorTypes(*retval, fmap<IValue>(inputs));
PropagateInputShapes(retval);
}
AT_ASSERT(retval->inputs().size() == inputs.size());
if (pos < inputs.size()) {
if (!isSubvalueOf(inputs[pos], argument.type())) {
AT_ERROR(
- "Expected value of type ",
- *argument.type(),
- " for argument '",
- argument.name(),
- "' in position ",
- pos,
- ", but instead got value of type ",
- attemptToRecoverType(inputs[pos])->str(),
- ". Declaration: ",
- schema);
+ "Expected value of type ",
+ *argument.type(),
+ " for argument '",
+ argument.name(),
+ "' in position ",
+ pos,
+ ", but instead got value of type ",
+ attemptToRecoverType(inputs[pos])->str(),
+ ". Declaration: ",
+ schema);
}
} else if (argument.default_value()) {
inputs.push_back(*argument.default_value());
const TypePtr& type() const {
return type_;
}
-private:
+
+ private:
const std::string name_;
const TypePtr type_;
std::unique_ptr<IValue> ivalue_;
const torch::OrderedDict<std::string, NamedModule>& get_modules() const {
return modules;
}
- const torch::OrderedDict<std::string, NamedIValue>& get_parameters()
- const {
+ const torch::OrderedDict<std::string, NamedIValue>& get_parameters() const {
return parameters;
}
- const torch::OrderedDict<std::string, NamedIValue>& get_attributes()
- const {
+ const torch::OrderedDict<std::string, NamedIValue>& get_attributes() const {
return attributes;
}
const torch::OrderedDict<std::string, std::unique_ptr<Method>>& get_methods()
if (!kv.value().type()->isSubtypeOf(TensorType::get())) {
continue;
}
- curr->register_buffer(
- kv.key(),
- kv.value().slot()->toTensor());
+ curr->register_buffer(kv.key(), kv.value().slot()->toTensor());
parameter_remap[kv.value().slot()] = curr->find_buffer(kv.key())->slot();
}
for (auto& kv : modules) {