}
}
-void invokeTestRecordFunction(at::Tensor& t) {
- autograd::profiler::GetPackedInputsCallback inputs_cb = [t]() {
- Stack st;
- pack(st, t);
- return st;
- };
- autograd::profiler::RecordFunction guard("test", inputs_cb);
- t.add_(torch::ones_like(t));
+// Eager-mode test helper: opens a "test" profiler range carrying the input
+// tensor, then returns t.pow(2).
+at::Tensor invokeTestRecordFunction(at::Tensor& t) {
+ RECORD_FUNCTION("test", std::vector<c10::IValue>({t}));
+
+ auto t2 = t.pow(2);
+ return t2;
+}
+
+// TorchScript source mirroring invokeTestRecordFunction's body (t.pow(2));
+// compiled and executed by invokeTestRecordFunctionJIT below.
+static const auto invokeTestRecordFunction_JIT = R"JIT(
+ def forward(t):
+ t2 = t.pow(2)
+ return t2
+)JIT";
+
+// JIT test helper: opens a "test" profiler range carrying the input tensor,
+// then runs the compiled forward() defined above.
+at::Tensor invokeTestRecordFunctionJIT(at::Tensor& t) {
+ RECORD_FUNCTION("test", std::vector<c10::IValue>({t}));
+
+ auto cu = compile(invokeTestRecordFunction_JIT);
+ return cu->get_function("forward")({t}).toTensor();
+}
+
+// (function name, [sizes of each recorded input]) pairs captured by the
+// profiler start callback installed in testRecordFunction.
+using TracedTestInputs =
+ std::vector<std::tuple<std::string, std::vector<std::vector<int64_t>>>>;
+
+// Asserts the trace contains the top-level "test" range plus the pow
+// (forward) and mul (backward) ops, each with the expected input sizes.
+void checkTracedInputs(const TracedTestInputs& inputs) {
+ bool found_test = false;
+ bool found_pow = false;
+ bool found_mul = false;
+ for (const auto& input : inputs) {
+ const auto& fn = std::get<0>(input);
+ const auto& sizes = std::get<1>(input);
+ if (fn == "test") {
+ found_test = true;
+ AT_CHECK(sizes.size() == 1);
+ AT_CHECK(sizes[0] == std::vector<int64_t>({1, 2, 3}));
+ } else if (fn == "test::pow") {
+ found_pow = true;
+ AT_CHECK(sizes.size() == 2);
+ AT_CHECK(sizes[0] == std::vector<int64_t>({1, 2, 3}));
+ // the second input is the scalar exponent, recorded as an empty list
+ AT_CHECK(sizes[1].empty());
+ } else if (fn.find("::mul") != std::string::npos) {
+ found_mul = true;
+ AT_CHECK(sizes.size() > 1);
+ AT_CHECK(sizes[0] == std::vector<int64_t>({1, 2, 3}));
+ }
+ }
+ AT_CHECK(found_test);
+ AT_CHECK(found_pow);
+ AT_CHECK(found_mul);
}
std::string getFullName(const autograd::profiler::RecordFunction* fn_ptr) {
return full_name;
}
-void invokeTestRecordFunctionNested() {
- autograd::profiler::RecordFunction guard("inner");
-}
-
void testRecordFunction() {
- std::vector<std::vector<int64_t>> input_sizes;
+ // [(fn, [[sizes], [sizes], ...]), ...]
+ TracedTestInputs traced_inputs;
autograd::profiler::pushCallback(
- [&input_sizes](const autograd::profiler::RecordFunction& fn) {
- for (const auto& input : fn.inputs()) {
+ // Start callback: record (full name, per-input sizes) for every range.
+ [&traced_inputs](const autograd::profiler::RecordFunction& fn) {
+ auto inputs = fn.inputs();
+ std::vector<std::vector<int64_t>> sizes;
+ for (const auto& input : inputs) {
if (input.isTensor()) {
- std::vector<int64_t> t = input.toTensor().sizes().vec();
- input_sizes.push_back(t);
+ sizes.push_back(input.toTensor().sizes().vec());
+ } else if (input.isScalar()){
+ // scalars are represented by an empty size list
+ sizes.push_back(std::vector<int64_t>());
}
}
- });
+ traced_inputs.push_back(
+ std::make_tuple(std::string(getFullName(&fn)), sizes));
+ // no-op end callback; needs_inputs=true so call sites capture inputs
+ }, [](const autograd::profiler::RecordFunction&) {}, true);
auto t = torch::randn({1, 2, 3}, at::kCPU);
- invokeTestRecordFunction(t);
+ t.set_requires_grad(true);
+ auto t2 = invokeTestRecordFunction(t);
+ t2.backward();
+ auto eager_inputs = traced_inputs;
+ traced_inputs.clear();
+
+ t = torch::randn({1, 2, 3}, at::kCPU);
+ t.set_requires_grad(true);
+ t2 = invokeTestRecordFunctionJIT(t);
+ t2.backward();
+ auto jit_inputs = traced_inputs;
+ traced_inputs.clear();
autograd::profiler::popCallback();
- AT_CHECK(input_sizes.size() == 1);
- AT_CHECK(input_sizes[0] == at::IntArrayRef({1, 2, 3}));
-
- // test nested RecordFunctions
- std::vector<std::string> nested_names;
- autograd::profiler::pushCallback(
- [&nested_names](const autograd::profiler::RecordFunction& fn) {
- nested_names.push_back(getFullName(&fn));
- });
-
- {
- autograd::profiler::RecordFunction guard("outer");
- invokeTestRecordFunctionNested();
- ;
- }
-
- autograd::profiler::popCallback();
- AT_CHECK(nested_names.size() == 2);
- AT_CHECK(nested_names[0] == "outer");
- AT_CHECK(nested_names[1] == "outer::inner");
+ checkTracedInputs(eager_inputs);
+ checkTracedInputs(jit_inputs);
}
void testAutogradProfiler() {
""")
RECORD_FUNCTION = CodeTemplate("""\
-profiler::RecordFunction profiler("${name}", Function::peek_at_next_sequence_nr());""")
+RECORD_FUNCTION("${name}", std::vector<c10::IValue>({${input_names}}), Function::peek_at_next_sequence_nr());
+""")
SELECT = CodeTemplate("""\
if (${cond}) {
return []
return ['increment_version({});'.format(arg['name']) for arg in differentiable_outputs]
+ def check_record_function_input_type(simple_type):
+ # Only Tensor and Scalar arguments are captured as profiler inputs.
+ return simple_type in ['Tensor', 'Scalar']
+
+ def record_function_input_names():
+ # Comma-separated argument names to splice into RECORD_FUNCTION.
+ return ', '.join([
+ arg['name'] for arg in declaration['arguments']
+ if check_record_function_input_type(arg['simple_type'])])
+
env = {}
combined = nested_dict(env, declaration)
body = []
if base_name not in DONT_PROFILE:
- body.append(RECORD_FUNCTION.substitute(combined))
+ input_names = record_function_input_names()
+ body.append(
+ RECORD_FUNCTION.substitute(combined, input_names=input_names))
if strategy != 'use_type':
body.extend(unpack_args(env, declaration))
if requires_derivative:
CONSTRUCTOR = CodeTemplate("""\
[](Stack & stack) {
- autograd::profiler::RecordFunction record("${name}");
${lvalues}
${call}
drop(stack, ${num_inputs});
Operator(
"aten::get_device(Tensor self) -> int",
[](Stack & stack) {
- autograd::profiler::RecordFunction record("get_device");
+ RECORD_FUNCTION("get_device", std::vector<c10::IValue>());
auto result = at::get_device(
(std::move(peek(stack, 0, 1))).toTensor()
);
Operator(
"aten::storage_offset(Tensor self) -> int",
[](Stack & stack) {
- autograd::profiler::RecordFunction record("storage_offset");
+ RECORD_FUNCTION("storage_offset", std::vector<c10::IValue>());
auto result = ((std::move(peek(stack, 0, 1))).toTensor()).storage_offset();
drop(stack, 1);
pack(stack, std::move(result));
Operator(
"aten::is_contiguous(Tensor self) -> bool",
[](Stack & stack) {
- autograd::profiler::RecordFunction record("is_contiguous");
+ RECORD_FUNCTION("is_contiguous", std::vector<c10::IValue>());
auto result = ((std::move(peek(stack, 0, 1))).toTensor()).is_contiguous();
drop(stack, 1);
pack(stack, std::move(result));
}
Tensor VariableType::detach(const Tensor & self) const {
- profiler::RecordFunction profiler("detach");
+ RECORD_FUNCTION("detach", std::vector<c10::IValue>({self}));
+
torch::jit::Node* node = nullptr;
if (jit::tracer::isTracing()) {
auto& graph = jit::tracer::getTracingState()->graph;
}
Tensor & VariableType::detach_(Tensor & self) const {
- profiler::RecordFunction profiler("detach_");
+ RECORD_FUNCTION("detach_", std::vector<c10::IValue>({self}));
+
torch::jit::Node* node = nullptr;
if (jit::tracer::isTracing()) {
auto& graph = jit::tracer::getTracingState()->graph;
/// Evaluates the function on the given inputs and returns the result of the
/// function call.
variable_list operator()(variable_list&& inputs) {
- profiler::RecordFunction rec(this);
+ RECORD_FUNCTION(
+ this, std::vector<c10::IValue>(inputs.begin(), inputs.end()));
+
return apply(std::move(inputs));
}
PyObject *THPFunction_do_forward(THPFunction *self, PyObject *_inputs)
{
HANDLE_TH_ERRORS
- torch::autograd::profiler::RecordFunction record(Py_TYPE(self)->tp_name,
- Function::peek_at_next_sequence_nr());
+ RECORD_FUNCTION(
+ Py_TYPE(self)->tp_name,
+ std::vector<c10::IValue>(),
+ Function::peek_at_next_sequence_nr());
auto info_pair = unpack_input<true>(_inputs);
auto& unpacked_input = info_pair.first;
PyObject *THPFunction_apply(PyObject *cls, PyObject *inputs)
{
HANDLE_TH_ERRORS
- torch::autograd::profiler::RecordFunction record(((PyTypeObject*)cls)->tp_name,
- Function::peek_at_next_sequence_nr());
+ RECORD_FUNCTION(
+ ((PyTypeObject*)cls)->tp_name,
+ std::vector<c10::IValue>(),
+ Function::peek_at_next_sequence_nr());
THPObjectPtr backward_cls(PyObject_GetAttrString(cls, "_backward_cls"));
if (!backward_cls) return nullptr;
namespace torch { namespace autograd { namespace profiler {
namespace {
-bool has_callbacks = false;
std::vector<RecordFunctionCallback> start_callbacks;
std::vector<RecordFunctionCallback> end_callbacks;
+size_t callback_needs_inputs = 0;
thread_local RecordFunction* thread_local_func_ = nullptr;
}
-void pushCallback(RecordFunctionCallback start, RecordFunctionCallback end) {
+void pushCallback(
+ RecordFunctionCallback start,
+ RecordFunctionCallback end,
+ bool needs_inputs) {
start_callbacks.push_back(start);
end_callbacks.push_back(end);
- has_callbacks = true;
-}
-
-void pushCallback(RecordFunctionCallback start) {
- pushCallback(start, [](const RecordFunction&){});
+ // Once any active callback needs inputs, every later push also increments
+ // the counter (even when needs_inputs is false) so the unconditional
+ // decrement in popCallback stays balanced under LIFO push/pop.
+ if (callback_needs_inputs > 0 || needs_inputs) {
+ ++callback_needs_inputs;
+ }
}
void popCallback() {
}
start_callbacks.pop_back();
end_callbacks.pop_back();
- has_callbacks = !start_callbacks.empty();
+ if (callback_needs_inputs > 0) {
+ --callback_needs_inputs;
+ }
+}
+
+// True iff at least one start/end callback pair is currently registered.
+bool hasCallbacks() {
+ return !start_callbacks.empty();
+}
+
+// True iff some registered callback asked for the recorded inputs.
+bool needsInputs() {
+ return callback_needs_inputs > 0;
}
-RecordFunction::RecordFunction(Function* fn, GetPackedInputsCallback cb) {
- if (!has_callbacks) {
+// Starts a named range: stores name/sequence number, marks the guard
+// initialized, and invokes the registered start callbacks.
+void RecordFunction::before(const char* name, int64_t sequence_nr) {
+ if (!hasCallbacks()) {
return;
}
- fn_ = fn;
- name_ = StringView(fn->name());
- sequence_nr_ = fn->sequence_nr();
- inputs_cb_ = cb;
+ // before() must run at most once per RecordFunction instance
+ AT_ASSERT(!initialized_);
+ name_ = StringView(name);
+ sequence_nr_ = sequence_nr;
+
+ initialized_ = true;
processCallbacks();
}
-RecordFunction::RecordFunction(
- std::string name, int64_t sequence_nr, GetPackedInputsCallback cb) {
- if (!has_callbacks) {
+// Same as the const char* overload, but takes ownership of the string.
+void RecordFunction::before(std::string name, int64_t sequence_nr) {
+ if (!hasCallbacks()) {
return;
}
+ // before() must run at most once per RecordFunction instance
+ AT_ASSERT(!initialized_);
name_ = StringView(std::move(name));
sequence_nr_ = sequence_nr;
- inputs_cb_ = cb;
+
+ initialized_ = true;
processCallbacks();
}
-RecordFunction::RecordFunction(
- const char* name, int64_t sequence_nr, GetPackedInputsCallback cb) {
- if (!has_callbacks) {
+// Starts a range for an autograd Function, using its name; an explicit
+// non-negative sequence_nr overrides the Function's own sequence number.
+void RecordFunction::before(Function* fn, int64_t sequence_nr) {
+ if (!hasCallbacks()) {
return;
}
- name_ = StringView(name);
- sequence_nr_ = sequence_nr;
- inputs_cb_ = cb;
+ // before() must run at most once per RecordFunction instance
+ AT_ASSERT(!initialized_);
+ fn_ = fn;
+ name_ = StringView(fn->name());
+ sequence_nr_ = (sequence_nr >= 0) ? sequence_nr : fn->sequence_nr();
+
+ initialized_ = true;
processCallbacks();
}
}
RecordFunction::~RecordFunction() {
- if (has_callbacks) {
+ if (initialized_) {
for (const auto& cb : end_callbacks) {
cb(*this);
}
const char* str_ptr_;
};
-using GetPackedInputsCallback = std::function<std::vector<c10::IValue>()>;
-
struct TORCH_API RecordFunction {
- explicit RecordFunction(Function* fn, GetPackedInputsCallback cb = nullptr);
-
- explicit RecordFunction(
- std::string name,
- int64_t current_sequence_nr = -1,
- GetPackedInputsCallback cb = nullptr);
-
- explicit RecordFunction(
- const char* name,
- int64_t current_sequence_nr = -1,
- GetPackedInputsCallback cb = nullptr);
-
- explicit RecordFunction(
- std::string name,
- GetPackedInputsCallback cb) : RecordFunction(name, -1, cb) {}
+ // Default constructor is used with before function called afterwards
+ RecordFunction() {}
+
+ // before function initializes RecordFunction members and calls
+ // start callbacks
+ void before(const char* name, int64_t sequence_nr = -1);
+ void before(std::string name, int64_t sequence_nr = -1);
+ void before(Function* fn, int64_t sequence_nr = -1);
+
+ // Copies args into inputs_, then dispatches to the matching before()
+ // overload above; used when some callback asked for recorded inputs.
+ template<typename F>
+ void before(
+ F fn,
+ c10::ArrayRef<c10::IValue> args,
+ int64_t current_sequence_nr = -1) {
+ inputs_ = args.vec();
+ before(fn, current_sequence_nr);
+ }
- explicit RecordFunction(
- const char* name,
- GetPackedInputsCallback cb) : RecordFunction(name, -1, cb) {}
+ // Move-in variant: takes ownership of args without copying.
+ template<typename F>
+ void before(
+ F fn,
+ std::vector<c10::IValue>&& args,
+ int64_t current_sequence_nr = -1) {
+ inputs_ = std::move(args);
+ before(fn, current_sequence_nr);
+ }
+ // Destructor calls end callbacks
virtual ~RecordFunction();
-
inline Function* func() const {
return fn_;
}
}
const std::vector<c10::IValue>& inputs() const {
- if (inputs_cb_ && !inputs_initialized_) {
- inputs_ = inputs_cb_();
- inputs_initialized_ = true;
- }
return inputs_;
}
Function* fn_ = nullptr;
StringView name_;
int64_t sequence_nr_ = -1;
-
+ // inputs captured eagerly by the before() overloads taking args; left
+ // empty when no registered callback needs inputs
+ std::vector<c10::IValue> inputs_;
RecordFunction* parent_ = nullptr;
- GetPackedInputsCallback inputs_cb_ = nullptr;
- mutable bool inputs_initialized_ = false;
- // initialized lazily by inputs_cb_
- mutable std::vector<c10::IValue> inputs_;
+ // set by before(); the destructor only fires end callbacks when true
+ bool initialized_ = false;
};
+TORCH_API bool hasCallbacks();
+TORCH_API bool needsInputs();
+
+// optional argument - function's seq_no
+// NOTE: expands to a scope-long RecordFunction named `guard`, so at most one
+// RECORD_FUNCTION can appear per scope; deliberately not wrapped in
+// do{}while(0) because the guard must live until the end of the enclosing
+// scope. `##__VA_ARGS__` comma elision is a GNU/MSVC extension.
+#define RECORD_FUNCTION(fn, inputs, ...) \
+ torch::autograd::profiler::RecordFunction guard; \
+ if (torch::autograd::profiler::hasCallbacks()) { \
+ if (torch::autograd::profiler::needsInputs()) { \
+ guard.before(fn, inputs, ##__VA_ARGS__); \
+ } else { \
+ guard.before(fn, ##__VA_ARGS__); \
+ } \
+ }
+
// WARNING: all calls to pushCallback/popCallback are not thread safe and
// must not overlap with other code execution
using RecordFunctionCallback = std::function<void(const RecordFunction&)>;
-TORCH_API void pushCallback(RecordFunctionCallback, RecordFunctionCallback);
-TORCH_API void pushCallback(RecordFunctionCallback);
+// Registers a start/end callback pair; needs_inputs asks RECORD_FUNCTION
+// call sites to capture the operation's inputs for this observer.
+TORCH_API void pushCallback(
+ RecordFunctionCallback start,
+ RecordFunctionCallback end = [](const RecordFunction&){},
+ bool needs_inputs = false);
TORCH_API void popCallback();
} // namespace profiler
[](const Node* node) {
const auto key = registerFusion(node);
return [key](Stack& stack) {
- autograd::profiler::RecordFunction record("FusionGroup");
+ RECORD_FUNCTION("FusionGroup", std::vector<c10::IValue>());
runFusion(key, stack);
return 0;
};
return v->uses().size() > 0;
});
return [=](Stack& stack) {
- autograd::profiler::RecordFunction record("chunk");
+ RECORD_FUNCTION("chunk", last(stack, 1));
+
at::Tensor t;
pop(stack, t);
auto result = at::chunk(t, chunks, dim);
Operator(
"aten::split(Tensor self, int[] split_sizes, int dim=0) -> Tensor[]",
[](Stack& stack) {
- autograd::profiler::RecordFunction record("split_with_sizes");
+ RECORD_FUNCTION("split_with_sizes", last(stack, 3));
+
auto result = at::split_with_sizes(
(std::move(peek(stack, 0, 3))).toTensor(),
(std::move(peek(stack, 1, 3))).toIntList()->elements(),
Operator(
"aten::size(Tensor self) -> int[]",
[](Stack& stack) {
- autograd::profiler::RecordFunction record("sizes");
+ RECORD_FUNCTION("size", last(stack, 1));
+
auto t = std::move(pop(stack)).toTensor();
pack(stack, t.sizes().vec());
return 0;
Operator(
"aten::list_with_default(int[] list, int[] defaults) -> int[]",
[](Stack& stack) {
- autograd::profiler::RecordFunction record("sizes");
+ RECORD_FUNCTION("sizes", last(stack, 2));
+
auto list = peek(stack, 0, 2).toIntListRef();
auto defaults = peek(stack, 1, 2).toIntListRef();
drop(stack, 2);