From: Kimish Patel Date: Sat, 14 Aug 2021 04:37:57 +0000 (-0700) Subject: [Pytorch Profiler] Introduce scopes to enableProfiler (#62417) X-Git-Tag: accepted/tizen/8.0/unified/20231005.095509~1016 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1b04d99f554f0d2ec2653e78ba74ef9bb8b38602;p=platform%2Fupstream%2Fpytorch.git [Pytorch Profiler] Introduce scopes to enableProfiler (#62417) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/62417 This diff adds an option to make enableProfiler enable callbacks only for certain RecordScopes. Why? Profiling has some overhead when we repeatedly execute callbacks for all scopes. On the mobile side, where we often have small quantized models, this overhead can be large. We observed that by only profiling the top-level op and skipping profiling of the other aten ops called within, we can limit this overhead. For example, instead of profiling at::conv2d -> at::convolution -> at::convolution_ (and furthermore, if ops like transpose etc. are called), skipping profiling of those. Of course this limits the visibility, but at the least this way we get a choice. 
Test Plan: Imported from OSS Reviewed By: ilia-cher Differential Revision: D29993659 fbshipit-source-id: 852d3ae7822f0d94dc6e507bd4019b60d488ef69 --- diff --git a/test/cpp/jit/test_misc.cpp b/test/cpp/jit/test_misc.cpp index 5ee8816..8ecedd3 100644 --- a/test/cpp/jit/test_misc.cpp +++ b/test/cpp/jit/test_misc.cpp @@ -2505,6 +2505,76 @@ TEST(RecordDebugHandles, Basic) { ASSERT_EQ(my_events, 2); } +TEST(RecordDebugHandles, ScopedCallbacks) { + // Enable the profiler in this thread + torch::autograd::profiler::prepareProfiler( + torch::autograd::profiler::ProfilerConfig( + torch::autograd::profiler::ProfilerState::KINETO, false, false), + {torch::autograd::profiler::ActivityType::CPU}); + torch::autograd::profiler::enableProfiler( + torch::autograd::profiler::ProfilerConfig( + torch::autograd::profiler::ProfilerState::KINETO, false, false), + {torch::autograd::profiler::ActivityType::CPU}); + + { + auto a = torch::rand({128, 128}); + auto b = torch::rand({128, 128}); + auto c = a + b; + } + auto profiler_results_ptr = torch::autograd::profiler::disableProfiler(); + ASSERT_TRUE(profiler_results_ptr->events().size() > 0); + + // Enable the profiler in this thread + torch::autograd::profiler::prepareProfiler( + torch::autograd::profiler::ProfilerConfig( + torch::autograd::profiler::ProfilerState::KINETO, false, false), + {torch::autograd::profiler::ActivityType::CPU}); + torch::autograd::profiler::enableProfiler( + torch::autograd::profiler::ProfilerConfig( + torch::autograd::profiler::ProfilerState::KINETO, false, false), + {torch::autograd::profiler::ActivityType::CPU}, + {at::RecordScope::USER_SCOPE}); + { + auto a = torch::rand({128, 128}); + auto b = torch::rand({128, 128}); + auto c = a + b; + } + profiler_results_ptr = torch::autograd::profiler::disableProfiler(); + ASSERT_TRUE(profiler_results_ptr->events().size() == 0); + + torch::autograd::profiler::prepareProfiler( + torch::autograd::profiler::ProfilerConfig( + torch::autograd::profiler::ProfilerState::KINETO, 
false, false), + {torch::autograd::profiler::ActivityType::CPU}); + torch::autograd::profiler::enableProfiler( + torch::autograd::profiler::ProfilerConfig( + torch::autograd::profiler::ProfilerState::KINETO, false, false), + {torch::autograd::profiler::ActivityType::CPU}, + {at::RecordScope::USER_SCOPE}); + { + RECORD_USER_SCOPE_WITH_DEBUG_HANDLE_AND_INPUTS("my_function", 42, {}); + auto a = torch::rand({128, 128}); + auto b = torch::rand({128, 128}); + auto c = a + b; + } + { + RECORD_USER_SCOPE_WITH_INPUTS("not_my_function", {}); + auto a = torch::rand({128, 128}); + auto b = torch::rand({128, 128}); + auto c = a + b; + } + profiler_results_ptr = torch::autograd::profiler::disableProfiler(); + const auto& kineto_events = profiler_results_ptr->events(); + for (const auto& e : kineto_events) { + if (e.name() == "my_function") { + ASSERT_EQ(e.debugHandle(), 42); + } else if (e.name() == "not_my_function") { + ASSERT_EQ(e.debugHandle(), -1); + } + } + ASSERT_TRUE(profiler_results_ptr->events().size() == 2); +} + TEST(IValueKWargsTest, Basic) { const auto text = R"( def foo(a : int, b : int, c : int = 4): diff --git a/torch/csrc/autograd/init.cpp b/torch/csrc/autograd/init.cpp index dc51241..2eacbf1 100644 --- a/torch/csrc/autograd/init.cpp +++ b/torch/csrc/autograd/init.cpp @@ -18,6 +18,7 @@ #include #include +#include struct DisableTorchDispatch { DisableTorchDispatch() : guard_(c10::DispatchKey::Python) { @@ -223,7 +224,11 @@ PyObject* THPAutograd_initExtension(PyObject* _unused, PyObject *unused) { #endif // USE_KINETO ; - m.def("_enable_profiler", enableProfiler); + m.def("_enable_profiler", + &enableProfiler, + py::arg("config"), + py::arg("activities"), + py::arg("scopes") = std::unordered_set()); m.def("_disable_profiler", disableProfiler); m.def("_prepare_profiler", prepareProfiler); diff --git a/torch/csrc/autograd/profiler_kineto.cpp b/torch/csrc/autograd/profiler_kineto.cpp index e92461a..526813d 100644 --- a/torch/csrc/autograd/profiler_kineto.cpp +++ 
b/torch/csrc/autograd/profiler_kineto.cpp @@ -287,7 +287,7 @@ KinetoThreadLocalState* getProfilerTLSState() { return static_cast(state); } -void pushProfilingCallbacks() { +void pushProfilingCallbacks(const std::unordered_set& scopes) { auto state_ptr = getProfilerTLSState(); TORCH_INTERNAL_ASSERT(state_ptr, "Expected profiler state set"); auto handle = at::addThreadLocalCallback(at::RecordFunctionCallback( @@ -388,7 +388,8 @@ void pushProfilingCallbacks() { } }) .needsInputs(state_ptr->config().report_input_shapes) - .needsIds(true)); + .needsIds(true) + .scopes(scopes)); state_ptr->setCallbackHandle(handle); } @@ -497,7 +498,8 @@ void prepareProfiler( void enableProfiler( const ProfilerConfig& config, - const std::set& activities) { + const std::set& activities, + const std::unordered_set& scopes) { if (config.state != ProfilerState::NVTX) { TORCH_CHECK( config.state == ProfilerState::KINETO || @@ -514,7 +516,7 @@ void enableProfiler( c10::ThreadLocalDebugInfo::_push(c10::DebugInfoKind::PROFILER_STATE, state); if (activities.count(ActivityType::CPU) || config.state == ProfilerState::NVTX) { - pushProfilingCallbacks(); + pushProfilingCallbacks(scopes); } #ifdef USE_KINETO diff --git a/torch/csrc/autograd/profiler_kineto.h b/torch/csrc/autograd/profiler_kineto.h index 8a878a0..310554a 100644 --- a/torch/csrc/autograd/profiler_kineto.h +++ b/torch/csrc/autograd/profiler_kineto.h @@ -331,7 +331,8 @@ struct TORCH_API ProfilerResult { TORCH_API void enableProfiler( const ProfilerConfig& config, - const std::set& activities); + const std::set& activities, + const std::unordered_set& scopes = {}); TORCH_API std::unique_ptr disableProfiler();