[Pytorch Edge] Enable kineto profiler on mobile via EdgeKinetoProfiler (#62419)
authorKimish Patel <kimishpatel@fb.com>
Sat, 14 Aug 2021 04:37:57 +0000 (21:37 -0700)
committerFacebook GitHub Bot <facebook-github-bot@users.noreply.github.com>
Sat, 14 Aug 2021 04:40:19 +0000 (21:40 -0700)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/62419

This diff adds support for a CPU-only Kineto profiler on mobile, thus
enabling chrome trace generation on mobile. This brings the C++ API for
mobile profiling on par with TorchScript.
This is done via:
1. Utilizing debug handle annotations in KinetoEvent.
2. Adding post-processing capability, via callbacks, to
KinetoThreadLocalState.
3. Creating a new RAII-style profiler, KinetoEdgeCPUProfiler, which can be
used in the surrounding scope of model execution. This will write the
chrome trace to the location specified in the profiler constructor.
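
For reference, a minimal usage sketch of the resulting C++ API (adapted
from the new test_mobile_profiler.cpp below; model path and inputs are
illustrative):

  #include <torch/csrc/jit/mobile/import.h>
  #include <torch/csrc/jit/mobile/profiler_edge.h>

  using namespace torch::jit;

  mobile::Module m = _load_for_mobile("to_be_profiled_module.ptl");
  std::vector<c10::IValue> inputs{at::rand({64, 64}), at::rand({64, 64})};
  {
    // RAII: the chrome trace is written to the given file when the
    // profiler goes out of scope.
    mobile::KinetoEdgeCPUProfiler profiler(
        m,
        "/tmp/test_trace.trace",
        false, // record input shapes
        false, // profile memory
        true,  // record callstack
        false, // record flops
        true); // record module hierarchy
    m.forward(inputs);
  } // profiler destructor saves the trace here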

Test Plan:
MobileProfiler.ModuleHierarchy

Imported from OSS

Reviewed By: raziel

Differential Revision: D29993660

fbshipit-source-id: 0b44f52f9e9c5f5aff81ebbd9273c254c3c03299

24 files changed:
.jenkins/pytorch/build.sh
CMakeLists.txt
android/common.sh
android/pytorch_android/build.gradle
aten/src/ATen/record_function.h
caffe2/CMakeLists.txt
cmake/Dependencies.cmake
scripts/build_ios.sh
test/cpp/jit/test_backend.cpp
test/cpp/jit/test_lite_interpreter.cpp
test/cpp/jit/test_misc.cpp
test/cpp/lite_interpreter_runtime/CMakeLists.txt
test/cpp/lite_interpreter_runtime/test_lite_interpreter_runtime.cpp
test/cpp/lite_interpreter_runtime/test_mobile_profiler.cpp [new file with mode: 0644]
test/cpp/lite_interpreter_runtime/to_be_profiled_module.ptl [new file with mode: 0644]
tools/build_variables.bzl
torch/csrc/autograd/profiler_kineto.cpp
torch/csrc/autograd/profiler_kineto.h
torch/csrc/jit/mobile/debug_info.cpp
torch/csrc/jit/mobile/interpreter.cpp
torch/csrc/jit/mobile/module.cpp
torch/csrc/jit/mobile/module.h
torch/csrc/jit/mobile/profiler_edge.cpp [new file with mode: 0644]
torch/csrc/jit/mobile/profiler_edge.h [new file with mode: 0644]

index f2c279b..f6ac52a 100755 (executable)
@@ -130,6 +130,7 @@ if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then
   if [[ "${BUILD_ENVIRONMENT}" == *vulkan* ]]; then
     build_args+=("-DUSE_VULKAN=ON")
   fi
+  build_args+=("-DUSE_LITE_INTERPRETER_PROFILER=OFF")
   exec ./scripts/build_android.sh "${build_args[@]}" "$@"
 fi
 
index 5ee0e75..717de6e 100644 (file)
@@ -266,6 +266,7 @@ if(NOT DEFINED USE_VULKAN)
 endif()
 
 option(USE_SOURCE_DEBUG_ON_MOBILE "Enable " ON)
+option(USE_LITE_INTERPRETER_PROFILER "Enable " ON)
 option(USE_VULKAN_FP16_INFERENCE "Vulkan - Use fp16 inference" OFF)
 option(USE_VULKAN_RELAXED_PRECISION "Vulkan - Use relaxed precision math in the kernels (mediump)" OFF)
 option(USE_VULKAN_SHADERC_RUNTIME "Vulkan - Use runtime shader compilation as opposed to build-time (needs libshaderc)" OFF)
@@ -687,6 +688,10 @@ if(USE_SOURCE_DEBUG_ON_MOBILE)
   string(APPEND CMAKE_CXX_FLAGS " -DSYMBOLICATE_MOBILE_DEBUG_HANDLE")
 endif()
 
+if(USE_LITE_INTERPRETER_PROFILER)
+  string(APPEND CMAKE_CXX_FLAGS " -DEDGE_PROFILER_USE_KINETO")
+endif()
+
 # ---[ Allowlist file if allowlist is specified
 include(cmake/Allowlist.cmake)
 
index 9f5768d..ab1cb5f 100644 (file)
@@ -29,7 +29,7 @@ check_gradle() {
 }
 
 parse_abis_list() {
-  ABIS_LIST="armeabi-v7a,arm64-v8a,x86,x86_64"
+  ABIS_LIST="x86"
   CUSTOM_ABIS_LIST=false
   if [ $# -gt 0 ]; then
     ABIS_LIST=$1
@@ -59,7 +59,8 @@ build_android() {
     ANDROID_ABI="$abi" \
       BUILD_ROOT="$ANDROID_BUILD_ROOT" \
       "$PYTORCH_DIR/scripts/build_android.sh" \
-      -DANDROID_CCACHE="$(which ccache)"
+      -DANDROID_CCACHE="$(which ccache)" \
+      -DUSE_LITE_INTERPRETER_PROFILER="OFF"
 
     echo "$abi build output lib,include at $ANDROID_BUILD_ROOT/install"
     ln -s "$ANDROID_BUILD_ROOT/install/lib" "$LIB_DIR/$abi"
index f9a7559..a65c0ff 100644 (file)
@@ -18,9 +18,9 @@ android {
         externalNativeBuild {
             cmake {
               if(System.env.BUILD_LITE_INTERPRETER == '0') {
-                arguments "-DANDROID_STL=c++_shared", "-DBUILD_LITE_INTERPRETER=OFF"
+                arguments "-DANDROID_STL=c++_shared", "-DBUILD_LITE_INTERPRETER=OFF", "-DUSE_LITE_INTERPRETER_PROFILER=OFF"
               } else {
-                arguments "-DANDROID_STL=c++_shared"
+                arguments "-DANDROID_STL=c++_shared", "-DUSE_LITE_INTERPRETER_PROFILER=OFF"
               }
             }
         }
index 80c3ca9..f73df9c 100644 (file)
@@ -27,6 +27,8 @@ enum class C10_API_ENUM RecordScope : uint8_t {
   TORCHSCRIPT_FUNCTION,
   // Kernel Function dtype Tag
   KERNEL_FUNCTION_DTYPE,
+  // Scope for ops executed by the lite interpreter
+  LITE_INTERPRETER,
   // User defined scope (e.g. with record_function())
   USER_SCOPE,
   NUM_SCOPES, // must be the last in the list
@@ -502,11 +504,11 @@ class TORCH_API RecordFunctionCallback {
       }                                             \
     }
 
-// Helper macros to record user_scope events with debug handles
-#define RECORD_USER_SCOPE_WITH_DEBUG_HANDLE_AND_INPUTS(         \
-    fn, debug_handle, inputs)                                   \
-    RECORD_WITH_SCOPE_DEBUG_HANDLE_AND_INPUTS(                  \
-        at::RecordScope::USER_SCOPE, fn, debug_handle, inputs)
+// Helper macros to record LITE_INTERPRETER scope events with debug handles
+#define RECORD_EDGE_SCOPE_WITH_DEBUG_HANDLE_AND_INPUTS(             \
+    fn, debug_handle, inputs)                                       \
+    RECORD_WITH_SCOPE_DEBUG_HANDLE_AND_INPUTS(                      \
+        at::RecordScope::LITE_INTERPRETER, fn, debug_handle, inputs)
 
 // Notes:
 //  - two types of callbacks are provided: thread local and global
index 4ab9ef2..83048ce 100644 (file)
@@ -485,10 +485,17 @@ if(NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
     endif()
   endif()
 
+  list(APPEND LITE_PROFILER_SRCS "")
+  if(USE_LITE_INTERPRETER_PROFILER)
+    append_filelist("libtorch_edge_profiler_sources" LITE_PROFILER_SRCS)
+  endif()
+
   # Switch between the full jit interpreter and lite interpreter
   if(BUILD_LITE_INTERPRETER)
     append_filelist("libtorch_lite_cmake_sources" LIBTORCH_CMAKE_SRCS)
     list(APPEND LIBTORCH_CMAKE_SRCS ${LITE_EAGER_SYMOBLICATION_SRCS})
+    list(APPEND LIBTORCH_CMAKE_SRCS ${LITE_PROFILER_SRCS})
+    set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)
   else()
     append_filelist("libtorch_cmake_sources" LIBTORCH_CMAKE_SRCS)
 
index b0d3689..526e39a 100644 (file)
@@ -1568,6 +1568,11 @@ endif()
 # --[ ATen checks
 set(USE_LAPACK 0)
 
+# we need to build all targets to be linked with PIC
+if(USE_KINETO AND INTERN_BUILD_MOBILE AND USE_LITE_INTERPRETER_PROFILER)
+  set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)
+endif()
+
 if(NOT INTERN_BUILD_MOBILE)
   set(TORCH_CUDA_ARCH_LIST $ENV{TORCH_CUDA_ARCH_LIST})
   set(TORCH_NVCC_FLAGS $ENV{TORCH_NVCC_FLAGS})
@@ -1876,11 +1881,17 @@ list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)
 set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE)
 
 # ---[ Kineto
-if(USE_KINETO AND INTERN_BUILD_MOBILE)
+# The edge profiler depends on the Kineto profiler but only does CPU
+# profiling, so we don't need USE_CUDA/USE_ROCM.
+if(USE_KINETO AND INTERN_BUILD_MOBILE AND NOT (BUILD_LITE_INTERPRETER AND USE_LITE_INTERPRETER_PROFILER))
   message(STATUS "Not using libkineto in a mobile build.")
   set(USE_KINETO OFF)
 endif()
 
+if(USE_KINETO AND INTERN_BUILD_MOBILE AND USE_LITE_INTERPRETER_PROFILER AND (USE_CUDA OR USE_ROCM))
+  message(FATAL_ERROR "Mobile build with profiler does not support CUDA or ROCM")
+endif()
+
 if(USE_KINETO)
   if((NOT USE_CUDA) OR MSVC)
     set(LIBKINETO_NOCUPTI ON CACHE STRING "" FORCE)
@@ -1956,6 +1967,7 @@ if(USE_KINETO)
 
   if(NOT TARGET kineto)
     add_subdirectory("${KINETO_SOURCE_DIR}")
+    set_property(TARGET kineto PROPERTY POSITION_INDEPENDENT_CODE ON)
   endif()
   list(APPEND Caffe2_DEPENDENCY_LIBS kineto)
   string(APPEND CMAKE_CXX_FLAGS " -DUSE_KINETO")
index 6da5b16..7e48815 100755 (executable)
@@ -83,6 +83,7 @@ if [ "${BUILD_LITE_INTERPRETER}" == 0 ]; then
 else
   CMAKE_ARGS+=("-DBUILD_LITE_INTERPRETER=ON")
 fi
+CMAKE_ARGS+=("-DUSE_LITE_INTERPRETER_PROFILER=OFF")
 
 # Don't build binaries or tests (only the library)
 CMAKE_ARGS+=("-DBUILD_TEST=OFF")
index ef0294d..11b47a8 100644 (file)
@@ -338,16 +338,16 @@ TEST(BackendTestDebugInfo, TestCompiler) {
   lm._save_for_mobile(ss, ExtraFilesMap(), true);
   auto mlm = _load_for_mobile(ss);
   std::string error_pattern = R"(
-  Module hierarchy:top(m).aten::add
+  Module hierarchy:top(m)::<unknown>.aten::add
 Traceback of TorchScript (most recent call last):
-  File "<string>", line 5, in FunctionName_UNKNOWN
+  File "<string>", line 5, in <unknown>
                 typed_inputs: List[Any] = [x, h, ]
                 if self.__backend.is_available() :
                   _0, = self.__backend.execute(self.__handles["forward"], typed_inputs)
                         ~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
                   assert isinstance(_0, Tensor)
                   return _0
-  File "<string>", line 3, in FunctionName_UNKNOWN
+  File "<string>", line 3, in <unknown>
 
     def forward(self, x, h):
         return x + h
@@ -392,16 +392,16 @@ TEST(BackendTestDebugInfo, TestExceptionStackForCompilerWithModuleHierarchy) {
   lm._save_for_mobile(ss, ExtraFilesMap(), true);
   auto mlm = _load_for_mobile(ss);
   std::string error_pattern = R"(
-  Module hierarchy:top(C).A0(A).aten::add
+  Module hierarchy:top(C)::<unknown>.A0(A)::forward.aten::add
 Traceback of TorchScript (most recent call last):
-  File "<string>", line 5, in FunctionName_UNKNOWN
+  File "<string>", line 5, in <unknown>
                 typed_inputs: List[Any] = [x, y, ]
                 if self.__backend.is_available() :
                   _0, = self.__backend.execute(self.__handles["forward"], typed_inputs)
                         ~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
                   assert isinstance(_0, Tensor)
                   return _0
-  File "<string>", line 3, in FunctionName_UNKNOWN
+  File "<string>", line 3, in <unknown>
 
     def forward(self, x, y):
       return self.A0.forward(x, y) + self.B0.forward(x)
@@ -485,16 +485,16 @@ TEST(
    *
    */
   std::string error_pattern = R"(
-  Module hierarchy:top(C).B0(B).A0(A).aten::add
+  Module hierarchy:top(C)::<unknown>.B0(B)::forward.A0(A)::forward.aten::add
 Traceback of TorchScript (most recent call last):
-  File "<string>", line 5, in FunctionName_UNKNOWN
+  File "<string>", line 5, in <unknown>
                 typed_inputs: List[Any] = [x, y, ]
                 if self.__backend.is_available() :
                   _0, = self.__backend.execute(self.__handles["forward"], typed_inputs)
                         ~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
                   assert isinstance(_0, Tensor)
                   return _0
-  File "<string>", line 3, in FunctionName_UNKNOWN
+  File "<string>", line 3, in <unknown>
 
     def forward(self, x, y):
       return self.B0.forward(x, y) + 3
@@ -572,9 +572,9 @@ TEST(BackendTestDebugInfo, TestExceptionStackForCompilerWithLoweredSubModule) {
   c._save_for_mobile(ss, ExtraFilesMap(), true);
   auto c_loaded = _load_for_mobile(ss);
   std::string error_pattern = R"(
-  Module hierarchy:top(C).A0(A).aten::add
+  Module hierarchy:top(C)::<unknown>.A0(A)::forward.aten::add
 Traceback of TorchScript (most recent call last):
-  File "<string>", line 3, in FunctionName_UNKNOWN
+  File "<string>", line 3, in <unknown>
 
     def forward(self, x, y):
       return self.A0.forward(x, y) + self.B0.forward(x)
@@ -587,7 +587,7 @@ Traceback of TorchScript (most recent call last):
                         ~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
                   assert isinstance(_0, Tensor)
                   return _0
-  File "<string>", line 3, in FunctionName_UNKNOWN
+  File "<string>", line 3, in <unknown>
 
     def forward(self, x, y):
       return x + y
@@ -693,9 +693,9 @@ TEST(
    *
    *  */
   std::string error_pattern = R"(
-  Module hierarchy:top(C).A0(A).AA0(AA).aten::add
+  Module hierarchy:top(C)::<unknown>.A0(A)::forward.AA0(AA)::forward.aten::add
 Traceback of TorchScript (most recent call last):
-  File "<string>", line 3, in FunctionName_UNKNOWN
+  File "<string>", line 3, in <unknown>
 
     def forward(self, x, y):
       return self.A0.forward(x, y) + self.B0.forward(x)
@@ -708,7 +708,7 @@ Traceback of TorchScript (most recent call last):
                         ~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
                   assert isinstance(_0, Tensor)
                   return _0
-  File "<string>", line 3, in FunctionName_UNKNOWN
+  File "<string>", line 3, in <unknown>
 
     def forward(self, x, y):
       return self.AA0.forward(x, y) + 3
index 93e6b40..3bd2bec 100644 (file)
@@ -482,7 +482,7 @@ TEST(LiteInterpreterTest, ModuleInfoBasic) {
     }
   }
 
-  AT_ASSERT(module_debug_info_set.count("top(M).aten::mul"));
+  AT_ASSERT(module_debug_info_set.count("top(M)::<unknown>.aten::mul"));
 }
 
 TEST(LiteInterpreterTest, NotSaveModuleInfo) {
@@ -542,9 +542,11 @@ TEST(LiteInterpreterTest, OneSubmoduleModuleInfo) {
     }
   }
 
-  AT_ASSERT(module_debug_info_set.count("top(B).aten::add"));
-  AT_ASSERT(module_debug_info_set.count("top(B).A0(A).aten::add"));
-  AT_ASSERT(module_debug_info_set.count("top(B).A0(A).aten::mul"));
+  AT_ASSERT(module_debug_info_set.count("top(B)::<unknown>.aten::add"));
+  AT_ASSERT(module_debug_info_set.count(
+      "top(B)::<unknown>.A0(A)::forward.aten::add"));
+  AT_ASSERT(module_debug_info_set.count(
+      "top(B)::<unknown>.A0(A)::forward.aten::mul"));
 }
 
 TEST(LiteInterpreterTest, TwoSubmodulesModuleInfo) {
@@ -585,9 +587,11 @@ TEST(LiteInterpreterTest, TwoSubmodulesModuleInfo) {
     }
   }
 
-  AT_ASSERT(module_debug_info_set.count("top(C).aten::add"));
-  AT_ASSERT(module_debug_info_set.count("top(C).A0(A).aten::add"));
-  AT_ASSERT(module_debug_info_set.count("top(C).B0(B).aten::add"));
+  AT_ASSERT(module_debug_info_set.count("top(C)::<unknown>.aten::add"));
+  AT_ASSERT(module_debug_info_set.count(
+      "top(C)::<unknown>.A0(A)::forward.aten::add"));
+  AT_ASSERT(module_debug_info_set.count(
+      "top(C)::<unknown>.B0(B)::forward.aten::add"));
 }
 
 TEST(LiteInterpreterTest, GetRuntimeByteCodeVersion) {
@@ -854,9 +858,11 @@ TEST(LiteInterpreterTest, SequentialModuleInfo) {
   //   def forward(self, x):
   //     return self.A0.forward(self.B0.forward(x))
 
-  AT_ASSERT(module_debug_info_set.count("top(C).prim::Return"));
-  AT_ASSERT(module_debug_info_set.count("top(C).A0(A).aten::add"));
-  AT_ASSERT(module_debug_info_set.count("top(C).B0(B).aten::add"));
+  AT_ASSERT(module_debug_info_set.count("top(C)::<unknown>.prim::Return"));
+  AT_ASSERT(module_debug_info_set.count(
+      "top(C)::<unknown>.A0(A)::forward.aten::add"));
+  AT_ASSERT(module_debug_info_set.count(
+      "top(C)::<unknown>.B0(B)::forward.aten::add"));
 }
 
 TEST(LiteInterpreterTest, HierarchyModuleInfo) {
@@ -901,9 +907,11 @@ TEST(LiteInterpreterTest, HierarchyModuleInfo) {
   // "top(C).forward": for the add operator in top.
   // "top(C).B0(B).forward": for the add operator in B0.
   // "top(C).B0(B).forward.A0(A).forward": for the add operator in A0.
-  AT_ASSERT(module_debug_info_set.count("top(C).aten::add"));
-  AT_ASSERT(module_debug_info_set.count("top(C).B0(B).aten::add"));
-  AT_ASSERT(module_debug_info_set.count("top(C).B0(B).A0(A).aten::add"));
+  AT_ASSERT(module_debug_info_set.count("top(C)::<unknown>.aten::add"));
+  AT_ASSERT(module_debug_info_set.count(
+      "top(C)::<unknown>.B0(B)::forward.aten::add"));
+  AT_ASSERT(module_debug_info_set.count(
+      "top(C)::<unknown>.B0(B)::forward.A0(A)::forward.aten::add"));
 }
 
 TEST(LiteInterpreterTest, DuplicatedClassTypeModuleInfo) {
@@ -960,9 +968,11 @@ TEST(LiteInterpreterTest, DuplicatedClassTypeModuleInfo) {
   // "top(B).A0(A).forward": for the add operator in A0.
   // "top(B).A1(A).forward": for the add operator in A1.
 
-  AT_ASSERT(module_debug_info_set.count("top(B).aten::add"));
-  AT_ASSERT(module_debug_info_set.count("top(B).A0(A).aten::add"));
-  AT_ASSERT(module_debug_info_set.count("top(B).A1(A).aten::add"));
+  AT_ASSERT(module_debug_info_set.count("top(B)::<unknown>.aten::add"));
+  AT_ASSERT(module_debug_info_set.count(
+      "top(B)::<unknown>.A0(A)::forward.aten::add"));
+  AT_ASSERT(module_debug_info_set.count(
+      "top(B)::<unknown>.A1(A)::forward.aten::add"));
 }
 #endif // !defined(FB_XPLAT_BUILD)
 
@@ -1371,9 +1381,9 @@ TEST(LiteInterpreterTest, TestExceptionStackWithTwoLevelModuleHierarchy) {
   c._save_for_mobile(ss, ExtraFilesMap(), true);
   auto lite_m = _load_for_mobile(ss);
   std::string error_pattern = R"(
-  Module hierarchy:top(C).B0(B).A0(A).aten::add
+  Module hierarchy:top(C)::<unknown>.B0(B)::foo.A0(A)::bar.aten::add
 Traceback of TorchScript (most recent call last):
-  File "<string>", line 3, in FunctionName_UNKNOWN
+  File "<string>", line 3, in <unknown>
 
     def forward(self, x, y):
       return self.B0.foo(x, y) + 3
index 8ecedd3..82f70fe 100644 (file)
@@ -2481,7 +2481,7 @@ TEST(RecordDebugHandles, Basic) {
           torch::autograd::profiler::ProfilerState::KINETO, false, false),
       activities);
   {
-    RECORD_USER_SCOPE_WITH_DEBUG_HANDLE_AND_INPUTS("my_function", 42, {});
+    RECORD_EDGE_SCOPE_WITH_DEBUG_HANDLE_AND_INPUTS("my_function", 42, {});
     float x{5.9999}, y{2.1212};
     float z = x / y;
   }
@@ -2533,7 +2533,7 @@ TEST(RecordDebugHandles, ScopedCallbacks) {
       torch::autograd::profiler::ProfilerConfig(
           torch::autograd::profiler::ProfilerState::KINETO, false, false),
       {torch::autograd::profiler::ActivityType::CPU},
-      {at::RecordScope::USER_SCOPE});
+      {at::RecordScope::LITE_INTERPRETER});
   {
     auto a = torch::rand({128, 128});
     auto b = torch::rand({128, 128});
@@ -2550,9 +2550,9 @@ TEST(RecordDebugHandles, ScopedCallbacks) {
       torch::autograd::profiler::ProfilerConfig(
           torch::autograd::profiler::ProfilerState::KINETO, false, false),
       {torch::autograd::profiler::ActivityType::CPU},
-      {at::RecordScope::USER_SCOPE});
+      {at::RecordScope::LITE_INTERPRETER});
   {
-    RECORD_USER_SCOPE_WITH_DEBUG_HANDLE_AND_INPUTS("my_function", 42, {});
+    RECORD_EDGE_SCOPE_WITH_DEBUG_HANDLE_AND_INPUTS("my_function", 42, {});
     auto a = torch::rand({128, 128});
     auto b = torch::rand({128, 128});
     auto c = a + b;
@@ -2568,11 +2568,9 @@ TEST(RecordDebugHandles, ScopedCallbacks) {
   for (const auto& e : kineto_events) {
     if (e.name() == "my_function") {
       ASSERT_EQ(e.debugHandle(), 42);
-    } else if (e.name() == "not_my_function") {
-      ASSERT_EQ(e.debugHandle(), -1);
     }
   }
-  ASSERT_TRUE(profiler_results_ptr->events().size() == 2);
+  ASSERT_TRUE(profiler_results_ptr->events().size() == 1);
 }
 
 TEST(IValueKWargsTest, Basic) {
index c68ea88..503203d 100644 (file)
@@ -4,6 +4,7 @@ set(
 set(LITE_INTERPRETER_RUNTIME_TEST_DIR
   ${TORCH_ROOT}/test/cpp/lite_interpreter_runtime/main.cpp
   ${TORCH_ROOT}/test/cpp/lite_interpreter_runtime/test_lite_interpreter_runtime.cpp
+  ${TORCH_ROOT}/test/cpp/lite_interpreter_runtime/test_mobile_profiler.cpp
 )
 
 add_library(backend_with_compiler_runtime SHARED
index 2ccf6ee..1648b1e 100644 (file)
@@ -142,9 +142,9 @@ TEST(RunTimeTest, DelegateException) {
   inputs.emplace_back(torch::rand({13, 9}));
 
   std::string error_pattern = R"(
-  Module hierarchy:top(C).A0(backend_with_compiler_demoLoweredModule).AA0(AA).aten::add
+  Module hierarchy:top(C)::<unknown>.A0(backend_with_compiler_demoLoweredModule)::forward.AA0(AA)::forward.aten::add
 Traceback of TorchScript (most recent call last):
-  File "<string>", line 3, in FunctionName_UNKNOWN
+  File "<string>", line 3, in <unknown>
 
     def forward(self, x, y):
       return self.A0.forward(x, y) + self.B0.forward(x)
@@ -157,7 +157,7 @@ Traceback of TorchScript (most recent call last):
                         ~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
                   assert isinstance(_0, Tensor)
                   return _0
-  File "<string>", line 3, in FunctionName_UNKNOWN
+  File "<string>", line 3, in <unknown>
 
     def forward(self, x, y):
       return self.AA0.forward(x, y) + 3
diff --git a/test/cpp/lite_interpreter_runtime/test_mobile_profiler.cpp b/test/cpp/lite_interpreter_runtime/test_mobile_profiler.cpp
new file mode 100644 (file)
index 0000000..ee59b39
--- /dev/null
@@ -0,0 +1,74 @@
+#include <fstream>
+#include <gtest/gtest.h>
+#include <test/cpp/jit/test_utils.h>
+#include <torch/csrc/jit/api/module.h>
+#include <torch/csrc/jit/frontend/resolver.h>
+#include <torch/csrc/jit/mobile/import.h>
+#include <torch/csrc/jit/mobile/module.h>
+#include <torch/csrc/jit/mobile/profiler_edge.h>
+
+#include <unordered_set>
+
+#ifdef EDGE_PROFILER_USE_KINETO
+namespace torch {
+namespace jit {
+namespace mobile {
+
+namespace {
+bool checkModuleHierarchyForOp(
+    const std::string& op_name,
+    const std::string& module_hier,
+    std::ifstream& trace_file) {
+  std::string line;
+  while (std::getline(trace_file, line)) {
+    if (line.find(op_name) != std::string::npos) {
+      while (std::getline(trace_file, line)) {
+        if (line.find("Module Hierarchy") != std::string::npos) {
+          return (line.find(module_hier) != std::string::npos);
+        }
+      }
+    }
+  }
+  return false;
+}
+} // namespace
+
+TEST(MobileProfiler, ModuleHierarchy) {
+  std::string filePath(__FILE__);
+  auto testModelFile = filePath.substr(0, filePath.find_last_of("/\\") + 1);
+  testModelFile.append("to_be_profiled_module.ptl");
+
+  std::vector<IValue> inputs;
+  inputs.emplace_back(at::rand({64, 64}));
+  inputs.emplace_back(at::rand({64, 64}));
+  std::string trace_file_name("/tmp/test_trace.trace");
+
+  mobile::Module bc = _load_for_mobile(testModelFile);
+  {
+    KinetoEdgeCPUProfiler profiler(
+        bc,
+        trace_file_name,
+        false, // record input_shapes
+        false, // profile memory
+        true, // record callstack
+        false, // record flops
+        true); // record module hierarchy
+    bc.forward(inputs);
+  } // End of profiler
+  std::ifstream trace_file(trace_file_name);
+  std::string line;
+  ASSERT_TRUE(trace_file.is_open());
+  trace_file.seekg(0, std::ios_base::beg);
+  ASSERT_TRUE(checkModuleHierarchyForOp("aten::sub", "top(C)::<unknown>.A0(A)::forward.aten::sub", trace_file));
+  trace_file.seekg(0, std::ios_base::beg);
+  ASSERT_TRUE(checkModuleHierarchyForOp("aten::mul", "top(C)::<unknown>.A0(A)::forward.SELF(A)::forward_impl_.SELF(A)::my_new_method.aten::mul", trace_file));
+  trace_file.seekg(0, std::ios_base::beg);
+  ASSERT_TRUE(checkModuleHierarchyForOp("aten::add", "top(C)::<unknown>.A0(A)::forward.SELF(A)::forward_impl_.aten::add", trace_file));
+  ASSERT_TRUE(checkModuleHierarchyForOp("aten::add", "top(C)::<unknown>.SELF(C)::call_b.B0(B)::forward.aten::add", trace_file));
+  ASSERT_TRUE(checkModuleHierarchyForOp("aten::add", "top(C)::<unknown>.aten::add", trace_file));
+}
+
+} // namespace mobile
+} // namespace jit
+} // namespace torch
+#endif
diff --git a/test/cpp/lite_interpreter_runtime/to_be_profiled_module.ptl b/test/cpp/lite_interpreter_runtime/to_be_profiled_module.ptl
new file mode 100644 (file)
index 0000000..243e41e
Binary files /dev/null and b/test/cpp/lite_interpreter_runtime/to_be_profiled_module.ptl differ
index e7958cc..bfcf55a 100644 (file)
@@ -119,6 +119,10 @@ libtorch_profiler_sources = [
     "torch/csrc/autograd/profiler_kineto.cpp",
 ]
 
+libtorch_edge_profiler_sources = libtorch_profiler_sources + [
+    "torch/csrc/jit/mobile/profiler_edge.cpp",
+]
+
 core_trainer_sources = [
     "torch/csrc/autograd/anomaly_mode.cpp",
     "torch/csrc/autograd/autograd.cpp",
index 526813d..da1ae6f 100644 (file)
@@ -172,6 +172,14 @@ struct KinetoThreadLocalState : public ProfilerThreadLocalState {
     }
   }
 
+  const std::function<void(std::vector<KinetoEvent>&)>& getEventPostProcessingCallback() const {
+    return event_post_process_cb_;
+  }
+
+  void setEventPostProcessingCallback(std::function<void(std::vector<KinetoEvent>&)>&& cb) {
+    event_post_process_cb_ = std::move(cb);
+  }
+
 #ifdef USE_KINETO
   c10::DeviceType deviceTypeFromActivity(libkineto::ActivityType activity_type) {
     // fallthrough
@@ -258,6 +266,8 @@ struct KinetoThreadLocalState : public ProfilerThreadLocalState {
 #endif // USE_KINETO
   uint64_t start_time_;
   std::vector<KinetoEvent> kineto_events_;
+  // Optional, if event post-processing is enabled.
+  std::function<void(std::vector<KinetoEvent>&)> event_post_process_cb_;
 };
 
 std::vector<std::string> inputTypes(const at::RecordFunction& fn) {
@@ -496,6 +506,16 @@ void prepareProfiler(
 #endif // USE_KINETO
 }
 
+void enableProfilerWithEventPostProcess(
+    const ProfilerConfig& config,
+    const std::set<ActivityType>& activities,
+    std::function<void(std::vector<KinetoEvent>&)>&& cb,
+    const std::unordered_set<at::RecordScope>& scopes) {
+  enableProfiler(config, activities, scopes);
+  auto state_ptr = getProfilerTLSState();
+  state_ptr->setEventPostProcessingCallback(std::move(cb));
+}
+
 void enableProfiler(
     const ProfilerConfig& config,
     const std::set<ActivityType>& activities,
@@ -548,6 +568,11 @@ std::unique_ptr<ProfilerResult> disableProfiler() {
 
 #ifdef USE_KINETO
   state_ptr->cpu_trace->span.endTime = getTimeUs();
+
+  // Call the event post-processing callback, if there is one, before
+  // finalizing the trace.
+  if (state_ptr->getEventPostProcessingCallback()) {
+    state_ptr->getEventPostProcessingCallback()(state_ptr->kineto_events_);
+  }
   state_ptr->finalizeCPUTrace();
   libkineto::api().activityProfiler().transferCpuTrace(std::move(state_ptr->cpu_trace));
 
index 310554a..21bc318 100644 (file)
@@ -5,7 +5,11 @@
 
 #ifdef USE_KINETO
 // skip Kineto dependency on mobile
-#ifdef C10_MOBILE
+// unless it is explicitly asked for:
+// KinetoEdgeCPUProfiler uses the Kineto profiler for CPU
+// event profiling, which depends on the CPU-only libkineto.
+#if defined(C10_MOBILE) && !defined(EDGE_PROFILER_USE_KINETO)
 #undef USE_KINETO
 #endif
 #endif
@@ -334,6 +338,28 @@ TORCH_API void enableProfiler(
     const std::set<ActivityType>& activities,
     const std::unordered_set<at::RecordScope>& scopes = {});
 
+/*
+ * Same as enableProfiler but with a callback to do post-processing of
+ * KinetoEvents.
+ * enableProfilerWithEventPostProcess enables the profiler to capture the
+ * specified activities, with the specified RecordFunction scopes, if any.
+ * Additionally, it takes a functor that does in-place post-processing of
+ * events, e.g. populating stack trace or module hierarchy information
+ * lazily using the debug_handle.
+ * An example usage is the lite interpreter, which has a recording scope of
+ * LITE_INTERPRETER. The lite interpreter runtime records debug handles in
+ * RecordFunction, along with other information. Debug handles are
+ * eventually passed down to KinetoEvent and recorded as part of the event.
+ * KinetoEdgeCPUProfiler, in torch/csrc/jit/mobile/profiler_edge.cpp,
+ * enables the profiler via enableProfilerWithEventPostProcess with a
+ * post-processing callback that takes these debug handles and generates
+ * stack trace and module hierarchy information once profiling is done.
+ */
+TORCH_API void enableProfilerWithEventPostProcess(
+    const ProfilerConfig& config,
+    const std::set<ActivityType>& activities,
+    std::function<void(std::vector<KinetoEvent>&)>&& cb,
+    const std::unordered_set<at::RecordScope>& scopes = {});
+
 TORCH_API std::unique_ptr<ProfilerResult> disableProfiler();
 
 TORCH_API void prepareProfiler(
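
For illustration (not part of this diff), a sketch of driving the new API
directly; KinetoEdgeCPUProfiler in torch/csrc/jit/mobile/profiler_edge.cpp,
added below, wraps this same pattern. It assumes a mobile::Module m and its
inputs are in scope:

  namespace profiler = torch::autograd::profiler;

  profiler::ProfilerConfig config(
      profiler::ProfilerState::KINETO,
      /*report_input_shapes=*/false,
      /*profile_memory=*/false,
      /*with_stack=*/false,
      /*with_flops=*/false,
      /*with_modules=*/true);
  profiler::prepareProfiler(config, {profiler::ActivityType::CPU});
  profiler::enableProfilerWithEventPostProcess(
      config,
      {profiler::ActivityType::CPU},
      // Post-processing: resolve each event's debug handle into a module
      // hierarchy string once profiling is done.
      [&m](std::vector<profiler::KinetoEvent>& events) {
        for (auto& e : events) {
          e.moduleHierarchy(std::vector<std::string>(
              {m.getModuleHierarchy(e.debugHandle())}));
        }
      },
      {at::RecordScope::LITE_INTERPRETER});
  m.forward(inputs);
  profiler::disableProfiler()->save("/tmp/trace.trace");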
index d0d8ada..9c734f4 100644 (file)
@@ -44,11 +44,9 @@ std::pair<std::vector<StackEntry>, std::string> getStackTraceWithModuleHierarchy
           module_info.append(".").append(module_instance_info.instance_name());
         }
       } else {
-        module_info += ".UNKNOWN_INSTANCE(UNKNOWN_TYPE)";
+        module_info.append(".UNKNOWN_INSTANCE(UNKNOWN_TYPE)");
       }
       // Now add source range info to stack
-      // When we serialize function names, those can be added here.
-      // TODO: Add function name separately
       entries.emplace_back(
           StackEntry{prev_function_name, callstack_ptr->source_range()});
       if (callstack_ptr->function()) {
@@ -56,6 +54,11 @@ std::pair<std::vector<StackEntry>, std::string> getStackTraceWithModuleHierarchy
       } else {
         prev_function_name = callstack_ptr->function_name();
       }
+      // The function name is appended here. It is stored as
+      // prev_function_name because, for StackEntry, it is appended in the
+      // next iteration; this is the format in which format_stack_trace
+      // expects function names.
+      module_info.append("::").append(prev_function_name);
 
       if (callstack_ptr->callee()) {
         callstack_ptr = callstack_ptr->callee().value();
@@ -82,20 +85,21 @@ std::pair<std::string, std::string> getStackTraceWithModuleHierarchy(
   std::vector<StackEntry> stack_entries;
   std::string module_info =
       root_scope_string + "(" + top_module_type_name + ")";
-  std::string caller_fn_name = "FunctionName_UNKNOWN";
+  std::string caller_fn_name = "<unknown>";
+  module_info.append("::").append(caller_fn_name);
   for (const auto& debug_info : source_callstacks) {
     auto debug_info_pair =
         getStackTraceWithModuleHierarchy(debug_info, caller_fn_name);
     auto entries = std::move(debug_info_pair.first);
     stack_entries.insert(stack_entries.end(), entries.begin(), entries.end());
-    module_info += debug_info_pair.second;
+    module_info.append(debug_info_pair.second);
   }
   // Only last entry in the callstack will have a node name of interest.
   // Rest are likely CallMethod/CallFunction nodes
   auto last_entry = source_callstacks.back();
   const std::string& node_name =
       std::get<kDebugInfoTupleNodeNameIndex>(last_entry);
-  module_info += "." + node_name;
+  module_info.append(".").append(node_name);
   std::ostringstream ss;
   ss << "Module hierarchy:" << module_info << "\n";
   format_stack_trace(ss, stack_entries);
index d82d84e..275b84b 100644 (file)
@@ -54,7 +54,9 @@ bool InterpreterState::run(Stack& stack) {
   size_t pc = 0;
   while (true) {
     try {
-      Instruction inst = code_->instructions_with_handles_[pc].instruction;
+      auto inst_with_handle = code_->instructions_with_handles_.at(pc);
+      Instruction inst = inst_with_handle.instruction;
+      DebugHandle debug_handle = inst_with_handle.debug_handle;
 
       //    std::cout << "RUNNING " << pc << " " << code_->instructions_[pc];
       //    if (inst.op == OP) {
@@ -64,6 +66,17 @@ bool InterpreterState::run(Stack& stack) {
       //      }
       //    }
       //    std::cout << std::endl;
+
+      // TODO(iliacher): remove the workaround after RecordFunction is in
+      // the Dispatcher. Check with iliacher whether this has been done.
+      // Also, this is not exception-safe: if an exception is thrown, the
+      // record function will be left enabled. That is a TODO.
+      bool prev_value = isRecordFunctionEnabled();
+      if (!prev_value) {
+        // enable only for the RecordFunction
+        enableRecordFunction(true);
+      }
       switch (inst.op) {
         case OP: {
           if (at::hasGlobalCallbacks()) {
@@ -74,22 +87,15 @@ bool InterpreterState::run(Stack& stack) {
             }
           }
 
-          // TODO(iliacher): remove the workaround after RecordFunction is in
-          // Dispatcher
-          bool prev_value = isRecordFunctionEnabled();
-          if (!prev_value) {
-            // enable only for the RecordFunction
-            enableRecordFunction(true);
-          }
-          RECORD_USER_SCOPE_WITH_INPUTS(code_->op_names_[inst.X].name, stack);
-          if (!prev_value) {
-            enableRecordFunction(false);
-          }
+          RECORD_EDGE_SCOPE_WITH_DEBUG_HANDLE_AND_INPUTS(
+              code_->op_names_[inst.X].name, debug_handle, stack);
           code_->operators_[inst.X](stack);
           ++pc;
         } break;
         case OPN: {
           stack.push_back(inst.N);
+          RECORD_EDGE_SCOPE_WITH_DEBUG_HANDLE_AND_INPUTS(
+              code_->op_names_[inst.X].name, debug_handle, stack);
           code_->operators_[inst.X](stack);
           ++pc;
         } break;
@@ -99,6 +105,8 @@ bool InterpreterState::run(Stack& stack) {
                   .toObject()
                   ->type()
                   ->getMethod(code_->constants_[inst.X].toStringRef());
+          RECORD_EDGE_SCOPE_WITH_DEBUG_HANDLE_AND_INPUTS(
+              method.name(), debug_handle, stack);
           method.run(stack);
           ++pc;
         } break;
@@ -232,6 +240,10 @@ bool InterpreterState::run(Stack& stack) {
         default:
           AT_ERROR(toString(inst.op), " is invalid.");
       }
+
+      if (!prev_value) {
+        enableRecordFunction(false);
+      }
       // This exception must be caught first as it derived from c10::Error
     } catch (c10::BackendRuntimeException& e) {
       exception_pc_ = pc;
index fad6447..c04d9f7 100644 (file)
@@ -122,6 +122,24 @@ const std::map<std::string, at::Tensor> Module::named_parameters() const {
   return params;
 }
 
+std::string Module::getModuleHierarchy(const int64_t debug_handle) const {
+#if defined(SYMBOLICATE_MOBILE_DEBUG_HANDLE)
+  return getDebugTable().getModuleHierarchyInfo(
+      debug_handle, getTopModuleTypeName(*this));
+#else
+  return "";
+#endif
+}
+
+std::string Module::getCallStack(const int64_t debug_handle) const {
+#if defined(SYMBOLICATE_MOBILE_DEBUG_HANDLE)
+  return getDebugTable().getSourceDebugString(
+      debug_handle, getTopModuleTypeName(*this));
+#else
+  return "";
+#endif
+}
+
 // We will continue to support this API for now as this is being relied upon
 // for profiling.
 // We really need to change this part, so in the next step for profiling support
index 8a37b8b..73637aa 100644 (file)
@@ -79,6 +79,8 @@ class TORCH_API Module {
   const std::vector<at::Tensor> parameters() const;
   const std::map<std::string, at::Tensor> named_parameters() const;
   std::string get_forward_method_debug_info(size_t pc) const;
+  std::string getModuleHierarchy(const int64_t debug_handle) const;
+  std::string getCallStack(const int64_t debug_handle) const;
   /// Enables "training" mode.
   void train(bool on = true);
   /// Calls train(false) to enable "eval" mode.
diff --git a/torch/csrc/jit/mobile/profiler_edge.cpp b/torch/csrc/jit/mobile/profiler_edge.cpp
new file mode 100644 (file)
index 0000000..bcd5a62
--- /dev/null
@@ -0,0 +1,63 @@
+#include <torch/csrc/jit/mobile/profiler_edge.h>
+#include <string>
+#include <vector>
+
+namespace profiler = torch::autograd::profiler;
+namespace torch {
+namespace jit {
+namespace mobile {
+
+KinetoEdgeCPUProfiler::KinetoEdgeCPUProfiler(
+    const torch::jit::mobile::Module& m,
+    const std::string& fname,
+    const bool report_input_shapes,
+    const bool profile_memory,
+    const bool with_stack,
+    const bool with_flops,
+    const bool with_modules)
+    : m_(m), trace_file_name_(fname) {
+  profiler::ProfilerConfig config(
+      profiler::ProfilerState::KINETO,
+      report_input_shapes,
+      profile_memory,
+      with_stack,
+      with_flops,
+      with_modules);
+  profiler::prepareProfiler(config, {profiler::ActivityType::CPU});
+  if (with_modules || with_stack) {
+    auto post_processing = [this, with_stack, with_modules](
+                               std::vector<profiler::KinetoEvent>& events) {
+      for (auto& e : events) {
+        if (with_modules) {
+          // Since KinetoEvent's module hierarchy takes a vector of strings,
+          // we construct a temporary vector with one string element.
+          e.moduleHierarchy(std::vector<std::string>(
+              {this->m_.getModuleHierarchy(e.debugHandle())}));
+        } else if (with_stack) {
+          // Since KinetoEvent's stack trace takes a vector of strings, we
+          // construct a temporary vector with one string element.
+          e.stack(std::vector<std::string>(
+              {this->m_.getCallStack(e.debugHandle())}));
+        }
+      }
+    };
+    profiler::enableProfilerWithEventPostProcess(
+        config,
+        {profiler::ActivityType::CPU},
+        post_processing,
+        {at::RecordScope::LITE_INTERPRETER});
+  } else {
+    profiler::enableProfiler(
+        config,
+        {profiler::ActivityType::CPU},
+        {at::RecordScope::LITE_INTERPRETER});
+  }
+  trace_file_name_ = fname;
+}
+
+KinetoEdgeCPUProfiler::~KinetoEdgeCPUProfiler() {
+  profiler::disableProfiler()->save(trace_file_name_);
+}
+} // namespace mobile
+} // namespace jit
+} // namespace torch
diff --git a/torch/csrc/jit/mobile/profiler_edge.h b/torch/csrc/jit/mobile/profiler_edge.h
new file mode 100644 (file)
index 0000000..a245034
--- /dev/null
@@ -0,0 +1,68 @@
+#pragma once
+#include <torch/csrc/autograd/profiler_kineto.h>
+#include <torch/csrc/jit/mobile/module.h>
+
+namespace torch {
+namespace jit {
+namespace mobile {
+class TORCH_API KinetoEdgeCPUProfiler {
+ public:
+  // This profiler only profiles KINETO events
+  // No GPU_FALLBACK or NVTX
+  /*
+   * @param m is the instance of mobile Module which is being profiled.
+   *        Note that this implies that KinetoEdgeCPUProfiler can be used
+   *        to profile a specific Module (see usage below), unlike
+   *        ProfilerKineto, which can profile the PyTorch runtime in an
+   *        arbitrary scope.
+   * @param fname is the name of the file to which the chrome trace is
+   *        written.
+   * @param report_input_shapes: whether to record shapes of the op's inputs.
+   * @param profile_memory: whether to record memory usage of the op.
+   * @param with_stack: whether to record the model's python stack trace
+   *        for the op.
+   * @param with_flops: whether to report flops corresponding to the op.
+   * @param with_modules: whether to report the original python module
+   *        hierarchy to which the op belongs.
+   *
+   * The usage pattern for this profiler must be as follows:
+   *
+   * {
+   *   KinetoEdgeCPUProfiler profiler(m, filename, args);
+   *   m.forward(...);
+   * }
+   *
+   * The reason being that KinetoEdgeCPUProfiler has a dependency on the
+   * Module and thus must not outlive it.
+   *
+   * Thus KinetoEdgeCPUProfiler must be used as an RAII guard to do
+   * profiling within a certain scope. In that scope, the captured reference
+   * to the Module will outlive KinetoEdgeCPUProfiler. This is guaranteed
+   * because KinetoEdgeCPUProfiler is constructed on the stack, after the
+   * Module.
+   *
+   * An example of the anti-pattern and wrong usage is:
+   *
+   * auto profiler =
+   *     std::make_shared<KinetoEdgeCPUProfiler>(m, filename, args);
+   * m.forward(...);
+   *
+   * since the KinetoEdgeCPUProfiler object would then be constructed on
+   * the heap with its lifetime managed manually or via smart pointers.
+   */
+  KinetoEdgeCPUProfiler(
+      const torch::jit::mobile::Module& m,
+      const std::string& fname,
+      const bool report_input_shapes = false,
+      const bool profile_memory = false,
+      const bool with_stack = false,
+      const bool with_flops = false,
+      const bool with_modules = false);
+
+  ~KinetoEdgeCPUProfiler();
+
+ private:
+  /*
+   * We store a reference to Module to make such dependency explicit, since
+   * a Module reference is already stored in a functor.
+   */
+  const mobile::Module& m_;
+  std::string trace_file_name_;
+};
+} // namespace mobile
+} // namespace jit
+} // namespace torch