Remove GPU dependency from ProfileObserver (#17592)
author Martin Schatz <mschatz@fb.com>
Mon, 4 Mar 2019 17:55:05 +0000 (09:55 -0800)
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Mon, 4 Mar 2019 18:00:46 +0000 (10:00 -0800)
Summary:
Remove GPU dependency and register ProfileObserver.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/17592

Reviewed By: ezyang

Differential Revision: D14265801

Pulled By: mdschatz

fbshipit-source-id: f98c0c32653c64a8b087c58ece4f864dfbe1d4b8

caffe2/observers/CMakeLists.txt
caffe2/observers/profile_observer.cc [moved from caffe2/observers/profile_observer_gpu.cc with 69% similarity]
caffe2/observers/profile_observer.h
caffe2/python/pybind_state.cc

index 5cb7ff6..a1cbb07 100644 (file)
@@ -1,26 +1,16 @@
 if(USE_OBSERVERS)
   message(STATUS "Include Observer library")
-  set(GLOB profile_observer_files profile_observer_*.cc)
   set(Caffe2_CONTRIB_OBSERVERS_CPU_SRC
+    "${CMAKE_CURRENT_SOURCE_DIR}/profile_observer.cc"
     "${CMAKE_CURRENT_SOURCE_DIR}/time_observer.cc"
     "${CMAKE_CURRENT_SOURCE_DIR}/runcnt_observer.cc"
   )
-  set(Caffe2_CONTRIB_OBSERVERS_GPU_SRC
-    "${CMAKE_CURRENT_SOURCE_DIR}/profile_observer_gpu.cc"
-  )
 
   set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} ${Caffe2_CONTRIB_OBSERVERS_CPU_SRC})
   set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} PARENT_SCOPE)
 
-  set(Caffe2_GPU_SRCS ${Caffe2_GPU_SRCS} ${Caffe2_CONTRIB_OBSERVERS_GPU_SRC})
-  set(Caffe2_GPU_SRCS ${Caffe2_GPU_SRCS} PARENT_SCOPE)
-
   # ---[ CPU test files
   file(GLOB tmp *_test.cc)
   set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} ${tmp})
   set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} PARENT_SCOPE)
-  exclude(Caffe2_CPU_TEST_SRCS "${Caffe2_CPU_TEST_SRCS}" ${profile_observer_files})
-
-  # ---[ GPU test files
-  set(Caffe2_GPU_TEST_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/profile_observer_test.cc")
 endif()
similarity index 69%
rename from caffe2/observers/profile_observer_gpu.cc
rename to caffe2/observers/profile_observer.cc
index 5bd9b0a..2b2136b 100644 (file)
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#include "caffe2/core/logging.h"
 #include "profile_observer.h"
+#include "caffe2/core/logging.h"
 
 namespace caffe2 {
 
@@ -64,52 +64,11 @@ void ProfileOperatorObserver::Dump() const {
 }
 
 void ProfileOperatorObserver::Start() {
-  auto cudaOp = dynamic_cast_if_rtti<const Operator<CUDAContext>*>(subject_);
-  if (cudaOp) {
-    auto context = cudaOp->getContext();
-    int device;
-    cudaGetDevice(&device);
-
-    cudaSetDevice(context->device_id());
-    cudaEventCreate(&start_);
-    cudaEventRecord(start_, context->cuda_stream());
-
-    cudaSetDevice(device);
-
-    cudaError_t error = cudaGetLastError();
-    if (error != cudaSuccess) {
-      CAFFE_THROW("Encountered CUDA error Start: ", cudaGetErrorString(error));
-    }
-  } else {
-    start_time_ = timer_.MilliSeconds();
-  }
+  start_time_ = timer_.MilliSeconds();
 }
 
 void ProfileOperatorObserver::Stop() {
-  auto cudaOp = dynamic_cast_if_rtti<const Operator<CUDAContext>*>(subject_);
-  if (cudaOp) {
-    auto context = cudaOp->getContext();
-    int device;
-    cudaGetDevice(&device);
-
-    cudaSetDevice(context->device_id());
-    cudaEventCreate(&stop_);
-    cudaEventRecord(stop_, context->cuda_stream());
-    cudaEventSynchronize(stop_);
-    cudaEventElapsedTime(&run_time_, start_, stop_);
-    cudaEventDestroy(start_);
-    cudaEventDestroy(stop_);
-
-    cudaSetDevice(device);
-
-    cudaError_t error = cudaGetLastError();
-    if (error != cudaSuccess) {
-      CAFFE_THROW("Encountered CUDA error Stop: ", cudaGetErrorString(error));
-    }
-  } else {
-    run_time_ = timer_.MilliSeconds() - start_time_;
-  }
-
+  run_time_ = timer_.MilliSeconds() - start_time_;
   Dump();
 }
 
index ab110da..89cd83f 100644 (file)
@@ -19,7 +19,6 @@
 #include <unordered_map>
 
 #include "caffe2/core/common.h"
-#include "caffe2/core/context_gpu.h"
 #include "caffe2/core/event.h"
 #include "caffe2/core/net.h"
 #include "caffe2/core/observer.h"
@@ -45,12 +44,11 @@ class ProfileCounter {
   Timer timer_;
   float start_time_ = 0.0f;
   float run_time_ = 0.0f;
-  cudaEvent_t start_;
-  cudaEvent_t stop_;
 };
 
-class ProfileOperatorObserver : public ProfileCounter,
-                                public ObserverBase<OperatorBase> {
+class CAFFE2_API ProfileOperatorObserver final
+    : public ProfileCounter,
+      public ObserverBase<OperatorBase> {
  public:
   explicit ProfileOperatorObserver(OperatorBase* subject) = delete;
   explicit ProfileOperatorObserver(
@@ -96,9 +94,9 @@ class ProfileOperatorObserver : public ProfileCounter,
   void Stop() override;
 };
 
-class ProfileObserver final : public OperatorAttachingNetObserver<
-                                  ProfileOperatorObserver,
-                                  ProfileObserver> {
+class CAFFE2_API ProfileObserver final : public OperatorAttachingNetObserver<
+                                             ProfileOperatorObserver,
+                                             ProfileObserver> {
  public:
   explicit ProfileObserver(NetBase* subject)
       : OperatorAttachingNetObserver<ProfileOperatorObserver, ProfileObserver>(
index 0bff1e0..a4756a4 100644 (file)
@@ -13,6 +13,7 @@
 #include "caffe2/core/operator.h"
 #include "caffe2/core/stats.h"
 #include "caffe2/core/transform.h"
+#include "caffe2/observers/profile_observer.h"
 #include "caffe2/observers/runcnt_observer.h"
 #include "caffe2/observers/time_observer.h"
 #include "caffe2/onnx/backend.h"
@@ -1136,6 +1137,7 @@ void addGlobalMethods(py::module& m) {
     }                                                         \
   }
 
+        REGISTER_PYTHON_EXPOSED_OBSERVER(ProfileObserver);
         REGISTER_PYTHON_EXPOSED_OBSERVER(TimeObserver);
 #undef REGISTER_PYTHON_EXPOSED_OBSERVER