Add more logging in BaseGPUDevice::ComputeHelper for kernel completion.
author     James Qin <jamesqin@google.com>
           Thu, 17 May 2018 03:52:32 +0000 (20:52 -0700)
committer  TensorFlower Gardener <gardener@tensorflow.org>
           Thu, 17 May 2018 03:54:52 +0000 (20:54 -0700)
PiperOrigin-RevId: 196933479

tensorflow/core/common_runtime/gpu/gpu_device.cc
tensorflow/core/common_runtime/gpu/gpu_device.h
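
The .cc change below factors the op-kernel description used by these log messages into a new helper, ComputeOpKernelDebugString(). For reference, here is a minimal standalone sketch (not TensorFlow code; the op name, type, and ids are invented) that reproduces the string format that helper builds:

    #include <iostream>
    #include <sstream>
    #include <string>

    // Stand-in for the fields ComputeOpKernelDebugString() reads from the
    // OpKernel and the device; every value used here is illustrative.
    std::string OpKernelDebugString(const std::string& op_name,
                                    const std::string& op_type, int gpu_id,
                                    int stream_id) {
      std::ostringstream s;
      s << op_name << " op " << op_type << " on GPU " << gpu_id << " stream["
        << stream_id << "]";
      return s.str();
    }

    int main() {
      // Prints: conv1/Conv2D op Conv2D on GPU 0 stream[0]
      std::cout << OpKernelDebugString("conv1/Conv2D", "Conv2D", 0, 0) << "\n";
      return 0;
    }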

diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc
index b3deab6..48d4c52 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc
@@ -431,6 +431,13 @@ void BaseGPUDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) {
   }
 }
 
+string BaseGPUDevice::ComputeOpKernelDebugString(const OpKernel& op_kernel,
+                                                 const int& stream_id) {
+  return strings::StrCat(op_kernel.name(), " op ", op_kernel.type_string(),
+                         " on GPU ", tf_gpu_id_.value(), " stream[", stream_id,
+                         "]");
+}
+
 void BaseGPUDevice::ComputeHelper(OpKernel* op_kernel,
                                   OpKernelContext* context) {
   GPUDeviceContext* gpu_device_context = device_contexts_[0];
@@ -445,9 +452,8 @@ void BaseGPUDevice::ComputeHelper(OpKernel* op_kernel,
   const bool vlog_2 = vlog_1 && VLOG_IS_ON(2);
 
   if (vlog_1) {
-    VLOG(1) << "GpuDevice::Compute " << op_kernel->name() << " op "
-            << op_kernel->type_string() << " on GPU" << tf_gpu_id_ << " stream["
-            << stream_id << "]";
+    VLOG(1) << "GpuDevice::ComputeHelper "
+            << ComputeOpKernelDebugString(*op_kernel, stream_id);
   }
 
   const auto num_streams = streams_.size();
@@ -491,6 +497,18 @@ void BaseGPUDevice::ComputeHelper(OpKernel* op_kernel,
       // all streams.  Given that this flag is typically used for
       // debugging it makes more sense to sync all GPU activity.
       context->SetStatus(GPUUtil::SyncAll(this));
+      if (vlog_1) {
+        VLOG(1) << "GpuDevice::ComputeHelper finished "
+                << ComputeOpKernelDebugString(*op_kernel, stream_id);
+      }
+    } else if (vlog_1) {
+      VLOG(1) << "GpuDevice::ComputeHelper scheduled "
+              << ComputeOpKernelDebugString(*op_kernel, stream_id);
+    }
+  } else {
+    if (vlog_1) {
+      VLOG(1) << "GpuDevice::ComputeHelper failed to schedule "
+              << ComputeOpKernelDebugString(*op_kernel, stream_id);
     }
   }
 }
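
With VLOG(1) enabled, ComputeHelper now reports one of three outcomes for every kernel, all built from the same helper string. A sketch of the resulting log lines (op name and ids invented; the "finished" form only appears in the sync-all branch above, i.e. when the device synchronizes after every op):

    GpuDevice::ComputeHelper scheduled conv1/Conv2D op Conv2D on GPU 0 stream[0]
    GpuDevice::ComputeHelper finished conv1/Conv2D op Conv2D on GPU 0 stream[0]
    GpuDevice::ComputeHelper failed to schedule conv1/Conv2D op Conv2D on GPU 0 stream[0]

In the open-source build, these lines are typically only emitted when verbose logging is raised to level 1 or higher, e.g. via the TF_CPP_MIN_VLOG_LEVEL environment variable.
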
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h
index 3e958a7..737a351 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.h
@@ -139,6 +139,9 @@ class BaseGPUDevice : public LocalDevice {
 
   void ComputeHelper(OpKernel* op_kernel, OpKernelContext* context);
 
+  string ComputeOpKernelDebugString(const OpKernel& op_kernel,
+                                    const int& stream_id);
+
   // This method returns an initialization status, in addition to
   // calling the "done" StatusCallback, if there is a failure to
   // allocate memory or if the tensor "from" is not DMA-copyable.