From deca317a9c8b4567cccc3270fc63065dbbe23c69 Mon Sep 17 00:00:00 2001 From: James Qin Date: Wed, 16 May 2018 20:52:32 -0700 Subject: [PATCH] Add more logging in BaseGPUDevice::ComputeHelper for kernel completion. PiperOrigin-RevId: 196933479 --- tensorflow/core/common_runtime/gpu/gpu_device.cc | 24 +++++++++++++++++++++--- tensorflow/core/common_runtime/gpu/gpu_device.h | 3 +++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc index b3deab6..48d4c52 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc @@ -431,6 +431,13 @@ void BaseGPUDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) { } } +string BaseGPUDevice::ComputeOpKernelDebugString(const OpKernel& op_kernel, + const int& stream_id) { + return strings::StrCat(op_kernel.name(), " op ", op_kernel.type_string(), + " on GPU ", tf_gpu_id_.value(), " stream[", stream_id, + "]"); +} + void BaseGPUDevice::ComputeHelper(OpKernel* op_kernel, OpKernelContext* context) { GPUDeviceContext* gpu_device_context = device_contexts_[0]; @@ -445,9 +452,8 @@ void BaseGPUDevice::ComputeHelper(OpKernel* op_kernel, const bool vlog_2 = vlog_1 && VLOG_IS_ON(2); if (vlog_1) { - VLOG(1) << "GpuDevice::Compute " << op_kernel->name() << " op " - << op_kernel->type_string() << " on GPU" << tf_gpu_id_ << " stream[" - << stream_id << "]"; + VLOG(1) << "GpuDevice::ComputeHelper " + << ComputeOpKernelDebugString(*op_kernel, stream_id); } const auto num_streams = streams_.size(); @@ -491,6 +497,18 @@ void BaseGPUDevice::ComputeHelper(OpKernel* op_kernel, // all streams. Given that this flag is typically used for // debugging it makes more sense to sync all GPU activity. context->SetStatus(GPUUtil::SyncAll(this)); + if (vlog_1) { + VLOG(1) << "GpuDevice::ComputeHelper finished " + << ComputeOpKernelDebugString(*op_kernel, stream_id); + } + } else if (vlog_1) { + VLOG(1) << "GpuDevice::ComputeHelper scheduled " + << ComputeOpKernelDebugString(*op_kernel, stream_id); + } + } else { + if (vlog_1) { + VLOG(1) << "GpuDevice::ComputeHelper failed to schedule " + << ComputeOpKernelDebugString(*op_kernel, stream_id); } } } diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h index 3e958a7..737a351 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.h +++ b/tensorflow/core/common_runtime/gpu/gpu_device.h @@ -139,6 +139,9 @@ class BaseGPUDevice : public LocalDevice { void ComputeHelper(OpKernel* op_kernel, OpKernelContext* context); + string ComputeOpKernelDebugString(const OpKernel& op_kernel, + const int& stream_id); + // This method returns an initialization status, in addition to // calling the "done" StatusCallback, if there is a failure to // allocate memory or if the tensor "from" is not DMA-copyable. -- 2.7.4