From ae209397b1733f31e8fa260722aaee49cf3f0f4b Mon Sep 17 00:00:00 2001
From: Joseph Huber <jhuber6@vols.utk.edu>
Date: Fri, 4 Sep 2020 15:03:49 -0400
Subject: [PATCH] [OpenMP] Begin Printing Information Dumps In Libomptarget and
 Plugins

Summary:
This patch starts adding support for adding information dumps to libomptarget
and rtl plugins. The information printing is controlled by the
LIBOMPTARGET_INFO environment variable introduced in D86483. The goal of this
patch is to provide the user with additional information about the device
during kernel execution and providing the user with information dumps in the
case of failure. This patch added the ability to dump the pointer mapping table
as well as printing the number of blocks and threads in the cuda RTL.

Reviewers: jdoerfort gkistanova	ye-luo

Subscribers: guansong openmp-commits sstefan1 yaxunl ye-luo

Tags: #OpenMP

Differential Revision: https://reviews.llvm.org/D87165
---
 openmp/libomptarget/include/Debug.h          | 25 ++++++++++++++++++++++-
 openmp/libomptarget/plugins/cuda/src/rtl.cpp | 30 ++++++++++++++++++----------
 openmp/libomptarget/src/interface.cpp        | 24 +++++++++++++++++++---
 openmp/libomptarget/test/offloading/info.c   | 15 ++++++++++++++
 4 files changed, 79 insertions(+), 15 deletions(-)
 create mode 100644 openmp/libomptarget/test/offloading/info.c

diff --git a/openmp/libomptarget/include/Debug.h b/openmp/libomptarget/include/Debug.h
index b7092dd..4f42794 100644
--- a/openmp/libomptarget/include/Debug.h
+++ b/openmp/libomptarget/include/Debug.h
@@ -70,23 +70,26 @@ static inline int getDebugLevel() {
 #define GETNAME2(name) #name
 #define GETNAME(name) GETNAME2(name)
 
-// Messaging interface
+/// Print a generic message string from libomptarget or a plugin RTL
 #define MESSAGE0(_str)                                                         \
   do {                                                                         \
     fprintf(stderr, GETNAME(TARGET_NAME) " message: %s\n", _str);              \
   } while (0)
 
+/// Print a printf formatting string message from libomptarget or a plugin RTL
 #define MESSAGE(_str, ...)                                                     \
   do {                                                                         \
     fprintf(stderr, GETNAME(TARGET_NAME) " message: " _str "\n", __VA_ARGS__); \
   } while (0)
 
+/// Print fatal error message with an error string and error identifier
 #define FATAL_MESSAGE0(_num, _str)                                             \
   do {                                                                         \
     fprintf(stderr, GETNAME(TARGET_NAME) " fatal error %d: %s\n", _num, _str); \
     abort();                                                                   \
   } while (0)
 
+/// Print fatal error message with a printf string and error identifier
 #define FATAL_MESSAGE(_num, _str, ...)                                         \
   do {                                                                         \
     fprintf(stderr, GETNAME(TARGET_NAME) " fatal error %d:" _str "\n", _num,   \
@@ -94,12 +97,20 @@ static inline int getDebugLevel() {
     abort();                                                                   \
   } while (0)
 
+/// Print a generic error string from libomptarget or a plugin RTL
 #define FAILURE_MESSAGE(...)                                                   \
   do {                                                                         \
     fprintf(stderr, GETNAME(TARGET_NAME) " error: ");                          \
     fprintf(stderr, __VA_ARGS__);                                              \
   } while (0)
 
+/// Print a generic information string used if LIBOMPTARGET_INFO=1
+#define INFO_MESSAGE(_num, ...)                                                \
+  do {                                                                         \
+    fprintf(stderr, GETNAME(TARGET_NAME) " device %d info: ", _num);           \
+    fprintf(stderr, __VA_ARGS__);                                              \
+  } while (0)
+
 // Debugging messages
 #ifdef OMPTARGET_DEBUG
 #include <stdio.h>
@@ -110,6 +121,7 @@ static inline int getDebugLevel() {
     fprintf(stderr, __VA_ARGS__);                                              \
   }
 
+/// Emit a message for debugging
 #define DP(...)                                                                \
   do {                                                                         \
     if (getDebugLevel() > 0) {                                                 \
@@ -117,6 +129,7 @@ static inline int getDebugLevel() {
     }                                                                          \
   } while (false)
 
+/// Emit a message for debugging or failure if debugging is disabled
 #define REPORT(...)                                                            \
   do {                                                                         \
     if (getDebugLevel() > 0) {                                                 \
@@ -133,4 +146,14 @@ static inline int getDebugLevel() {
 #define REPORT(...) FAILURE_MESSAGE(__VA_ARGS__);
 #endif // OMPTARGET_DEBUG
 
+/// Emit a message giving the user extra information about the runtime if
+#define INFO(_id, ...)                                                         \
+  do {                                                                         \
+    if (getDebugLevel() > 0) {                                                 \
+      DEBUGP(DEBUG_PREFIX, __VA_ARGS__);                                       \
+    } else if (getInfoLevel() > 0) {                                           \
+      INFO_MESSAGE(_id, __VA_ARGS__);                                          \
+    }                                                                          \
+  } while (false)
+
 #endif // _OMPTARGET_DEBUG_H
diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
index 2675f83..1a0bffb 100644
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -29,7 +29,7 @@
 #ifdef OMPTARGET_DEBUG
 #define CUDA_ERR_STRING(err)                                                   \
   do {                                                                         \
-    if (getDebugLevel() > 0) {                                                      \
+    if (getDebugLevel() > 0) {                                                 \
       const char *errStr;                                                      \
       cuGetErrorString(err, &errStr);                                          \
       DP("CUDA error is: %s\n", errStr);                                       \
@@ -277,14 +277,15 @@ class DeviceRTLTy {
     E.Entries.push_back(entry);
   }
 
-  // Return true if the entry is associated with device
-  bool findOffloadEntry(const int DeviceId, const void *Addr) const {
+  // Return a pointer to the entry associated with the pointer
+  const __tgt_offload_entry *getOffloadEntry(const int DeviceId,
+                                             const void *Addr) const {
     for (const __tgt_offload_entry &Itr :
          DeviceData[DeviceId].FuncGblEntries.back().Entries)
       if (Itr.addr == Addr)
-        return true;
+        return &Itr;
 
-    return false;
+    return nullptr;
   }
 
   // Return the pointer to the target entries table
@@ -492,9 +493,11 @@ public:
       DeviceData[DeviceId].BlocksPerGrid = EnvTeamLimit;
     }
 
-    DP("Max number of CUDA blocks %d, threads %d & warp size %d\n",
-       DeviceData[DeviceId].BlocksPerGrid, DeviceData[DeviceId].ThreadsPerBlock,
-       DeviceData[DeviceId].WarpSize);
+    INFO(DeviceId,
+         "Device supports up to %d CUDA blocks and %d threads with a "
+         "warp size of %d\n",
+         DeviceData[DeviceId].BlocksPerGrid,
+         DeviceData[DeviceId].ThreadsPerBlock, DeviceData[DeviceId].WarpSize);
 
     // Set default number of teams
     if (EnvNumTeams > 0) {
@@ -926,9 +929,14 @@ public:
       CudaBlocksPerGrid = TeamNum;
     }
 
-    // Run on the device.
-    DP("Launch kernel with %d blocks and %d threads\n", CudaBlocksPerGrid,
-       CudaThreadsPerBlock);
+    INFO(DeviceId,
+         "Launching kernel %s with %d blocks and %d threads in %s "
+         "mode\n",
+         (getOffloadEntry(DeviceId, TgtEntryPtr))
+             ? getOffloadEntry(DeviceId, TgtEntryPtr)->name
+             : "(null)",
+         CudaBlocksPerGrid, CudaThreadsPerBlock,
+         (KernelInfo->ExecutionMode == SPMD) ? "SPMD" : "Generic");
 
     CUstream Stream = getStream(DeviceId, AsyncInfo);
     Err = cuLaunchKernel(KernelInfo->Func, CudaBlocksPerGrid, /* gridDimY */ 1,
diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp
index d22e597..084f2ac 100644
--- a/openmp/libomptarget/src/interface.cpp
+++ b/openmp/libomptarget/src/interface.cpp
@@ -16,6 +16,7 @@
 #include "rtl.h"
 
 #include <cassert>
+#include <cstdio>
 #include <cstdlib>
 #include <mutex>
 
@@ -24,8 +25,22 @@ kmp_target_offload_kind_t TargetOffloadPolicy = tgt_default;
 std::mutex TargetOffloadMtx;
 
 ////////////////////////////////////////////////////////////////////////////////
-/// manage the success or failure of a target construct
+/// dump a table of all the host-target pointer pairs on failure
+static void dumpTargetPointerMappings() {
+  for (const auto &Device : Devices) {
+    fprintf(stderr, "Device %d:\n", Device.DeviceID);
+    fprintf(stderr, "%-18s %-18s %s\n", "Host Ptr", "Target Ptr", "Size (B)");
+    for (const auto &HostTargetMap : Device.HostDataToTargetMap) {
+      fprintf(stderr, DPxMOD " " DPxMOD " %lu\n",
+              DPxPTR(HostTargetMap.HstPtrBegin),
+              DPxPTR(HostTargetMap.TgtPtrBegin),
+              HostTargetMap.HstPtrEnd - HostTargetMap.HstPtrBegin);
+    }
+  }
+}
 
+////////////////////////////////////////////////////////////////////////////////
+/// manage the success or failure of a target construct
 static void HandleDefaultTargetOffload() {
   TargetOffloadMtx.lock();
   if (TargetOffloadPolicy == tgt_default) {
@@ -60,8 +75,11 @@ static void HandleTargetOutcome(bool success) {
       break;
     case tgt_mandatory:
       if (!success) {
-        if (getInfoLevel() > 0)
-          MESSAGE0("LIBOMPTARGET_INFO is not supported yet");
+        if (getInfoLevel() > 1)
+          dumpTargetPointerMappings();
+        else
+          FAILURE_MESSAGE("run with env LIBOMPTARGET_INFO>1 to dump tables\n");
+
         FATAL_MESSAGE0(1, "failure of target construct while offloading is mandatory");
       }
       break;
diff --git a/openmp/libomptarget/test/offloading/info.c b/openmp/libomptarget/test/offloading/info.c
new file mode 100644
index 0000000..e0d3f1a
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/info.c
@@ -0,0 +1,15 @@
+// RUN: %libomptarget-compile-nvptx64-nvidia-cuda && env LIBOMPTARGET_INFO=1 %libomptarget-run-nvptx64-nvidia-cuda 2>&1 | %fcheck-nvptx64-nvidia-cuda -allow-empty -check-prefix=INFO
+
+#include <stdio.h>
+#include <omp.h>
+
+int main() {
+    int ptr = 1;
+
+// INFO: CUDA device {{[0-9]+}} info: Device supports up to {{[0-9]+}} CUDA blocks and {{[0-9]+}} threads with a warp size of {{[0-9]+}}
+// INFO: CUDA device {{[0-9]+}} info: Launching kernel {{.*}} with {{[0-9]+}} blocks and {{[0-9]+}} threads in Generic mode
+#pragma omp target map(tofrom:ptr)
+  {ptr = 1;}
+
+  return 0;
+}
-- 
2.7.4