--- /dev/null
+//===--- print_tracing.h - OpenMP interface definitions -------- AMDGPU -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIBOMPTARGET_PLUGINS_AMDGPU_SRC_PRINT_TRACING_H_INCLUDED
+#define LIBOMPTARGET_PLUGINS_AMDGPU_SRC_PRINT_TRACING_H_INCLUDED
+
+// Bit flags controlling the plugin's print tracing. Each enumerator is a
+// distinct power of two, so several can be combined in print_kernel_trace
+// and tested individually with bitwise AND.
+enum PrintTraceControlBits {
+  LAUNCH = 1,          // Print a message for each kernel launch
+  RTL_TIMING = 2,      // Print timing info around each RTL step
+  STARTUP_DETAILS = 4, // Details around loading up kernel
+  RTL_TO_STDOUT = 8    // Redirect RTL tracing to stdout
+};
+
+// Combination of PrintTraceControlBits flags; set by environment variable.
+extern int print_kernel_trace;
+
+#endif // LIBOMPTARGET_PLUGINS_AMDGPU_SRC_PRINT_TRACING_H_INCLUDED
#include "Debug.h"
#include "get_elf_mach_gfx_name.h"
#include "omptargetplugin.h"
+#include "print_tracing.h"
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
DeviceInfo.GPUName[device_id] = GetInfoName;
}
- if (print_kernel_trace == 4)
+ if (print_kernel_trace & STARTUP_DETAILS)
fprintf(stderr, "Device#%-2d CU's: %2d %s\n", device_id,
DeviceInfo.ComputeUnits[device_id],
DeviceInfo.GPUName[device_id].c_str());
if (Max_Teams > DeviceInfo.HardTeamLimit)
Max_Teams = DeviceInfo.HardTeamLimit;
- if (print_kernel_trace == 4) {
+ if (print_kernel_trace & STARTUP_DETAILS) {
fprintf(stderr, "RTLDeviceInfoTy::Max_Teams: %d\n",
RTLDeviceInfoTy::Max_Teams);
fprintf(stderr, "Max_Teams: %d\n", Max_Teams);
DP("Reduced threadsPerGroup to flat-attr-group-size limit %d\n",
threadsPerGroup);
}
- if (print_kernel_trace == 4)
+ if (print_kernel_trace & STARTUP_DETAILS)
fprintf(stderr, "threadsPerGroup: %d\n", threadsPerGroup);
DP("Preparing %d threads\n", threadsPerGroup);
num_groups = Max_Teams;
DP("Set default num of groups %d\n", num_groups);
- if (print_kernel_trace == 4) {
+ if (print_kernel_trace & STARTUP_DETAILS) {
fprintf(stderr, "num_groups: %d\n", num_groups);
fprintf(stderr, "num_teams: %d\n", num_teams);
}
if (num_teams > 0) {
num_groups = (num_teams < num_groups) ? num_teams : num_groups;
}
- if (print_kernel_trace == 4) {
+ if (print_kernel_trace & STARTUP_DETAILS) {
fprintf(stderr, "num_groups: %d\n", num_groups);
fprintf(stderr, "DeviceInfo.EnvNumTeams %d\n", DeviceInfo.EnvNumTeams);
fprintf(stderr, "DeviceInfo.EnvTeamLimit %d\n", DeviceInfo.EnvTeamLimit);
}
if (num_groups > Max_Teams) {
num_groups = Max_Teams;
- if (print_kernel_trace == 4)
+ if (print_kernel_trace & STARTUP_DETAILS)
fprintf(stderr, "Limiting num_groups %d to Max_Teams %d \n", num_groups,
Max_Teams);
}
if (num_groups > num_teams && num_teams > 0) {
num_groups = num_teams;
- if (print_kernel_trace == 4)
+ if (print_kernel_trace & STARTUP_DETAILS)
fprintf(stderr, "Limiting num_groups %d to clause num_teams %d \n",
num_groups, num_teams);
}
num_groups > DeviceInfo.EnvMaxTeamsDefault)
num_groups = DeviceInfo.EnvMaxTeamsDefault;
}
- if (print_kernel_trace == 4) {
+ if (print_kernel_trace & STARTUP_DETAILS) {
fprintf(stderr, "threadsPerGroup: %d\n", threadsPerGroup);
fprintf(stderr, "num_groups: %d\n", num_groups);
fprintf(stderr, "loop_tripcount: %ld\n", loop_tripcount);
loop_tripcount, // From run_region arg
KernelInfo->device_id);
- if (print_kernel_trace >= 1)
+ if (print_kernel_trace >= LAUNCH) {
// enum modes are SPMD, GENERIC, NONE 0,1,2
- fprintf(stderr,
+ // print to stdout if RTL_TIMING or RTL_TO_STDOUT is set, else to stderr.
+ bool traceToStdout = print_kernel_trace & (RTL_TO_STDOUT | RTL_TIMING);
+ fprintf(traceToStdout ? stdout : stderr,
"DEVID:%2d SGN:%1d ConstWGSize:%-4d args:%2d teamsXthrds:(%4dX%4d) "
"reqd:(%4dX%4d) n:%s\n",
device_id, KernelInfo->ExecutionMode, KernelInfo->ConstWGSize,
arg_num, num_groups, threadsPerGroup, num_teams, thread_limit,
KernelInfo->Name);
+ }
// Run on the device.
{