return HSA_STATUS_ERROR;
}
}
-
-hsa_status_t interop_hsa_get_kernel_info(
- const std::map<std::string, atl_kernel_info_t> &KernelInfoTable,
- int DeviceId, const char *kernel_name,
- hsa_executable_symbol_info_t kernel_info, uint32_t *value) {
- /*
- // Typical usage:
- uint32_t value;
- interop_hsa_get_kernel_addr(gpu_place, "kernel_name",
- HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
- &val);
- */
-
- if (!kernel_name || !value)
- return HSA_STATUS_ERROR;
-
- hsa_status_t status = HSA_STATUS_SUCCESS;
- // get the kernel info
- std::string kernelStr = std::string(kernel_name);
- auto It = KernelInfoTable.find(kernelStr);
- if (It != KernelInfoTable.end()) {
- atl_kernel_info_t info = It->second;
- switch (kernel_info) {
- case HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE:
- *value = info.group_segment_size;
- break;
- case HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE:
- *value = info.private_segment_size;
- break;
- case HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE:
- // return the size for non-implicit args
- *value = info.kernel_segment_size - sizeof(impl_implicit_args_t);
- break;
- default:
- *value = 0;
- status = HSA_STATUS_ERROR;
- break;
- }
- } else {
- *value = 0;
- status = HSA_STATUS_ERROR;
- }
-
- return status;
-}
} Elf_Note;
#endif
-// The following include file and following structs/enums
-// have been replicated on a per-use basis below. For example,
-// llvm::AMDGPU::HSAMD::Kernel::Metadata has several fields,
-// but we may care only about kernargSegmentSize_ for now, so
-// we just include that field in our KernelMD implementation. We
-// chose this approach to replicate in order to avoid forcing
-// a dependency on LLVM_INCLUDE_DIR just to compile the runtime.
-// #include "llvm/Support/AMDGPUMetadata.h"
-// typedef llvm::AMDGPU::HSAMD::Metadata CodeObjectMD;
-// typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
-// typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD;
-// using llvm::AMDGPU::HSAMD::AccessQualifier;
-// using llvm::AMDGPU::HSAMD::AddressSpaceQualifier;
-// using llvm::AMDGPU::HSAMD::ValueKind;
-// using llvm::AMDGPU::HSAMD::ValueType;
-
class KernelArgMD {
public:
enum class ValueKind {
};
static const std::map<std::string, KernelArgMD::ValueKind> ArgValueKind = {
- // Including only those fields that are relevant to the runtime.
- // {"ByValue", KernelArgMD::ValueKind::ByValue},
- // {"GlobalBuffer", KernelArgMD::ValueKind::GlobalBuffer},
- // {"DynamicSharedPointer",
- // KernelArgMD::ValueKind::DynamicSharedPointer},
- // {"Sampler", KernelArgMD::ValueKind::Sampler},
- // {"Image", KernelArgMD::ValueKind::Image},
- // {"Pipe", KernelArgMD::ValueKind::Pipe},
- // {"Queue", KernelArgMD::ValueKind::Queue},
- {"HiddenGlobalOffsetX", KernelArgMD::ValueKind::HiddenGlobalOffsetX},
- {"HiddenGlobalOffsetY", KernelArgMD::ValueKind::HiddenGlobalOffsetY},
- {"HiddenGlobalOffsetZ", KernelArgMD::ValueKind::HiddenGlobalOffsetZ},
- {"HiddenNone", KernelArgMD::ValueKind::HiddenNone},
- {"HiddenPrintfBuffer", KernelArgMD::ValueKind::HiddenPrintfBuffer},
- {"HiddenDefaultQueue", KernelArgMD::ValueKind::HiddenDefaultQueue},
- {"HiddenCompletionAction", KernelArgMD::ValueKind::HiddenCompletionAction},
- {"HiddenMultiGridSyncArg", KernelArgMD::ValueKind::HiddenMultiGridSyncArg},
- {"HiddenHostcallBuffer", KernelArgMD::ValueKind::HiddenHostcallBuffer},
// v3
// {"by_value", KernelArgMD::ValueKind::ByValue},
// {"global_buffer", KernelArgMD::ValueKind::GlobalBuffer},
DP("to find the kernel name: %s size: %lu\n", e->name, strlen(e->name));
- uint32_t kernarg_segment_size;
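+ // Start at 0 so a failed lookup (null or unknown kernel name) leaves a
+ // defined size. Previously an unknown name produced 0, while a null name
+ // left this variable uninitialized.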
+ uint32_t kernarg_segment_size = 0;
auto &KernelInfoMap = DeviceInfo.KernelInfoTable[device_id];
- hsa_status_t err = interop_hsa_get_kernel_info(
- KernelInfoMap, device_id, e->name,
- HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
- &kernarg_segment_size);
+ // Resolve the explicit-argument kernarg size for kernel e->name from
+ // KernelInfoMap. err remains HSA_STATUS_SUCCESS only when the name is
+ // non-null and has an entry in the map; on either failure err becomes
+ // HSA_STATUS_ERROR and kernarg_segment_size keeps its initial value of 0.
+ // NOTE(review): e->name appears to be passed to strlen() in the DP() call
+ // just above, so this null check may come too late to help - confirm.
+ hsa_status_t err = HSA_STATUS_SUCCESS;
+ if (!e->name) {
+   err = HSA_STATUS_ERROR;
+ } else {
+   auto It = KernelInfoMap.find(std::string(e->name));
+   if (It != KernelInfoMap.end()) {
+     // Bind by const reference: only one field is read, so copying the
+     // whole atl_kernel_info_t entry out of the map is unnecessary.
+     const atl_kernel_info_t &info = It->second;
+     // return the size for non-implicit args
+     kernarg_segment_size =
+         info.kernel_segment_size - sizeof(impl_implicit_args_t);
+   } else {
+     err = HSA_STATUS_ERROR;
+   }
+ }
// each arg is a void * in this openmp implementation
uint32_t arg_num = kernarg_segment_size / sizeof(void *);