unsigned long offset_y;
unsigned long offset_z;
unsigned long hostcall_ptr;
- char num_gpu_queues;
- unsigned long gpu_queue_ptr;
- char num_cpu_queues;
- unsigned long cpu_worker_signals;
- unsigned long cpu_queue_ptr;
- unsigned long kernarg_template_ptr;
+ unsigned long unused0;
+ unsigned long unused1;
+ unsigned long unused2;
} impl_implicit_args_t;
+static_assert(sizeof(impl_implicit_args_t) == 56, "");
// ---------------------- Kernel Start -------------
typedef struct atl_kernel_info_s {
uint32_t vgpr_spill_count;
uint32_t kernel_segment_size;
uint32_t num_args;
- std::vector<uint64_t> arg_alignments;
- std::vector<uint64_t> arg_offsets;
- std::vector<uint64_t> arg_sizes;
} atl_kernel_info_t;
typedef struct atl_symbol_info_s {
};
KernelArgMD()
- : name_(std::string()), typeName_(std::string()), size_(0), offset_(0),
- align_(0), valueKind_(ValueKind::Unknown) {}
+ : name_(std::string()), size_(0), offset_(0),
+ valueKind_(ValueKind::Unknown) {}
// fields
std::string name_;
- std::string typeName_;
uint32_t size_;
uint32_t offset_;
- uint32_t align_;
ValueKind valueKind_;
};
-class KernelMD {
-public:
- KernelMD() : kernargSegmentSize_(0ull) {}
-
- // fields
- uint64_t kernargSegmentSize_;
-};
-
static const std::map<std::string, KernelArgMD::ValueKind> ArgValueKind = {
// v3
// {"by_value", KernelArgMD::ValueKind::ByValue},
foronly_string(value, [&](size_t N, const unsigned char *str) {
kernelarg->name_ = std::string(str, str + N);
});
- } else if (message_is_string(key, ".type_name")) {
- foronly_string(value, [&](size_t N, const unsigned char *str) {
- kernelarg->typeName_ = std::string(str, str + N);
- });
} else if (message_is_string(key, ".size")) {
foronly_unsigned(value, [&](uint64_t x) { kernelarg->size_ = x; });
} else if (message_is_string(key, ".offset")) {
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
}
- atl_kernel_info_t info = {0, 0, 0, 0, 0, 0, 0, 0, 0, {}, {}, {}};
+ atl_kernel_info_t info = {0, 0, 0, 0, 0, 0, 0, 0, 0};
uint64_t sgpr_count, vgpr_count, sgpr_spill_count, vgpr_spill_count;
msgpack_errors += map_lookup_uint64_t(element, ".sgpr_count", &sgpr_count);
"iterate args map in kernel args metadata");
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
}
- // populate info with sizes and offsets
- info.arg_sizes.push_back(lcArg.size_);
// v3 has offset field and not align field
size_t new_offset = lcArg.offset_;
size_t padding = new_offset - offset;
offset = new_offset;
- info.arg_offsets.push_back(lcArg.offset_);
DP("Arg[%lu] \"%s\" (%u, %u)\n", i, lcArg.name_.c_str(), lcArg.size_,
lcArg.offset_);
offset += lcArg.size_;
}
}
- // add size of implicit args, e.g.: offset x, y and z and pipe pointer, but
- // do not count the compiler set implicit args, but set your own implicit
- // args by discounting the compiler set implicit args
+ // TODO: Probably don't want this arithmetic
info.kernel_segment_size =
- (hasHiddenArgs ? kernel_explicit_args_size : kernel_segment_size) +
- sizeof(impl_implicit_args_t);
+ (hasHiddenArgs ? kernel_explicit_args_size : kernel_segment_size);
DP("[%s: kernarg seg size] (%lu --> %u)\n", kernelName.c_str(),
kernel_segment_size, info.kernel_segment_size);
auto It = KernelInfoMap.find(kernelStr);
if (It != KernelInfoMap.end()) {
atl_kernel_info_t info = It->second;
- // return the size for non-implicit args
- kernarg_segment_size =
- info.kernel_segment_size - sizeof(impl_implicit_args_t);
+ kernarg_segment_size = info.kernel_segment_size;
} else {
err = HSA_STATUS_ERROR;
}
}
- // each arg is a void * in this openmp implementation
- uint32_t arg_num = kernarg_segment_size / sizeof(void *);
- std::vector<size_t> arg_sizes(arg_num);
- for (std::vector<size_t>::iterator it = arg_sizes.begin();
- it != arg_sizes.end(); it++) {
- *it = sizeof(void *);
- }
-
// default value GENERIC (in case symbol is missing from cubin file)
llvm::omp::OMPTgtExecModeFlags ExecModeVal =
llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC;