packet->completion_signal = {0}; // may want a pool of signals
KernelArgPool *ArgPool = nullptr;
+ void *kernarg = nullptr;
{
auto it = KernelArgPoolMap.find(std::string(KernelInfo->Name));
if (it != KernelArgPoolMap.end()) {
device_id);
}
{
- void *kernarg = nullptr;
if (ArgPool) {
assert(ArgPool->kernarg_segment_size == (arg_num * sizeof(void *)));
kernarg = ArgPool->allocate(arg_num);
packet->kernarg_address = kernarg;
}
- {
- hsa_signal_t s = DeviceInfo.FreeSignalPool.pop();
- if (s.handle == 0) {
- DP("Failed to get signal instance\n");
- return OFFLOAD_FAIL;
- }
- packet->completion_signal = s;
- hsa_signal_store_relaxed(packet->completion_signal, 1);
+ hsa_signal_t s = DeviceInfo.FreeSignalPool.pop();
+ if (s.handle == 0) {
+ DP("Failed to get signal instance\n");
+ return OFFLOAD_FAIL;
}
+ packet->completion_signal = s;
+ hsa_signal_store_relaxed(packet->completion_signal, 1);
+ // Publish the packet indicating it is ready to be processed
core::packet_store_release(reinterpret_cast<uint32_t *>(packet),
core::create_header(), packet->setup);
+ // Since the packet is already published, its contents must not be
+ // accessed any more
hsa_signal_store_relaxed(queue->doorbell_signal, packet_id);
- while (hsa_signal_wait_scacquire(packet->completion_signal,
- HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX,
+ while (hsa_signal_wait_scacquire(s, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX,
HSA_WAIT_STATE_BLOCKED) != 0)
;
assert(ArgPool);
- ArgPool->deallocate(packet->kernarg_address);
- DeviceInfo.FreeSignalPool.push(packet->completion_signal);
+ ArgPool->deallocate(kernarg);
+ DeviceInfo.FreeSignalPool.push(s);
}
DP("Kernel completed\n");