static inline void __kmp_track_dependence(kmp_int32 gtid, kmp_depnode_t *source,
kmp_depnode_t *sink,
kmp_task_t *sink_task) {
+#if OMPX_TASKGRAPH
+ kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
+ kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);
+ if (source->dn.task && sink_task) {
+ // Dependences between a task that is inside the TDG and a task that is
+ // outside of it are not supported
+ KMP_ASSERT(task_source->is_taskgraph == task_sink->is_taskgraph);
+ }
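+ // while recording, store the edge source -> sink in the TDG's record_map:
+ // append the sink to the source node's successors list and bump the sink
+ // node's predecessor count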
+ if (task_sink->is_taskgraph &&
+ __kmp_tdg_is_recording(task_sink->tdg->tdg_status)) {
+ kmp_node_info_t *source_info =
+ &task_sink->tdg->record_map[task_source->td_task_id];
+ bool exists = false;
+ for (int i = 0; i < source_info->nsuccessors; i++) {
+ if (source_info->successors[i] == task_sink->td_task_id) {
+ exists = true;
+ break;
+ }
+ }
+ if (!exists) {
+ if (source_info->nsuccessors >= source_info->successors_size) {
+ kmp_uint old_size = source_info->successors_size;
+ source_info->successors_size = 2 * source_info->successors_size;
+ kmp_int32 *old_succ_ids = source_info->successors;
+ kmp_int32 *new_succ_ids = (kmp_int32 *)__kmp_allocate(
+ source_info->successors_size * sizeof(kmp_int32));
+ // keep the successors recorded so far when growing the list
+ KMP_MEMCPY(new_succ_ids, old_succ_ids, old_size * sizeof(kmp_int32));
+ source_info->successors = new_succ_ids;
+ __kmp_free(old_succ_ids);
+ }
+
+ source_info->successors[source_info->nsuccessors] = task_sink->td_task_id;
+ source_info->nsuccessors++;
+
+ kmp_node_info_t *sink_info =
+ &(task_sink->tdg->record_map[task_sink->td_task_id]);
+ sink_info->npredecessors++;
+ }
+ }
+#endif
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
// do not use sink->dn.task as that is only filled after the dependences
// link node as successor of list elements
for (kmp_depnode_list_t *p = plist; p; p = p->next) {
kmp_depnode_t *dep = p->node;
+#if OMPX_TASKGRAPH
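+ // while recording, the dependence is registered in the TDG here; the call
+ // to __kmp_track_dependence under the depnode lock below is skipped in
+ // that case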
+ kmp_tdg_status tdg_status = KMP_TDG_NONE;
+ if (task) {
+ kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
+ if (td->is_taskgraph)
+ tdg_status = td->tdg->tdg_status;
+ if (__kmp_tdg_is_recording(tdg_status))
+ __kmp_track_dependence(gtid, dep, node, task);
+ }
+#endif
if (dep->dn.task) {
KMP_ACQUIRE_DEPNODE(gtid, dep);
if (dep->dn.task) {
- __kmp_track_dependence(gtid, dep, node, task);
+#if OMPX_TASKGRAPH
+ if (!(__kmp_tdg_is_recording(tdg_status)) && task)
+#endif
+ __kmp_track_dependence(gtid, dep, node, task);
dep->dn.successors = __kmp_add_node(thread, dep->dn.successors, node);
KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
"%p\n",
if (!sink)
return 0;
kmp_int32 npredecessors = 0;
+#if OMPX_TASKGRAPH
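+ // as above: while recording, the dependence is registered in the TDG here
+ // and the call under the depnode lock below is skipped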
+ kmp_tdg_status tdg_status = KMP_TDG_NONE;
+ kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
+ if (task) {
+ if (td->is_taskgraph)
+ tdg_status = td->tdg->tdg_status;
+ if (__kmp_tdg_is_recording(tdg_status) && sink->dn.task)
+ __kmp_track_dependence(gtid, sink, source, task);
+ }
+#endif
if (sink->dn.task) {
// synchronously add source to sink' list of successors
KMP_ACQUIRE_DEPNODE(gtid, sink);
if (sink->dn.task) {
- __kmp_track_dependence(gtid, sink, source, task);
+#if OMPX_TASKGRAPH
+ if (!(__kmp_tdg_is_recording(tdg_status)) && task)
+#endif
+ __kmp_track_dependence(gtid, sink, source, task);
sink->dn.successors = __kmp_add_node(thread, sink->dn.successors, source);
KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
"%p\n",
gtid, KMP_TASK_TO_TASKDATA(sink->dn.task),
KMP_TASK_TO_TASKDATA(task)));
+#if OMPX_TASKGRAPH
+ if (__kmp_tdg_is_recording(tdg_status)) {
+ kmp_taskdata_t *tdd = KMP_TASK_TO_TASKDATA(sink->dn.task);
+ if (tdd->is_taskgraph) {
+ if (tdd->td_flags.onced)
+ // decrement npredecessors if sink->dn.task belongs to a taskgraph and
+ // 1) the task has been reset to its initial state (by __kmp_free_task) or
+ // 2) the task is complete but has not been reset yet
+ npredecessors--;
+ }
+ }
+#endif
npredecessors++;
}
KMP_RELEASE_DEPNODE(gtid, sink);
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskdata_t *current_task = thread->th.th_current_task;
+#if OMPX_TASKGRAPH
+ // record the task in the TDG while recording (task-with-dependences path)
+ if (new_taskdata->is_taskgraph &&
+ __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
+ kmp_tdg_info_t *tdg = new_taskdata->tdg;
+ // extend record_map if needed
+ if (new_taskdata->td_task_id >= tdg->map_size) {
+ __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
+ if (new_taskdata->td_task_id >= tdg->map_size) {
+ kmp_uint old_size = tdg->map_size;
+ kmp_uint new_size = old_size * 2;
+ kmp_node_info_t *old_record = tdg->record_map;
+ kmp_node_info_t *new_record = (kmp_node_info_t *)__kmp_allocate(
+ new_size * sizeof(kmp_node_info_t));
+ KMP_MEMCPY(new_record, tdg->record_map,
+ old_size * sizeof(kmp_node_info_t));
+ tdg->record_map = new_record;
+
+ __kmp_free(old_record);
+
+ for (kmp_int i = old_size; i < new_size; i++) {
+ kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate(
+ __kmp_successors_size * sizeof(kmp_int32));
+ new_record[i].task = nullptr;
+ new_record[i].successors = successorsList;
+ new_record[i].nsuccessors = 0;
+ new_record[i].npredecessors = 0;
+ new_record[i].successors_size = __kmp_successors_size;
+ KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
+ }
+ // update map_size at the end, so that another thread never sees the new
+ // size while record_map still points to old_record
+ tdg->map_size = new_size;
+ }
+ __kmp_release_bootstrap_lock(&tdg->graph_lock);
+ }
+ tdg->record_map[new_taskdata->td_task_id].task = new_task;
+ tdg->record_map[new_taskdata->td_task_id].parent_task =
+ new_taskdata->td_parent;
+ KMP_ATOMIC_INC(&tdg->num_tasks);
+ }
+#endif
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
if (!current_task->ompt_task_info.frame.enter_frame.ptr)
static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
kmp_task_team_t *task_team);
static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask);
+#if OMPX_TASKGRAPH
+static kmp_tdg_info_t *__kmp_find_tdg(kmp_int32 tdg_id);
+int __kmp_taskloop_task(int gtid, void *ptask);
+#endif
#ifdef BUILD_TIED_TASK_STACK
}
// Check mutexinoutset dependencies, acquire locks
kmp_depnode_t *node = tasknew->td_depnode;
+#if OMPX_TASKGRAPH
+ if (!tasknew->is_taskgraph &&
+ UNLIKELY(node && (node->dn.mtx_num_locks > 0))) {
+#else
if (UNLIKELY(node && (node->dn.mtx_num_locks > 0))) {
+#endif
for (int i = 0; i < node->dn.mtx_num_locks; ++i) {
KMP_DEBUG_ASSERT(node->dn.mtx_locks[i] != NULL);
if (__kmp_test_lock(node->dn.mtx_locks[i], gtid))
task->data2.priority = 0;
taskdata->td_flags.freed = 1;
+#if OMPX_TASKGRAPH
+ // do not free tasks in taskgraph
+ if (!taskdata->is_taskgraph) {
+#endif
// deallocate the taskdata and shared variable blocks associated with this task
#if USE_FAST_MEMORY
__kmp_fast_free(thread, taskdata);
#else /* ! USE_FAST_MEMORY */
__kmp_thread_free(thread, taskdata);
#endif
+#if OMPX_TASKGRAPH
+ } else {
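+ // the task belongs to a taskgraph: instead of freeing it, reset its flags
+ // and counters so the same taskdata can be executed again when the TDG is
+ // replayed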
+ taskdata->td_flags.complete = 0;
+ taskdata->td_flags.started = 0;
+ taskdata->td_flags.freed = 0;
+ taskdata->td_flags.executing = 0;
+ taskdata->td_flags.task_serial =
+ (taskdata->td_parent->td_flags.final ||
+ taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser);
+
+ // taskdata->td_allow_completion_event.pending_events_count = 1;
+ KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0);
+ KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
+ // start at one because counts current task and children
+ KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1);
+ }
+#endif
+
KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata));
}
flags.detachable == TASK_DETACHABLE || flags.hidden_helper;
ret = ret ||
KMP_ATOMIC_LD_ACQ(&taskdata->td_parent->td_incomplete_child_tasks) > 0;
+#if OMPX_TASKGRAPH
+ if (taskdata->td_taskgroup && taskdata->is_taskgraph)
+ ret = ret || KMP_ATOMIC_LD_ACQ(&taskdata->td_taskgroup->count) > 0;
+#endif
return ret;
}
kmp_info_t *thread = __kmp_threads[gtid];
kmp_task_team_t *task_team =
thread->th.th_task_team; // might be NULL for serial teams...
+#if OMPX_TASKGRAPH
+ // cache is_taskgraph to avoid a segfault when taskdata->td_flags needs to
+ // be read after the task has already been freed (e.g. by a vanilla taskloop)
+ bool is_taskgraph;
+#endif
#if KMP_DEBUG
kmp_int32 children = 0;
#endif
KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
+#if OMPX_TASKGRAPH
+ is_taskgraph = taskdata->is_taskgraph;
+#endif
+
// Pop task from stack if tied
#ifdef BUILD_TIED_TASK_STACK
if (taskdata->td_flags.tiedness == TASK_TIED) {
if (completed) {
taskdata->td_flags.complete = 1; // mark the task as completed
+#if OMPX_TASKGRAPH
+ taskdata->td_flags.onced = 1; // mark the task as having run once
+#endif
#if OMPT_SUPPORT
// This is not a detached task, we are done here
#endif
KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
KMP_DEBUG_ASSERT(children >= 0);
+#if OMPX_TASKGRAPH
+ if (taskdata->td_taskgroup && !taskdata->is_taskgraph)
+#else
if (taskdata->td_taskgroup)
+#endif
KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
} else if (task_team && (task_team->tt.tt_found_proxy_tasks ||
task_team->tt.tt_hidden_helper_task_encountered)) {
// KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
resumed_task->td_flags.executing = 1; // resume previous task
+#if OMPX_TASKGRAPH
+ if (is_taskgraph && __kmp_track_children_task(taskdata) &&
+ taskdata->td_taskgroup) {
+ // TDG: we only release the taskgroup barrier here because
+ // __kmp_free_task_and_ancestors will call __kmp_free_task, which resets
+ // all task parameters such as taskdata->started, etc. If we released the
+ // barrier earlier, these parameters could be read before being reset.
+ // This is not an issue for the non-TDG implementation because a
+ // task(data) structure is never reused.
+ KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
+ }
+#endif
+
KA_TRACE(
10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
gtid, taskdata, resumed_task));
task->td_flags.executing = 1;
task->td_flags.complete = 0;
task->td_flags.freed = 0;
+#if OMPX_TASKGRAPH
+ task->td_flags.onced = 0;
+#endif
task->td_depnode = NULL;
task->td_last_tied = task;
if (task->td_dephash) {
int children;
task->td_flags.complete = 1;
+#if OMPX_TASKGRAPH
+ task->td_flags.onced = 1;
+#endif
children = KMP_ATOMIC_LD_ACQ(&task->td_incomplete_child_tasks);
kmp_tasking_flags_t flags_old = task->td_flags;
if (children == 0 && flags_old.complete == 1) {
taskdata->td_flags.executing = 0;
taskdata->td_flags.complete = 0;
taskdata->td_flags.freed = 0;
-
+#if OMPX_TASKGRAPH
+ taskdata->td_flags.onced = 0;
+#endif
KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
// start at one because counts current task and children
KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1);
}
}
+#if OMPX_TASKGRAPH
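+ // while a TDG is being recorded, every task allocated here (except the
+ // internal taskloop task) becomes a TDG node and gets a TDG-local id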
+ kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
+ if (tdg && __kmp_tdg_is_recording(tdg->tdg_status) &&
+ (task_entry != (kmp_routine_entry_t)__kmp_taskloop_task)) {
+ taskdata->is_taskgraph = 1;
+ taskdata->tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
+ taskdata->td_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
+ }
+#endif
KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
gtid, taskdata, taskdata->td_parent));
bool serialize_immediate) {
kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
+#if OMPX_TASKGRAPH
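+ // record the task in the TDG while recording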
+ if (new_taskdata->is_taskgraph &&
+ __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
+ kmp_tdg_info_t *tdg = new_taskdata->tdg;
+ // extend the record_map if needed
+ if (new_taskdata->td_task_id >= new_taskdata->tdg->map_size) {
+ __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
+ // map_size could have been updated by another thread if recursive
+ // taskloop
+ if (new_taskdata->td_task_id >= tdg->map_size) {
+ kmp_uint old_size = tdg->map_size;
+ kmp_uint new_size = old_size * 2;
+ kmp_node_info_t *old_record = tdg->record_map;
+ kmp_node_info_t *new_record = (kmp_node_info_t *)__kmp_allocate(
+ new_size * sizeof(kmp_node_info_t));
+
+ KMP_MEMCPY(new_record, old_record, old_size * sizeof(kmp_node_info_t));
+ tdg->record_map = new_record;
+
+ __kmp_free(old_record);
+
+ for (kmp_int i = old_size; i < new_size; i++) {
+ kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate(
+ __kmp_successors_size * sizeof(kmp_int32));
+ new_record[i].task = nullptr;
+ new_record[i].successors = successorsList;
+ new_record[i].nsuccessors = 0;
+ new_record[i].npredecessors = 0;
+ new_record[i].successors_size = __kmp_successors_size;
+ KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
+ }
+ // update map_size at the end, so that another thread never sees the new
+ // size while record_map still points to old_record
+ tdg->map_size = new_size;
+ }
+ __kmp_release_bootstrap_lock(&tdg->graph_lock);
+ }
+ // record the task if it has not been recorded yet
+ if (tdg->record_map[new_taskdata->td_task_id].task == nullptr) {
+ tdg->record_map[new_taskdata->td_task_id].task = new_task;
+ tdg->record_map[new_taskdata->td_task_id].parent_task =
+ new_taskdata->td_parent;
+ KMP_ATOMIC_INC(&tdg->num_tasks);
+ }
+ }
+#endif
+
/* Should we execute the new task or queue it? For now, let's just always try
to queue it. If the queue fills up, then we'll execute it. */
if (new_taskdata->td_flags.proxy == TASK_PROXY ||
taskdata->ompt_task_info.frame.enter_frame = ompt_data_none;
}
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
-
}
KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
without help of the runtime library.
*/
void *__kmpc_task_reduction_init(int gtid, int num, void *data) {
+#if OMPX_TASKGRAPH
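+ // while recording, keep a copy of the reduction data so that it can be
+ // re-initialized when the TDG is replayed (see __kmp_exec_tdg)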
+ kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
+ if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) {
+ kmp_tdg_info_t *this_tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
+ this_tdg->rec_taskred_data =
+ __kmp_allocate(sizeof(kmp_task_red_input_t) * num);
+ this_tdg->rec_num_taskred = num;
+ KMP_MEMCPY(this_tdg->rec_taskred_data, data,
+ sizeof(kmp_task_red_input_t) * num);
+ }
+#endif
return __kmp_task_reduction_init(gtid, num, (kmp_task_red_input_t *)data);
}
has two parameters, pointer to object to be initialized and pointer to omp_orig
*/
void *__kmpc_taskred_init(int gtid, int num, void *data) {
+#if OMPX_TASKGRAPH
+ kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
+ if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) {
+ kmp_tdg_info_t *this_tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
+ this_tdg->rec_taskred_data =
+ __kmp_allocate(sizeof(kmp_task_red_input_t) * num);
+ this_tdg->rec_num_taskred = num;
+ KMP_MEMCPY(this_tdg->rec_taskred_data, data,
+ sizeof(kmp_task_red_input_t) * num);
+ }
+#endif
return __kmp_task_reduction_init(gtid, num, (kmp_taskred_input_t *)data);
}
kmp_int32 num = tg->reduce_num_data;
kmp_int32 tid = thread->th.th_info.ds.ds_tid;
+#if OMPX_TASKGRAPH
+ if ((thread->th.th_current_task->is_taskgraph) &&
+ (!__kmp_tdg_is_recording(
+ __kmp_global_tdgs[__kmp_curr_tdg_idx]->tdg_status))) {
+ tg = thread->th.th_current_task->td_taskgroup;
+ KMP_ASSERT(tg != NULL);
+ KMP_ASSERT(tg->reduce_data != NULL);
+ arr = (kmp_taskred_data_t *)(tg->reduce_data);
+ num = tg->reduce_num_data;
+ }
+#endif
+
KMP_ASSERT(data != NULL);
while (tg != NULL) {
for (int i = 0; i < num; ++i) {
KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
taskdata->td_flags.complete = 1; // mark the task as completed
+#if OMPX_TASKGRAPH
+ taskdata->td_flags.onced = 1;
+#endif
if (taskdata->td_taskgroup)
KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
//
// thread: allocating thread
// task_src: pointer to source task to be duplicated
+// taskloop_recur: used only when dealing with taskgraph,
+// indicating whether this duplication is for the recursive splitting of a
+// taskloop (and hence whether task->td_task_id needs to be updated)
// returns: a pointer to the allocated kmp_task_t structure (task).
-kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) {
+kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src
+#if OMPX_TASKGRAPH
+ , int taskloop_recur
+#endif
+) {
kmp_task_t *task;
kmp_taskdata_t *taskdata;
kmp_taskdata_t *taskdata_src = KMP_TASK_TO_TASKDATA(task_src);
task = KMP_TASKDATA_TO_TASK(taskdata);
// Initialize new task (only specific fields not affected by memcpy)
+#if OMPX_TASKGRAPH
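+ // the duplicate gets a TDG node id only when it is a TDG node being
+ // recorded and is not the recursive taskloop pattern task; otherwise it
+ // gets a regular task id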
+ if (!taskdata->is_taskgraph || taskloop_recur)
+ taskdata->td_task_id = KMP_GEN_TASK_ID();
+ else if (taskdata->is_taskgraph &&
+ __kmp_tdg_is_recording(taskdata_src->tdg->tdg_status))
+ taskdata->td_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
+#else
taskdata->td_task_id = KMP_GEN_TASK_ID();
+#endif
if (task->shareds != NULL) { // need setup shareds pointer
shareds_offset = (char *)task_src->shareds - (char *)taskdata_src;
task->shareds = &((char *)taskdata)[shareds_offset];
lastpriv = 1;
}
}
+
+#if OMPX_TASKGRAPH
+ next_task = __kmp_task_dup_alloc(thread, task, /* taskloop_recur */ 0);
+#else
next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
+#endif
+
kmp_taskdata_t *next_taskdata = KMP_TASK_TO_TASKDATA(next_task);
kmp_taskloop_bounds_t next_task_bounds =
kmp_taskloop_bounds_t(next_task, task_bounds);
lb1 = ub0 + st;
// create pattern task for 2nd half of the loop
+#if OMPX_TASKGRAPH
+ next_task = __kmp_task_dup_alloc(thread, task,
+ /* taskloop_recur */ 1);
+#else
next_task = __kmp_task_dup_alloc(thread, task); // duplicate the task
+#endif
// adjust lower bound (upper bound is not changed) for the 2nd half
*(kmp_uint64 *)((char *)next_task + lower_offset) = lb1;
if (ptask_dup != NULL) // construct firstprivates, etc.
p->codeptr_ra = codeptr_ra;
#endif
+#if OMPX_TASKGRAPH
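+ // the internal taskloop task keeps a pointer to the TDG but is not itself
+ // recorded as a TDG node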
+ kmp_taskdata_t *new_task_data = KMP_TASK_TO_TASKDATA(new_task);
+ new_task_data->tdg = taskdata->tdg;
+ new_task_data->is_taskgraph = 0;
+#endif
+
#if OMPT_SUPPORT
// schedule new task with correct return address for OMPT events
__kmp_omp_taskloop_task(NULL, gtid, new_task, codeptr_ra);
__kmpc_taskgroup(loc, gtid);
}
+#if OMPX_TASKGRAPH
+ KMP_ATOMIC_DEC(&__kmp_tdg_task_id);
+#endif
// =========================================================================
// calculate loop parameters
kmp_taskloop_bounds_t task_bounds(task, lb, ub);
return taskdata->td_task_team != NULL;
}
+
+#if OMPX_TASKGRAPH
+// __kmp_find_tdg: identify a TDG through its ID
+// tdg_id: ID of the TDG
+// returns: A pointer to the TDG with this ID if it exists and is not in its
+// initial state, otherwise nullptr
+static kmp_tdg_info_t *__kmp_find_tdg(kmp_int32 tdg_id) {
+ kmp_tdg_info_t *res = nullptr;
+ if (__kmp_max_tdgs == 0)
+ return res;
+
+ if (__kmp_global_tdgs == NULL)
+ __kmp_global_tdgs = (kmp_tdg_info_t **)__kmp_allocate(
+ sizeof(kmp_tdg_info_t *) * __kmp_max_tdgs);
+
+ if ((__kmp_global_tdgs[tdg_id]) &&
+ (__kmp_global_tdgs[tdg_id]->tdg_status != KMP_TDG_NONE))
+ res = __kmp_global_tdgs[tdg_id];
+ return res;
+}
+
+// __kmp_exec_tdg: launch the execution of a previously recorded TDG
+// gtid: Global Thread ID
+// tdg: Pointer to the TDG to execute
+void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
+ KMP_DEBUG_ASSERT(tdg->tdg_status == KMP_TDG_READY);
+ KA_TRACE(10, ("__kmp_exec_tdg(enter): T#%d tdg_id=%d num_roots=%d\n", gtid,
+ tdg->tdg_id, tdg->num_roots));
+ kmp_node_info_t *this_record_map = tdg->record_map;
+ kmp_int32 *this_root_tasks = tdg->root_tasks;
+ kmp_int32 this_num_roots = tdg->num_roots;
+ kmp_int32 this_num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
+
+ kmp_info_t *thread = __kmp_threads[gtid];
+ kmp_taskdata_t *parent_task = thread->th.th_current_task;
+
+ if (tdg->rec_taskred_data) {
+ __kmpc_taskred_init(gtid, tdg->rec_num_taskred, tdg->rec_taskred_data);
+ }
+
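+ // re-attach every recorded node to the current parent task and taskgroup,
+ // and reset its dependence counter, before launching the root tasks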
+ for (kmp_int32 j = 0; j < this_num_tasks; j++) {
+ kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(this_record_map[j].task);
+
+ td->td_parent = parent_task;
+ this_record_map[j].parent_task = parent_task;
+
+ kmp_taskgroup_t *parent_taskgroup =
+ this_record_map[j].parent_task->td_taskgroup;
+
+ KMP_ATOMIC_ST_RLX(&this_record_map[j].npredecessors_counter,
+ this_record_map[j].npredecessors);
+ KMP_ATOMIC_INC(&this_record_map[j].parent_task->td_incomplete_child_tasks);
+
+ if (parent_taskgroup) {
+ KMP_ATOMIC_INC(&parent_taskgroup->count);
+ // The taskgroup is different so we must update it
+ td->td_taskgroup = parent_taskgroup;
+ } else if (td->td_taskgroup != nullptr) {
+ // If the parent doesn't have a taskgroup, remove it from the task
+ td->td_taskgroup = nullptr;
+ }
+ if (this_record_map[j].parent_task->td_flags.tasktype == TASK_EXPLICIT)
+ KMP_ATOMIC_INC(&this_record_map[j].parent_task->td_allocated_child_tasks);
+ }
+
+ for (kmp_int32 j = 0; j < this_num_roots; ++j) {
+ __kmp_omp_task(gtid, this_record_map[this_root_tasks[j]].task, true);
+ }
+ KA_TRACE(10, ("__kmp_exec_tdg(exit): T#%d tdg_id=%d num_roots=%d\n", gtid,
+ tdg->tdg_id, tdg->num_roots));
+}
+
+// __kmp_start_record: set up a TDG structure and turn the
+// recording flag to true
+// gtid: Global Thread ID of the encountering thread
+// flags: Flags associated with the TDG
+// tdg_id: ID of the TDG to record
+static inline void __kmp_start_record(kmp_int32 gtid,
+ kmp_taskgraph_flags_t *flags,
+ kmp_int32 tdg_id) {
+ kmp_tdg_info_t *tdg =
+ (kmp_tdg_info_t *)__kmp_allocate(sizeof(kmp_tdg_info_t));
+ __kmp_global_tdgs[__kmp_curr_tdg_idx] = tdg;
+ // Initializing the TDG structure
+ tdg->tdg_id = tdg_id;
+ tdg->map_size = INIT_MAPSIZE;
+ tdg->num_roots = -1;
+ tdg->root_tasks = nullptr;
+ tdg->tdg_status = KMP_TDG_RECORDING;
+ tdg->rec_num_taskred = 0;
+ tdg->rec_taskred_data = nullptr;
+ KMP_ATOMIC_ST_RLX(&tdg->num_tasks, 0);
+
+ // Initializing the list of nodes in this TDG
+ kmp_node_info_t *this_record_map =
+ (kmp_node_info_t *)__kmp_allocate(INIT_MAPSIZE * sizeof(kmp_node_info_t));
+ for (kmp_int32 i = 0; i < INIT_MAPSIZE; i++) {
+ kmp_int32 *successorsList =
+ (kmp_int32 *)__kmp_allocate(__kmp_successors_size * sizeof(kmp_int32));
+ this_record_map[i].task = nullptr;
+ this_record_map[i].successors = successorsList;
+ this_record_map[i].nsuccessors = 0;
+ this_record_map[i].npredecessors = 0;
+ this_record_map[i].successors_size = __kmp_successors_size;
+ KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter, 0);
+ }
+
+ __kmp_global_tdgs[__kmp_curr_tdg_idx]->record_map = this_record_map;
+}
+
+// __kmpc_start_record_task: Wrapper around __kmp_start_record to mark
+// the beginning of the record process of a task region
+// loc_ref: Location of TDG, not used yet
+// gtid: Global Thread ID of the encountering thread
+// input_flags: Flags associated with the TDG
+// tdg_id: ID of the TDG to record; for now, an incremental integer
+// returns: 1 if we record, otherwise 0
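+//
+// A possible call sequence a compiler might emit for a record-and-replay
+// region (a sketch; the exact arguments and the tdg_id management are up to
+// the compiler):
+//   if (__kmpc_start_record_task(loc, gtid, /*input_flags=*/0, tdg_id)) {
+//     ... create the tasks of the region (they execute and are recorded) ...
+//   }
+//   __kmpc_end_record_task(loc, gtid, /*input_flags=*/0, tdg_id);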
+kmp_int32 __kmpc_start_record_task(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_int32 input_flags, kmp_int32 tdg_id) {
+
+ kmp_int32 res;
+ kmp_taskgraph_flags_t *flags = (kmp_taskgraph_flags_t *)&input_flags;
+ KA_TRACE(10,
+ ("__kmpc_start_record_task(enter): T#%d loc=%p flags=%d tdg_id=%d\n",
+ gtid, loc_ref, input_flags, tdg_id));
+
+ if (__kmp_max_tdgs == 0) {
+ KA_TRACE(
+ 10,
+ ("__kmpc_start_record_task(abandon): T#%d loc=%p flags=%d tdg_id = %d, "
+ "__kmp_max_tdgs = 0\n",
+ gtid, loc_ref, input_flags, tdg_id));
+ return 1;
+ }
+
+ __kmpc_taskgroup(loc_ref, gtid);
+ if (kmp_tdg_info_t *tdg = __kmp_find_tdg(tdg_id)) {
+ // TODO: use re_record flag
+ __kmp_exec_tdg(gtid, tdg);
+ res = 0;
+ } else {
+ __kmp_curr_tdg_idx = tdg_id;
+ KMP_DEBUG_ASSERT(__kmp_curr_tdg_idx < __kmp_max_tdgs);
+ __kmp_start_record(gtid, flags, tdg_id);
+ __kmp_num_tdg++;
+ res = 1;
+ }
+ KA_TRACE(10, ("__kmpc_start_record_task(exit): T#%d TDG %d starts to %s\n",
+ gtid, tdg_id, res ? "record" : "execute"));
+ return res;
+}
+
+// __kmp_end_record: finalize a TDG after recording it
+// gtid: Global thread ID
+// tdg: Pointer to the TDG
+void __kmp_end_record(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
+ // Store roots
+ kmp_node_info_t *this_record_map = tdg->record_map;
+ kmp_int32 this_num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
+ kmp_int32 *this_root_tasks =
+ (kmp_int32 *)__kmp_allocate(this_num_tasks * sizeof(kmp_int32));
+ kmp_int32 this_map_size = tdg->map_size;
+ kmp_int32 this_num_roots = 0;
+ kmp_info_t *thread = __kmp_threads[gtid];
+
+ for (kmp_int32 i = 0; i < this_num_tasks; i++) {
+ if (this_record_map[i].npredecessors == 0) {
+ this_root_tasks[this_num_roots++] = i;
+ }
+ }
+
+ // Update with roots info and mapsize
+ tdg->map_size = this_map_size;
+ tdg->num_roots = this_num_roots;
+ tdg->root_tasks = this_root_tasks;
+ KMP_DEBUG_ASSERT(tdg->tdg_status == KMP_TDG_RECORDING);
+ tdg->tdg_status = KMP_TDG_READY;
+
+ if (thread->th.th_current_task->td_dephash) {
+ __kmp_dephash_free(thread, thread->th.th_current_task->td_dephash);
+ thread->th.th_current_task->td_dephash = NULL;
+ }
+
+ // Reset predecessor counter
+ for (kmp_int32 i = 0; i < this_num_tasks; i++) {
+ KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter,
+ this_record_map[i].npredecessors);
+ }
+ KMP_ATOMIC_ST_RLX(&__kmp_tdg_task_id, 0);
+}
+
+// __kmpc_end_record_task: wrapper around __kmp_end_record to mark
+// the end of the recording phase
+//
+// loc_ref: Source location information
+// gtid: Global thread ID
+// input_flags: Flags attached to the graph
+// tdg_id: ID of the TDG that has just finished recording
+void __kmpc_end_record_task(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_int32 input_flags, kmp_int32 tdg_id) {
+ kmp_tdg_info_t *tdg = __kmp_find_tdg(tdg_id);
+
+ KA_TRACE(10, ("__kmpc_end_record_task(enter): T#%d loc=%p finishes recording"
+ " tdg=%d with flags=%d\n",
+ gtid, loc_ref, tdg_id, input_flags));
+ if (__kmp_max_tdgs) {
+ // TODO: use input_flags->nowait
+ __kmpc_end_taskgroup(loc_ref, gtid);
+ if (__kmp_tdg_is_recording(tdg->tdg_status))
+ __kmp_end_record(gtid, tdg);
+ }
+ KA_TRACE(10, ("__kmpc_end_record_task(exit): T#%d loc=%p finished recording"
+ " tdg=%d, its status is now READY\n",
+ gtid, loc_ref, tdg_id));
+}
+#endif