int gtid = __kmp_entry_gtid();
MKLOC(loc, "GOMP_barrier");
KA_TRACE(20, ("GOMP_barrier: T#%d\n", gtid));
+#if OMPT_SUPPORT && OMPT_TRACE
+ ompt_frame_t * ompt_frame;
+ if (ompt_enabled ) {
+ ompt_frame = __ompt_get_task_frame_internal(0);
+ ompt_frame->reenter_runtime_frame = __builtin_frame_address(1);
+ }
+#endif
__kmpc_barrier(&loc, gtid);
}
ompt_parallel_id_t ompt_parallel_id;
if (ompt_enabled) {
ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
- task_info->frame.exit_runtime_frame = NULL;
ompt_parallel_id = __ompt_parallel_id_new(gtid);
__kmp_allocate(sizeof(ompt_lw_taskteam_t));
__ompt_lw_taskteam_init(lwt, thr, gtid, (void *) task, ompt_parallel_id);
lwt->ompt_task_info.task_id = my_ompt_task_id;
- lwt->ompt_task_info.frame.exit_runtime_frame = NULL;
__ompt_lw_taskteam_link(lwt, thr);
#if OMPT_TRACE
int gtid = __kmp_entry_gtid();
#if OMPT_SUPPORT
- ompt_frame_t *parent_frame;
+ ompt_frame_t *parent_frame, *frame;
if (ompt_enabled) {
parent_frame = __ompt_get_task_frame_internal(0);
#if OMPT_SUPPORT
if (ompt_enabled) {
- parent_frame->reenter_runtime_frame = NULL;
+ frame = __ompt_get_task_frame_internal(0);
+ frame->exit_runtime_frame = __builtin_frame_address(1);
}
#endif
}
ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
serialized_task_id = task_info->task_id;
- // Record that we re-entered the runtime system in the implicit
- // task frame representing the parallel region.
- ompt_frame = &task_info->frame;
- ompt_frame->reenter_runtime_frame = __builtin_frame_address(1);
-
// unlink if necessary. no-op if there is not a lightweight task.
ompt_lw_taskteam_t *lwt = __ompt_lw_taskteam_unlink(thr);
// GOMP allocates/frees lwt since it can't be kept on the stack
if (lwt) {
__kmp_free(lwt);
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- // Since a lightweight task was destroyed, make sure that the
- // remaining deepest task knows the stack frame where the runtime
- // was reentered.
- ompt_frame = __ompt_get_task_frame_internal(0);
- ompt_frame->reenter_runtime_frame = __builtin_frame_address(1);
- }
-#endif
}
}
#endif
#if OMPT_SUPPORT
if (ompt_enabled) {
- // Set reenter frame in parent task, which will become current task
- // in the midst of join. This is needed before the end_parallel callback.
- ompt_frame = __ompt_get_task_frame_internal(1);
- ompt_frame->reenter_runtime_frame = __builtin_frame_address(1);
+ // Implicit task is finished here, in the barrier we might schedule deferred tasks,
+ // these don't see the implicit task on the stack
+ ompt_frame = __ompt_get_task_frame_internal(0);
+ ompt_frame->exit_runtime_frame = NULL;
}
#endif
, fork_context_gnu
#endif
);
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- ompt_frame->reenter_runtime_frame = NULL;
- }
-#endif
}
else {
#if OMPT_SUPPORT && OMPT_TRACE
if (ompt_enabled) {
// Record that we re-entered the runtime system in the frame that
// created the parallel region.
- ompt_frame->reenter_runtime_frame = __builtin_frame_address(1);
+ ompt_task_info_t *parent_task_info = __ompt_get_taskinfo(0);
if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
- ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
- parallel_id, task_info->task_id,
+ parallel_id, parent_task_info->task_id,
OMPT_INVOKER(fork_context_gnu));
}
- ompt_frame->reenter_runtime_frame = NULL;
+ parent_task_info->frame.reenter_runtime_frame = NULL;
thr->th.ompt_thread_info.state =
(((thr->th.th_team)->t.t_serialized) ?
MKLOC(loc, "GOMP_parallel");
KA_TRACE(20, ("GOMP_parallel: T#%d\n", gtid));
+#if OMPT_SUPPORT
+ ompt_task_info_t *parent_task_info, *task_info;
+ if (ompt_enabled) {
+ parent_task_info = __ompt_get_taskinfo(0);
+ parent_task_info->frame.reenter_runtime_frame = __builtin_frame_address(1);
+ }
+#endif
if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
if (num_threads != 0) {
__kmp_push_num_threads(&loc, gtid, num_threads);
else {
__kmp_GOMP_serialized_parallel(&loc, gtid, task);
}
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ task_info = __ompt_get_taskinfo(0);
+ task_info->frame.exit_runtime_frame = __builtin_frame_address(0);
+ }
+#endif
task(data);
xexpand(KMP_API_NAME_GOMP_PARALLEL_END)();
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ task_info->frame.exit_runtime_frame = NULL;
+ parent_task_info->frame.reenter_runtime_frame = NULL;
+ }
+#endif
}
void
KA_TRACE(20, ("GOMP_parallel_sections exit: T#%d\n", gtid));
}
-#define PARALLEL_LOOP(func, schedule) \
+#define PARALLEL_LOOP(func, schedule, ompt_pre, ompt_post) \
void func (void (*task) (void *), void *data, unsigned num_threads, \
long lb, long ub, long str, long chunk_sz, unsigned flags) \
{ \
KA_TRACE(20, ( #func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \
gtid, lb, ub, str, chunk_sz )); \
\
+ ompt_pre(); \
if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { \
if (num_threads != 0) { \
__kmp_push_num_threads(&loc, gtid, num_threads); \
(schedule) != kmp_sch_static); \
task(data); \
xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(); \
+ ompt_post(); \
\
KA_TRACE(20, ( #func " exit: T#%d\n", gtid)); \
}
-PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC), kmp_sch_static)
-PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC), kmp_sch_dynamic_chunked)
-PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED), kmp_sch_guided_chunked)
-PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME), kmp_sch_runtime)
+PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC), kmp_sch_static,
+ OMPT_LOOP_PRE, OMPT_LOOP_POST)
+PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC), kmp_sch_dynamic_chunked,
+ OMPT_LOOP_PRE, OMPT_LOOP_POST)
+PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED), kmp_sch_guided_chunked,
+ OMPT_LOOP_PRE, OMPT_LOOP_POST)
+PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME), kmp_sch_runtime,
+ OMPT_LOOP_PRE, OMPT_LOOP_POST)
void
print_ids(2);
print_frame(0);
#pragma omp barrier
+ print_ids(0);
}
+ print_ids(0);
}
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NESTED_REENTER]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+ // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]]
// implicit barrier
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
// THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
+ // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
// implicit barrier
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]