This is an alternative approach to address inconsistencies pointed out in: D90078
This patch makes sure that the return address is reset, when leaving the scope.
In some cases, I had to move the macro out of an if-statement to have it in the
right scope, in some cases I added an additional block to restrict the scope.
This patch does not handle inconsistencies that might occur if the return
address is still set when we call into the application.
Test case (repeated_calls.c) provided by @hbae
Differential Revision: https://reviews.llvm.org/D91692
parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
}
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- OMPT_STORE_RETURN_ADDRESS(gtid);
}
+ OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
#if INCLUDE_SSC_MARKS
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- OMPT_STORE_RETURN_ADDRESS(global_tid);
}
+ OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
__kmp_threads[global_tid]->th.th_ident = loc;
// TODO: explicit barrier_wait_id:
kmp_team_t *team;
ompt_wait_id_t lck;
void *codeptr_ra;
+ OMPT_STORE_RETURN_ADDRESS(gtid);
if (ompt_enabled.enabled) {
- OMPT_STORE_RETURN_ADDRESS(gtid);
team = __kmp_team_from_gtid(gtid);
lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
/* OMPT state update */
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- OMPT_STORE_RETURN_ADDRESS(global_tid);
}
+ OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- OMPT_STORE_RETURN_ADDRESS(global_tid);
}
+ OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- OMPT_STORE_RETURN_ADDRESS(gtid);
}
+ OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
/* This barrier is not a barrier region boundary */
#if USE_ITT_NOTIFY
// Consider next barrier a user-visible barrier for barrier region boundaries
// Nesting checks are already handled by the single construct checks
-
+ {
#if OMPT_SUPPORT
- if (ompt_enabled.enabled) {
OMPT_STORE_RETURN_ADDRESS(gtid);
- }
#endif
#if USE_ITT_NOTIFY
__kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
ompt_frame->enter_frame = ompt_data_none;
}
#endif
+ }
}
/* -------------------------------------------------------------------------- */
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- OMPT_STORE_RETURN_ADDRESS(global_tid);
}
+ OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- OMPT_STORE_RETURN_ADDRESS(global_tid);
}
+ OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident =
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- OMPT_STORE_RETURN_ADDRESS(global_tid);
}
+ OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- OMPT_STORE_RETURN_ADDRESS(global_tid);
}
+ OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- OMPT_STORE_RETURN_ADDRESS(global_tid);
}
+ OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
// TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
gtid, lb, ub, str, chunk_sz)); \
\
if ((str > 0) ? (lb < ub) : (lb > ub)) { \
- IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
- KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
- (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
- (schedule) != kmp_sch_static); \
- IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
- status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
- (kmp_int *)p_ub, (kmp_int *)&stride); \
+ { \
+ IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
+ KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
+ (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
+ (schedule) != kmp_sch_static); \
+ } \
+ { \
+ IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
+ status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
+ (kmp_int *)p_ub, (kmp_int *)&stride); \
+ } \
if (status) { \
KMP_DEBUG_ASSERT(stride == str); \
*p_ub += (str > 0) ? 1 : -1; \
gtid, lb, ub, str, chunk_sz)); \
\
if ((str > 0) ? (lb < ub) : (lb > ub)) { \
- IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
- KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
- (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \
- IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
- status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
- (kmp_int *)p_ub, (kmp_int *)&stride); \
+ { \
+ IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
+ KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
+ (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
+ TRUE); \
+ } \
+ { \
+ IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
+ status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
+ (kmp_int *)p_ub, (kmp_int *)&stride); \
+ } \
if (status) { \
KMP_DEBUG_ASSERT(stride == str); \
*p_ub += (str > 0) ? 1 : -1; \
task, data, num_threads, &loc, kmp_nm_dynamic_chunked,
(kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1);
+ {
#if OMPT_SUPPORT
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);
-
+ }
task(data);
KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();
KA_TRACE(20, ("GOMP_parallel_sections exit: T#%d\n", gtid));
return return_address;
}
-#define OMPT_STORE_RETURN_ADDRESS(gtid) \
-  if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads[gtid] && \
-      !__kmp_threads[gtid]->th.ompt_thread_info.return_address) \
-    __kmp_threads[gtid]->th.ompt_thread_info.return_address = \
-        __builtin_return_address(0)
+// Store the return address via an RAII guard object so that it is reset
+// automatically when the surrounding scope is left (see
+// OmptReturnAddressGuard). The old statement-style macro is removed rather
+// than kept as commented-out dead code.
+#define OMPT_STORE_RETURN_ADDRESS(gtid) \
+  OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address(0)};
#define OMPT_LOAD_RETURN_ADDRESS(gtid) __ompt_load_return_address(gtid)
#define OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid) \
((ompt_enabled.enabled && gtid >= 0 && __kmp_threads[gtid] && \
#define OMPT_REDUCTION_END
#endif // ! OMPT_SUPPORT && OMPT_OPTIONAL
+// RAII guard: records a return address in the current thread's OMPT info on
+// construction and clears it again on destruction, so the stored address
+// cannot outlive the scope in which OMPT_STORE_RETURN_ADDRESS was expanded.
+class OmptReturnAddressGuard {
+private:
+  // True only if THIS guard set the address; prevents an inner (nested)
+  // guard from clearing an address stored by an outer one.
+  bool SetAddress{false};
+  int Gtid;
+
+public:
+  OmptReturnAddressGuard(int Gtid, void *ReturnAddress) : Gtid(Gtid) {
+    // Store only when OMPT is active, the gtid is valid, and no address is
+    // already recorded (the outermost runtime entry point wins).
+    if (ompt_enabled.enabled && Gtid >= 0 && __kmp_threads[Gtid] &&
+        !__kmp_threads[Gtid]->th.ompt_thread_info.return_address) {
+      SetAddress = true;
+      __kmp_threads[Gtid]->th.ompt_thread_info.return_address = ReturnAddress;
+    }
+  }
+  ~OmptReturnAddressGuard() {
+    // Reset only if this guard was the one that stored the address.
+    if (SetAddress)
+      __kmp_threads[Gtid]->th.ompt_thread_info.return_address = NULL;
+  }
+};
+
#endif
--- /dev/null
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+
+#define USE_PRIVATE_TOOL 1
+#include "callback.h"
+
+// Opens a parallel region from a fixed call site. 'noinline' keeps the call
+// site distinct so the codeptr_ra reported to the tool is the same for every
+// invocation of foo (checked in main).
+__attribute__((noinline))
+int foo(int x) {
+#pragma omp parallel num_threads(2)
+  {
+#pragma omp atomic
+    x++;
+  }
+  return x;
+}
+
+// Same shape as foo but uses a critical section instead of an atomic; gives
+// the interleaved call in main a different (unchecked) return address.
+__attribute__((noinline))
+int bar(int x) {
+#pragma omp parallel num_threads(2)
+  {
+#pragma omp critical
+    x++;
+  }
+  return x;
+}
+
+int main() {
+  // Initialize y: the original read an uninitialized int in foo(y), which is
+  // undefined behavior. The actual value is irrelevant to the checked output;
+  // only the reported codeptr_ra values matter.
+  int y = 0;
+  y = foo(y);
+  y = bar(y);
+  y = foo(y);
+  return 0;
+
+  // CHECK-NOT: {{^}}0: Could not register callback
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // First call to foo
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin
+  // CHECK-SAME: {{.*}}codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+
+  // Call to bar
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin
+
+  // Second call to foo
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin
+  // CHECK-SAME: {{.*}}codeptr_ra=[[RETURN_ADDRESS]]
+
+}
+
+// OMPT thread-begin callback: assigns a unique id to the thread's tool data
+// and prints one line per thread start (matched by FileCheck in main).
+static void on_ompt_callback_thread_begin(
+  ompt_thread_t thread_type,
+  ompt_data_t *thread_data) {
+  // The runtime must hand us a zero-initialized data slot.
+  if (thread_data->ptr)
+    printf("%s\n", "0: thread_data initially not null");
+  thread_data->value = ompt_get_unique_id();
+  printf("%" PRIu64 ":" _TOOL_PREFIX
+         " ompt_event_thread_begin: thread_type=%s=%d, thread_id=%" PRIu64 "\n",
+         ompt_get_thread_data()->value, ompt_thread_t_values[thread_type],
+         thread_type, thread_data->value);
+}
+
+// OMPT parallel-begin callback: prints the parallel region's id, the parent
+// task frames, and codeptr_ra — the value main's CHECK lines compare across
+// repeated calls to foo.
+static void on_ompt_callback_parallel_begin(
+    ompt_data_t *encountering_task_data,
+    const ompt_frame_t *encountering_task_frame, ompt_data_t *parallel_data,
+    uint32_t requested_team_size, int flag, const void *codeptr_ra) {
+  if (parallel_data->ptr)
+    printf("0: parallel_data initially not null\n");
+  parallel_data->value = ompt_get_unique_id();
+  // Low nibble of 'flag' carries the invoker; remaining bits distinguish a
+  // parallel construct from a teams construct.
+  int invoker = flag & 0xF;
+  const char *event = (flag & ompt_parallel_team) ? "parallel" : "teams";
+  const char *size = (flag & ompt_parallel_team) ? "team_size" : "num_teams";
+  printf("%" PRIu64 ":" _TOOL_PREFIX
+         " ompt_event_%s_begin: parent_task_id=%" PRIu64
+         ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, "
+         "parallel_id=%" PRIu64 ", requested_%s=%" PRIu32
+         ", codeptr_ra=%p, invoker=%d\n",
+         ompt_get_thread_data()->value, event, encountering_task_data->value,
+         encountering_task_frame->exit_frame.ptr,
+         encountering_task_frame->enter_frame.ptr, parallel_data->value, size,
+         requested_team_size, codeptr_ra, invoker);
+}
+
+// Tool initializer: looks up the runtime entry points, registers the two
+// callbacks this test needs, and prints the NULL sentinel the CHECK lines key
+// on. Returns 1 to keep the tool active.
+int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num,
+                    ompt_data_t *tool_data) {
+  ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback");
+  ompt_get_unique_id = (ompt_get_unique_id_t)lookup("ompt_get_unique_id");
+  ompt_get_thread_data = (ompt_get_thread_data_t)lookup("ompt_get_thread_data");
+
+  register_callback(ompt_callback_thread_begin);
+  register_callback(ompt_callback_parallel_begin);
+  printf("0: NULL_POINTER=%p\n", (void *)NULL);
+  return 1; // success
+}
+
+// Tool finalizer: nothing to clean up for this test.
+void ompt_finalize(ompt_data_t *tool_data) {}
+
+// Entry point the OpenMP runtime probes for at startup; returning a non-NULL
+// result activates this tool with the init/finalize pair above.
+ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
+                                          const char *runtime_version) {
+  static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,
+                                                            &ompt_finalize, 0};
+  return &ompt_start_tool_result;
+}