From 924cff0a3988975996bd5808d3b64e320ee73c84 Mon Sep 17 00:00:00 2001 From: Joachim Protze Date: Fri, 3 Nov 2017 17:09:00 +0000 Subject: [PATCH] Updating implementation of OMPT as specified in OpenMP 5.0 Preview 2 (TR6) The TR6 document is expected to be publically released around November 15. This patch does not implement OMPT for libomptarget. Patch by Simon Convent and Joachim Protze Differential Revision: https://reviews.llvm.org/D39182 llvm-svn: 317339 --- openmp/CREDITS.txt | 8 +- openmp/runtime/.clang-format | 2 +- openmp/runtime/CMakeLists.txt | 2 +- openmp/runtime/src/exports_so.txt | 9 - openmp/runtime/src/include/50/ompt.h.var | 275 +++++++++------------ openmp/runtime/src/kmp_csupport.cpp | 1 + openmp/runtime/src/kmp_dispatch.cpp | 1 + openmp/runtime/src/kmp_gsupport.cpp | 10 +- openmp/runtime/src/kmp_i18n.cpp | 21 +- openmp/runtime/src/kmp_itt.inl | 2 + openmp/runtime/src/kmp_lock.cpp | 7 +- openmp/runtime/src/kmp_runtime.cpp | 7 +- openmp/runtime/src/kmp_tasking.cpp | 53 ++-- openmp/runtime/src/kmp_threadprivate.cpp | 7 - openmp/runtime/src/ompt-event-specific.h | 50 ++-- openmp/runtime/src/ompt-general.cpp | 84 ++----- openmp/runtime/src/ompt-internal.h | 4 +- openmp/runtime/src/ompt-specific.cpp | 1 + openmp/runtime/test/lock/omp_init_lock.c | 42 ---- openmp/runtime/test/ompt/callback.h | 32 +-- openmp/runtime/test/ompt/cancel/cancel_parallel.c | 2 +- .../runtime/test/ompt/cancel/cancel_worksharing.c | 4 +- openmp/runtime/test/ompt/loadtool/tool_available.c | 10 +- openmp/runtime/test/ompt/ompt-signal.h | 11 +- openmp/runtime/test/ompt/tasks/dependences.c | 2 +- 25 files changed, 251 insertions(+), 396 deletions(-) delete mode 100644 openmp/runtime/test/lock/omp_init_lock.c diff --git a/openmp/CREDITS.txt b/openmp/CREDITS.txt index 6d927d3..a8ab67a 100644 --- a/openmp/CREDITS.txt +++ b/openmp/CREDITS.txt @@ -12,10 +12,6 @@ N: Carlo Bertolli W: http://ibm.com D: IBM contributor to PowerPC support in CMake files and elsewhere. -N: Diego Caballero -E: diego.l.caballero@gmail.com -D: Fork performance improvements - N: Sunita Chandrasekaran D: Contributor to testsuite from OpenUH @@ -55,3 +51,7 @@ D: Making build work for FreeBSD. N: Cheng Wang D: Contributor to testsuite from OpenUH + +N: Diego Caballero +E: diego.l.caballero@gmail.com +D: Fork performance improvements diff --git a/openmp/runtime/.clang-format b/openmp/runtime/.clang-format index 590e1e2..194ff63 100644 --- a/openmp/runtime/.clang-format +++ b/openmp/runtime/.clang-format @@ -2,4 +2,4 @@ BasedOnStyle: LLVM AlignTrailingComments: false SortIncludes: false -... +--- diff --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt index 1adc825..370393f 100644 --- a/openmp/runtime/CMakeLists.txt +++ b/openmp/runtime/CMakeLists.txt @@ -330,7 +330,7 @@ if(LIBOMP_OMPT_SUPPORT AND (NOT LIBOMP_HAVE_OMPT_SUPPORT)) libomp_error_say("OpenMP Tools Interface requested but not available in this implementation") endif() if(LIBOMP_OMPT_SUPPORT AND (${LIBOMP_OMP_VERSION} LESS 50)) - libomp_error_say("OpenMP Tools Interface only available with OpenMP 5.0, LIBOMP_OMP_VERSION is ${LIBOMP_OMP_VERSION}") + message( FATAL_ERROR "OMPT is only available with OpenMP 5.0, LIBOMP_OMP_VERSION is ${LIBOMP_OMP_VERSION}" ) endif() # TSAN-support diff --git a/openmp/runtime/src/exports_so.txt b/openmp/runtime/src/exports_so.txt index ff16fae..894f578 100644 --- a/openmp/runtime/src/exports_so.txt +++ b/openmp/runtime/src/exports_so.txt @@ -30,15 +30,6 @@ VERSION { # icc drops weak attribute at linking step without the following line: Annotate*; # TSAN annotation - # - # OMPT state placeholders - # - ompt_idle; - ompt_overhead; - ompt_barrier_wait; - ompt_task_wait; - ompt_mutex_wait; - ompc_*; # omp.h renames some standard functions to ompc_*. kmp_*; # Intel extensions. kmpc_*; # Intel extensions. diff --git a/openmp/runtime/src/include/50/ompt.h.var b/openmp/runtime/src/include/50/ompt.h.var index a73662e..0cae6a0 100644 --- a/openmp/runtime/src/include/50/ompt.h.var +++ b/openmp/runtime/src/include/50/ompt.h.var @@ -41,13 +41,6 @@ macro(ompt_get_target_info) \ macro(ompt_get_num_devices) -#define FOREACH_OMPT_PLACEHOLDER_FN(macro) \ - macro (ompt_idle) \ - macro (ompt_overhead) \ - macro (ompt_barrier_wait) \ - macro (ompt_task_wait) \ - macro (ompt_mutex_wait) - #define FOREACH_OMP_STATE(macro) \ \ /* first available state */ \ @@ -96,59 +89,59 @@ macro (ompt_mutex_impl_queuing, 2) /* based on some fair policy */ \ macro (ompt_mutex_impl_speculative, 3) /* based on HW-supported speculation */ -#define FOREACH_OMPT_EVENT(macro) \ - \ - /*--- Mandatory Events ---*/ \ - macro (ompt_callback_thread_begin, ompt_callback_thread_begin_t, 1) /* thread begin */ \ - macro (ompt_callback_thread_end, ompt_callback_thread_end_t, 2) /* thread end */ \ - \ - macro (ompt_callback_parallel_begin, ompt_callback_parallel_begin_t, 3) /* parallel begin */ \ - macro (ompt_callback_parallel_end, ompt_callback_parallel_end_t, 4) /* parallel end */ \ - \ - macro (ompt_callback_task_create, ompt_callback_task_create_t, 5) /* task begin */ \ - macro (ompt_callback_task_schedule, ompt_callback_task_schedule_t, 6) /* task schedule */ \ - macro (ompt_callback_implicit_task, ompt_callback_implicit_task_t, 7) /* implicit task */ \ - \ - macro (ompt_callback_target, ompt_callback_target_t, 8) /* target */ \ - macro (ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9) /* target data op*/ \ - macro (ompt_callback_target_submit, ompt_callback_target_submit_t, 10) /* target submit*/ \ - \ - macro (ompt_callback_control_tool, ompt_callback_control_tool_t, 11) /* control tool */ \ - \ - macro (ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12) /* device initialize */ \ - macro (ompt_callback_device_finalize, ompt_callback_device_finalize_t, 13) /* device finalize */ \ - \ - /*--- Optional Events (blame shifting, ompt_event_unimplemented) ---*/ \ - \ - macro (ompt_callback_sync_region_wait, ompt_callback_sync_region_t, 14) /* sync region wait begin or end*/ \ - \ - macro (ompt_callback_mutex_released, ompt_callback_mutex_t, 15) /* mutex released */ \ - \ - /*--- Optional Events (synchronous events, ompt_event_unimplemented) --- */ \ - \ - macro (ompt_callback_task_dependences, ompt_callback_task_dependences_t, 16) /* report task dependences */\ - macro (ompt_callback_task_dependence, ompt_callback_task_dependence_t, 17) /* report task dependence */\ - \ - macro (ompt_callback_work, ompt_callback_work_t, 18) /* task at work begin or end*/\ - \ - macro (ompt_callback_master, ompt_callback_master_t, 19) /* task at master begin or end */\ - \ - macro (ompt_callback_target_map, ompt_callback_target_map_t, 20) /* target map */ \ - \ - macro (ompt_callback_sync_region, ompt_callback_sync_region_t, 21) /* sync region begin or end */ \ - \ - macro (ompt_callback_lock_init, ompt_callback_mutex_acquire_t, 22) /* lock init */ \ - macro (ompt_callback_lock_destroy, ompt_callback_mutex_t, 23) /* lock destroy */ \ - \ - macro (ompt_callback_mutex_acquire, ompt_callback_mutex_acquire_t, 24) /* mutex acquire */ \ - macro (ompt_callback_mutex_acquired, ompt_callback_mutex_t, 25) /* mutex acquired */ \ - \ - macro (ompt_callback_nest_lock, ompt_callback_nest_lock_t, 26) /* nest lock */ \ - \ - macro (ompt_callback_flush, ompt_callback_flush_t, 27) /* after executing flush */ \ - \ - macro (ompt_callback_cancel, ompt_callback_cancel_t, 28) /*cancel innermost binding region*/\ - macro (ompt_callback_idle, ompt_callback_idle_t, 29) /* begin or end idle state */\ +#define FOREACH_OMPT_EVENT(macro) \ + \ + /*--- Mandatory Events ---*/ \ + macro (ompt_callback_thread_begin, ompt_callback_thread_begin_t, 1) /* thread begin */ \ + macro (ompt_callback_thread_end, ompt_callback_thread_end_t, 2) /* thread end */ \ + \ + macro (ompt_callback_parallel_begin, ompt_callback_parallel_begin_t, 3) /* parallel begin */ \ + macro (ompt_callback_parallel_end, ompt_callback_parallel_end_t, 4) /* parallel end */ \ + \ + macro (ompt_callback_task_create, ompt_callback_task_create_t, 5) /* task begin */ \ + macro (ompt_callback_task_schedule, ompt_callback_task_schedule_t, 6) /* task schedule */ \ + macro (ompt_callback_implicit_task, ompt_callback_implicit_task_t, 7) /* implicit task */ \ + \ + macro (ompt_callback_target, ompt_callback_target_t, 8) /* target */ \ + macro (ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9) /* target data op */ \ + macro (ompt_callback_target_submit, ompt_callback_target_submit_t, 10) /* target submit */ \ + \ + macro (ompt_callback_control_tool, ompt_callback_control_tool_t, 11) /* control tool */ \ + \ + macro (ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12) /* device initialize */ \ + macro (ompt_callback_device_finalize, ompt_callback_device_finalize_t, 13) /* device finalize */ \ + \ + macro (ompt_callback_device_load, ompt_callback_device_load_t, 14) /* device load */ \ + macro (ompt_callback_device_unload, ompt_callback_device_unload_t, 15) /* device unload */ \ + \ + /* Optional Events */ \ + macro (ompt_callback_sync_region_wait, ompt_callback_sync_region_t, 16) /* sync region wait begin or end */ \ + \ + macro (ompt_callback_mutex_released, ompt_callback_mutex_t, 17) /* mutex released */ \ + \ + macro (ompt_callback_task_dependences, ompt_callback_task_dependences_t, 18) /* report task dependences */ \ + macro (ompt_callback_task_dependence, ompt_callback_task_dependence_t, 19) /* report task dependence */ \ + \ + macro (ompt_callback_work, ompt_callback_work_t, 20) /* task at work begin or end */ \ + \ + macro (ompt_callback_master, ompt_callback_master_t, 21) /* task at master begin or end */ \ + \ + macro (ompt_callback_target_map, ompt_callback_target_map_t, 22) /* target map */ \ + \ + macro (ompt_callback_sync_region, ompt_callback_sync_region_t, 23) /* sync region begin or end */ \ + \ + macro (ompt_callback_lock_init, ompt_callback_mutex_acquire_t, 24) /* lock init */ \ + macro (ompt_callback_lock_destroy, ompt_callback_mutex_t, 25) /* lock destroy */ \ + \ + macro (ompt_callback_mutex_acquire, ompt_callback_mutex_acquire_t, 26) /* mutex acquire */ \ + macro (ompt_callback_mutex_acquired, ompt_callback_mutex_t, 27) /* mutex acquired */ \ + \ + macro (ompt_callback_nest_lock, ompt_callback_nest_lock_t, 28) /* nest lock */ \ + \ + macro (ompt_callback_flush, ompt_callback_flush_t, 29) /* after executing flush */ \ + \ + macro (ompt_callback_cancel, ompt_callback_cancel_t, 30) /* cancel innermost binding region */ \ + macro (ompt_callback_idle, ompt_callback_idle_t, 31) /* begin or end idle state */ @@ -163,7 +156,7 @@ typedef uint64_t ompt_id_t; #define ompt_id_none 0 -typedef union ompt_data_u { +typedef union ompt_data_t { uint64_t value; /* data initialized by runtime to unique id */ void *ptr; /* pointer under tool control */ } ompt_data_t; @@ -189,16 +182,16 @@ typedef struct ompt_frame_t { * dependences types *---------------------*/ -typedef enum ompt_task_dependence_flag_e { +typedef enum ompt_task_dependence_flag_t { // a two bit field for the dependence type ompt_task_dependence_type_out = 1, ompt_task_dependence_type_in = 2, ompt_task_dependence_type_inout = 3, } ompt_task_dependence_flag_t; -typedef struct ompt_task_dependence_s { +typedef struct ompt_task_dependence_t { void *variable_addr; - uint32_t dependence_flags; + unsigned int dependence_flags; } ompt_task_dependence_t; @@ -231,7 +224,7 @@ typedef enum ompt_callbacks_e{ /*--------------------- * set callback results *---------------------*/ -typedef enum ompt_set_result_e { +typedef enum ompt_set_result_t { ompt_set_error = 0, ompt_set_never = 1, ompt_set_sometimes = 2, @@ -243,7 +236,7 @@ typedef enum ompt_set_result_e { /*---------------------- * mutex implementations *----------------------*/ -typedef enum ompt_mutex_impl_e { +typedef enum ompt_mutex_impl_t { #define ompt_mutex_impl_macro(impl, code) impl = code, FOREACH_OMPT_MUTEX_IMPL(ompt_mutex_impl_macro) #undef ompt_mutex_impl_macro @@ -262,13 +255,14 @@ typedef ompt_interface_fn_t (*ompt_function_lookup_t)( ); /* threads */ -typedef enum { +typedef enum ompt_thread_type_t { ompt_thread_initial = 1, // start the enumeration at 1 ompt_thread_worker = 2, - ompt_thread_other = 3 + ompt_thread_other = 3, + ompt_thread_unknown = 4 } ompt_thread_type_t; -typedef enum { +typedef enum ompt_invoker_t { ompt_invoker_program = 1, /* program invokes master task */ ompt_invoker_runtime = 2 /* runtime invokes master task */ } ompt_invoker_t; @@ -287,9 +281,9 @@ typedef void (*ompt_wait_callback_t) ( ); /* parallel and workshares */ -typedef enum ompt_scope_endpoint_e { +typedef enum ompt_scope_endpoint_t { ompt_scope_begin = 1, - ompt_scope_end = 2 + ompt_scope_end = 2 } ompt_scope_endpoint_t; @@ -303,23 +297,23 @@ typedef void (*ompt_callback_implicit_task_t) ( ); typedef void (*ompt_callback_parallel_begin_t) ( - ompt_data_t *parent_task_data, /* data of parent task */ - const ompt_frame_t *parent_frame, /* frame data of parent task */ - ompt_data_t *parallel_data, /* data of parallel region */ - unsigned int requested_team_size, /* requested number of threads in team */ - ompt_invoker_t invoker, /* invoker of master task */ - const void *codeptr_ra /* return address of runtime call */ + ompt_data_t *encountering_task_data, /* data of encountering task */ + const ompt_frame_t *encountering_task_frame, /* frame data of encountering task */ + ompt_data_t *parallel_data, /* data of parallel region */ + unsigned int requested_team_size, /* requested number of threads in team */ + ompt_invoker_t invoker, /* invoker of master task */ + const void *codeptr_ra /* return address of runtime call */ ); typedef void (*ompt_callback_parallel_end_t) ( ompt_data_t *parallel_data, /* data of parallel region */ - ompt_data_t *task_data, /* data of task */ + ompt_data_t *encountering_task_data, /* data of encountering task */ ompt_invoker_t invoker, /* invoker of master task */ const void *codeptr_ra /* return address of runtime call */ ); /* tasks */ -typedef enum ompt_task_type_e { +typedef enum ompt_task_type_t { ompt_task_initial = 0x1, ompt_task_implicit = 0x2, ompt_task_explicit = 0x4, @@ -331,7 +325,7 @@ typedef enum ompt_task_type_e { ompt_task_merged = 0x80000000 } ompt_task_type_t; -typedef enum ompt_task_status_e { +typedef enum ompt_task_status_t { ompt_task_complete = 1, ompt_task_yield = 2, ompt_task_cancel = 3, @@ -345,12 +339,12 @@ typedef void (*ompt_callback_task_schedule_t) ( ); typedef void (*ompt_callback_task_create_t) ( - ompt_data_t *parent_task_data, /* data of parent task */ - const ompt_frame_t *parent_frame, /* frame data for parent task */ - ompt_data_t *new_task_data, /* data of created task */ - int type, /* type of created task */ - int has_dependences, /* created task has dependences */ - const void *codeptr_ra /* return address of runtime call */ + ompt_data_t *encountering_task_data, /* data of parent task */ + const ompt_frame_t *encountering_task_frame, /* frame data for parent task */ + ompt_data_t *new_task_data, /* data of created task */ + int type, /* type of created task */ + int has_dependences, /* created task has dependences */ + const void *codeptr_ra /* return address of runtime call */ ); /* task dependences */ @@ -366,7 +360,7 @@ typedef void (*ompt_callback_task_dependence_t) ( ); /* target and device */ -typedef enum ompt_target_type_e { +typedef enum ompt_target_type_t { ompt_target = 1, ompt_target_enter_data = 2, ompt_target_exit_data = 3, @@ -382,7 +376,7 @@ typedef void (*ompt_callback_target_t) ( const void *codeptr_ra ); -typedef enum ompt_target_data_op_e { +typedef enum ompt_target_data_op_t { ompt_target_data_alloc = 1, ompt_target_data_transfer_to_dev = 2, ompt_target_data_transfer_from_dev = 3, @@ -424,6 +418,24 @@ typedef void (*ompt_callback_device_finalize_t) ( uint64_t device_num ); +typedef void (*ompt_callback_device_load_t) ( + uint64_t device_num, + const char * filename, + int64_t offset_in_file, + void * vma_in_file, + size_t bytes, + void * host_addr, + void * device_addr, + uint64_t module_id +); + +#define ompt_addr_unknown ((void *) ~0) + +typedef void (*ompt_callback_device_unload_t) ( + uint64_t device_num, + uint64_t module_id +); + /* control_tool */ typedef int (*ompt_callback_control_tool_t) ( uint64_t command, /* command of control call */ @@ -432,13 +444,13 @@ typedef int (*ompt_callback_control_tool_t) ( const void *codeptr_ra /* return address of runtime call */ ); -typedef enum ompt_mutex_kind_e { - ompt_mutex = 0x10, - ompt_mutex_lock = 0x11, +typedef enum ompt_mutex_kind_t { + ompt_mutex = 0x10, + ompt_mutex_lock = 0x11, ompt_mutex_nest_lock = 0x12, - ompt_mutex_critical = 0x13, - ompt_mutex_atomic = 0x14, - ompt_mutex_ordered = 0x20 + ompt_mutex_critical = 0x13, + ompt_mutex_atomic = 0x14, + ompt_mutex_ordered = 0x20 } ompt_mutex_kind_t; typedef void (*ompt_callback_mutex_acquire_t) ( @@ -472,14 +484,14 @@ typedef void (*ompt_callback_idle_t) ( ompt_scope_endpoint_t endpoint /* endpoint of idle time */ ); -typedef enum ompt_work_type_e { - ompt_work_loop = 1, - ompt_work_sections = 2, +typedef enum ompt_work_type_t { + ompt_work_loop = 1, + ompt_work_sections = 2, ompt_work_single_executor = 3, - ompt_work_single_other = 4, - ompt_work_workshare = 5, - ompt_work_distribute = 6, - ompt_work_taskloop = 7 + ompt_work_single_other = 4, + ompt_work_workshare = 5, + ompt_work_distribute = 6, + ompt_work_taskloop = 7 } ompt_work_type_t; typedef void (*ompt_callback_work_t) ( @@ -491,9 +503,9 @@ typedef void (*ompt_callback_work_t) ( const void *codeptr_ra /* return address of runtime call */ ); -typedef enum ompt_sync_region_kind_e { - ompt_sync_region_barrier = 1, - ompt_sync_region_taskwait = 2, +typedef enum ompt_sync_region_kind_t { + ompt_sync_region_barrier = 1, + ompt_sync_region_taskwait = 2, ompt_sync_region_taskgroup = 3 } ompt_sync_region_kind_t; @@ -505,7 +517,7 @@ typedef void (*ompt_callback_sync_region_t) ( const void *codeptr_ra /* return address of runtime call */ ); -typedef enum ompt_cancel_flag_e { +typedef enum ompt_cancel_flag_t { ompt_cancel_parallel = 0x1, ompt_cancel_sections = 0x2, ompt_cancel_do = 0x4, @@ -591,61 +603,29 @@ OMPT_API_FUNCTION(int, ompt_get_proc_id, (void)); /**************************************************************************** - * PLACEHOLDERS FOR PERFORMANCE REPORTING - ***************************************************************************/ - -/* idle */ -OMPT_API_FUNCTION(void, ompt_idle, ( - void -)); - -/* overhead */ -OMPT_API_FUNCTION(void, ompt_overhead, ( - void -)); - -/* barrier wait */ -OMPT_API_FUNCTION(void, ompt_barrier_wait, ( - void -)); - -/* task wait */ -OMPT_API_FUNCTION(void, ompt_task_wait, ( - void -)); - -/* mutex wait */ -OMPT_API_FUNCTION(void, ompt_mutex_wait, ( - void -)); - - - -/**************************************************************************** * INITIALIZATION FUNCTIONS ***************************************************************************/ -typedef struct ompt_fns_t ompt_fns_t; - OMPT_API_FUNCTION(int, ompt_initialize, ( ompt_function_lookup_t ompt_fn_lookup, - ompt_fns_t *fns + ompt_data_t *tool_data )); OMPT_API_FUNCTION(void, ompt_finalize, ( - ompt_fns_t *fns + ompt_data_t *tool_data )); -struct ompt_fns_t { +typedef struct ompt_start_tool_result_t { ompt_initialize_t initialize; ompt_finalize_t finalize; -}; + ompt_data_t tool_data; +} ompt_start_tool_result_t; /* initialization interface to be defined by tool */ #ifdef _WIN32 __declspec(dllexport) #endif -ompt_fns_t * ompt_start_tool( +ompt_start_tool_result_t * ompt_start_tool( unsigned int omp_version, const char * runtime_version ); @@ -657,15 +637,6 @@ OMPT_API_FUNCTION(int, ompt_set_callback, ( ompt_callback_t callback )); -typedef enum ompt_set_callback_rc_e { /* non-standard */ - ompt_set_callback_error = 0, - ompt_has_event_no_callback = 1, - ompt_no_event_no_callback = 2, - ompt_has_event_may_callback = 3, - ompt_has_event_must_callback = 4, -} ompt_set_callback_rc_t; - - OMPT_API_FUNCTION(int, ompt_get_callback, ( ompt_callbacks_t which, ompt_callback_t *callback diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp index d39d9f0..b99b46b 100644 --- a/openmp/runtime/src/kmp_csupport.cpp +++ b/openmp/runtime/src/kmp_csupport.cpp @@ -20,6 +20,7 @@ #include "kmp_stats.h" #if OMPT_SUPPORT +#include "ompt-internal.h" #include "ompt-specific.h" #endif diff --git a/openmp/runtime/src/kmp_dispatch.cpp b/openmp/runtime/src/kmp_dispatch.cpp index cb2bc3f..3da6f58 100644 --- a/openmp/runtime/src/kmp_dispatch.cpp +++ b/openmp/runtime/src/kmp_dispatch.cpp @@ -36,6 +36,7 @@ #endif #if OMPT_SUPPORT +#include "ompt-internal.h" #include "ompt-specific.h" #endif diff --git a/openmp/runtime/src/kmp_gsupport.cpp b/openmp/runtime/src/kmp_gsupport.cpp index 9068163..b0b2349 100644 --- a/openmp/runtime/src/kmp_gsupport.cpp +++ b/openmp/runtime/src/kmp_gsupport.cpp @@ -429,8 +429,8 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data, if (ompt_enabled.enabled) { __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); parent_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(gtid); } - OMPT_STORE_RETURN_ADDRESS(gtid); #endif MKLOC(loc, "GOMP_parallel_start"); @@ -832,8 +832,8 @@ LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT), if (ompt_enabled.enabled) { \ __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); \ parent_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1); \ - } \ - OMPT_STORE_RETURN_ADDRESS(gtid); + OMPT_STORE_RETURN_ADDRESS(gtid); \ + } #define OMPT_LOOP_POST() \ if (ompt_enabled.enabled) { \ @@ -1055,8 +1055,8 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(void (*task)(void *), if (ompt_enabled.enabled) { __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); parent_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(gtid); } - OMPT_STORE_RETURN_ADDRESS(gtid); #endif MKLOC(loc, "GOMP_parallel_sections_start"); @@ -1131,8 +1131,8 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *), void *data, if (ompt_enabled.enabled) { parent_task_info = __ompt_get_task_info_object(0); parent_task_info->frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(gtid); } - OMPT_STORE_RETURN_ADDRESS(gtid); #endif if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { if (num_threads != 0) { diff --git a/openmp/runtime/src/kmp_i18n.cpp b/openmp/runtime/src/kmp_i18n.cpp index 42e1c70..757f3b6 100644 --- a/openmp/runtime/src/kmp_i18n.cpp +++ b/openmp/runtime/src/kmp_i18n.cpp @@ -38,9 +38,6 @@ kmp_msg_t __kmp_msg_empty = {kmp_mt_dummy, 0, "", 0}; kmp_msg_t __kmp_msg_null = {kmp_mt_dummy, 0, NULL, 0}; static char const *no_message_available = "(No message available)"; -static void __kmp_msg(kmp_msg_severity_t severity, kmp_msg_t message, - va_list ap); - enum kmp_i18n_cat_status { KMP_I18N_CLOSED, // Not yet opened or closed. KMP_I18N_OPENED, // Opened successfully, ready to use. @@ -787,7 +784,9 @@ kmp_msg_t __kmp_msg_error_mesg(char const *mesg) { } // __kmp_msg_error_mesg // ----------------------------------------------------------------------------- -void __kmp_msg(kmp_msg_severity_t severity, kmp_msg_t message, va_list args) { +void __kmp_msg(kmp_msg_severity_t severity, kmp_msg_t message, ...) { + + va_list args; kmp_i18n_id_t format; // format identifier kmp_msg_t fmsg; // formatted message kmp_str_buf_t buffer; @@ -816,6 +815,7 @@ void __kmp_msg(kmp_msg_severity_t severity, kmp_msg_t message, va_list args) { __kmp_str_free(&fmsg.str); // Format other messages. + va_start(args, message); for (;;) { message = va_arg(args, kmp_msg_t); if (message.type == kmp_mt_dummy && message.str == NULL) { @@ -838,6 +838,7 @@ void __kmp_msg(kmp_msg_severity_t severity, kmp_msg_t message, va_list args) { __kmp_str_buf_cat(&buffer, fmsg.str, fmsg.len); __kmp_str_free(&fmsg.str); } + va_end(args); // Print formatted messages. // This lock prevents multiple fatal errors on the same problem. @@ -851,18 +852,8 @@ void __kmp_msg(kmp_msg_severity_t severity, kmp_msg_t message, va_list args) { } // __kmp_msg -void __kmp_msg(kmp_msg_severity_t severity, kmp_msg_t message, ...) { - va_list args; - va_start(args, message); - __kmp_msg(severity, message, args); - va_end(args); -} - void __kmp_fatal(kmp_msg_t message, ...) { - va_list args; - va_start(args, message); - __kmp_msg(kmp_ms_fatal, message, args); - va_end(args); + __kmp_msg(kmp_ms_fatal, message, __kmp_msg_null); #if KMP_OS_WINDOWS // Delay to give message a chance to appear before reaping __kmp_thread_sleep(500); diff --git a/openmp/runtime/src/kmp_itt.inl b/openmp/runtime/src/kmp_itt.inl index 65c3a81..01b1ea0 100644 --- a/openmp/runtime/src/kmp_itt.inl +++ b/openmp/runtime/src/kmp_itt.inl @@ -3,6 +3,7 @@ * kmp_itt.inl -- Inline functions of ITT Notify. */ + //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,6 +13,7 @@ // //===----------------------------------------------------------------------===// + // Inline function definitions. This file should be included into kmp_itt.h file // for production build (to let compliler inline functions) or into kmp_itt.c // file for debug build (to reduce the number of files to recompile and save diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp index 9c7dcaf..bb18a6d 100644 --- a/openmp/runtime/src/kmp_lock.cpp +++ b/openmp/runtime/src/kmp_lock.cpp @@ -3058,12 +3058,11 @@ kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock, if (idx == __kmp_i_lock_table.size) { // Double up the space for block pointers int row = __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; - kmp_indirect_lock_t **new_table = (kmp_indirect_lock_t **)__kmp_allocate( + kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table; + __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate( 2 * row * sizeof(kmp_indirect_lock_t *)); - KMP_MEMCPY(new_table, __kmp_i_lock_table.table, + KMP_MEMCPY(__kmp_i_lock_table.table, old_table, row * sizeof(kmp_indirect_lock_t *)); - kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table; - __kmp_i_lock_table.table = new_table; __kmp_free(old_table); // Allocate new objects in the new blocks for (int i = row; i < 2 * row; ++i) diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 438f7c1..574cfc4 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -1097,9 +1097,10 @@ inline static void propagateFPControl(kmp_team_t *team) { // There is no point looking at t_fp_control_saved here. // If it is TRUE, we still have to update the values if they are different - // from those we now have. If it is FALSE we didn't save anything yet, but - // our objective is the same. We have to ensure that the values in the team - // are the same as those we have. + // from those we now have. + // If it is FALSE we didn't save anything yet, but our objective is the + // same. We have to ensure that the values in the team are the same as + // those we have. // So, this code achieves what we need whether or not t_fp_control_saved is // true. By checking whether the value needs updating we avoid unnecessary // writes that would put the cache-line into a written state, causing all diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index 154a9f9..748f36a 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -536,6 +536,7 @@ static void __kmpc_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid, taskdata->td_flags.task_serial = 1; // Execute this task immediately, not deferred. + __kmp_task_start(gtid, task, current_task); #if OMPT_SUPPORT @@ -566,8 +567,8 @@ static void __kmpc_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task, void *frame_address, void *return_address) { - __kmpc_omp_task_begin_if0_template(loc_ref, gtid, task, frame_address, - return_address); + return __kmpc_omp_task_begin_if0_template( + loc_ref, gtid, task, frame_address, return_address); } #endif // OMPT_SUPPORT @@ -588,7 +589,8 @@ void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid, return; } #endif - __kmpc_omp_task_begin_if0_template(loc_ref, gtid, task, NULL, NULL); + return __kmpc_omp_task_begin_if0_template(loc_ref, gtid, task, NULL, + NULL); } #ifdef TASK_UNUSED @@ -843,6 +845,7 @@ static void __kmpc_omp_task_complete_if0_template(ident_t *loc_ref, kmp_task_t *task) { KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n", gtid, loc_ref, KMP_TASK_TO_TASKDATA(task))); + // this routine will provide task to resume __kmp_task_finish(gtid, task, NULL); @@ -965,7 +968,7 @@ void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr, } #if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled)) + if (__builtin_expect(ompt_enabled.enabled, 0)) __ompt_task_init(task, tid); #endif @@ -1211,7 +1214,7 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid, ANNOTATE_HAPPENS_BEFORE(task); #if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled)) + if (__builtin_expect(ompt_enabled.enabled, 0)) __ompt_task_init(taskdata, gtid); #endif @@ -1306,7 +1309,7 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task, #if OMPT_SUPPORT ompt_thread_info_t oldInfo; kmp_info_t *thread; - if (UNLIKELY(ompt_enabled.enabled)) { + if (__builtin_expect(ompt_enabled.enabled, 0)) { // Store the threads states and restore them after the task thread = __kmp_threads[gtid]; oldInfo = thread->th.ompt_thread_info; @@ -1330,7 +1333,7 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task, (this_team->t.t_cancel_request == cancel_parallel)) { #if OMPT_SUPPORT && OMPT_OPTIONAL ompt_data_t *task_data; - if (UNLIKELY(ompt_enabled.ompt_callback_cancel)) { + if (__builtin_expect(ompt_enabled.ompt_callback_cancel, 0)) { __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL); ompt_callbacks.ompt_callback(ompt_callback_cancel)( task_data, @@ -1376,7 +1379,7 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task, // OMPT task begin #if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled)) + if (__builtin_expect(ompt_enabled.enabled, 0)) __ompt_task_start(task, current_task, gtid); #endif @@ -1391,7 +1394,7 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task, KMP_POP_PARTITIONED_TIMER(); #if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled)) + if (__builtin_expect(ompt_enabled.enabled, 0)) __ompt_task_finish(task, current_task); #endif #if OMP_40_ENABLED @@ -1399,7 +1402,7 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task, #endif // OMP_40_ENABLED #if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled)) { + if (__builtin_expect(ompt_enabled.enabled, 0)) { thread->th.ompt_thread_info = oldInfo; taskdata->ompt_task_info.frame.exit_frame = NULL; } @@ -1450,7 +1453,7 @@ kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid, #if OMPT_SUPPORT kmp_taskdata_t *parent; - if (UNLIKELY(ompt_enabled.enabled)) { + if (__builtin_expect(ompt_enabled.enabled, 0)) { parent = new_taskdata->td_parent; if (ompt_enabled.ompt_callback_task_create) { ompt_data_t task_data = ompt_data_none; @@ -1481,7 +1484,7 @@ kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid, ANNOTATE_HAPPENS_BEFORE(new_task); #if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled)) { + if (__builtin_expect(ompt_enabled.enabled, 0)) { parent->ompt_task_info.frame.enter_frame = NULL; } #endif @@ -1547,7 +1550,8 @@ kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid, #if OMPT_SUPPORT kmp_taskdata_t *parent = NULL; - if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) { + if (__builtin_expect(ompt_enabled.enabled && !new_taskdata->td_flags.started, + 0)) { OMPT_STORE_RETURN_ADDRESS(gtid); parent = new_taskdata->td_parent; if (!parent->ompt_task_info.frame.enter_frame) @@ -1570,7 +1574,7 @@ kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid, "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n", gtid, loc_ref, new_taskdata)); #if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) { + if (__builtin_expect(ompt_enabled.enabled && parent != NULL, 0)) { parent->ompt_task_info.frame.enter_frame = NULL; } #endif @@ -1699,8 +1703,9 @@ kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) { #if OMPT_SUPPORT && OMPT_OPTIONAL if (UNLIKELY(ompt_enabled.enabled)) { OMPT_STORE_RETURN_ADDRESS(gtid); - return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(1), - OMPT_LOAD_RETURN_ADDRESS(gtid)); + return __kmpc_omp_taskwait_template(loc_ref, gtid, + OMPT_GET_FRAME_ADDRESS(1), + OMPT_LOAD_RETURN_ADDRESS(gtid)); } #endif return __kmpc_omp_taskwait_template(loc_ref, gtid, NULL, NULL); @@ -1741,7 +1746,7 @@ kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) { if (task_team != NULL) { if (KMP_TASKING_ENABLED(task_team)) { #if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled)) + if (__builtin_expect(ompt_enabled.enabled, 0)) thread->th.ompt_thread_info.ompt_task_yielded = 1; #endif __kmp_execute_tasks_32( @@ -1749,7 +1754,7 @@ kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) { &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint); #if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled)) + if (__builtin_expect(ompt_enabled.enabled, 0)) thread->th.ompt_thread_info.ompt_task_yielded = 0; #endif } @@ -1984,7 +1989,7 @@ void __kmpc_taskgroup(ident_t *loc, int gtid) { taskdata->td_taskgroup = tg_new; #if OMPT_SUPPORT && OMPT_OPTIONAL - if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) { + if (__builtin_expect(ompt_enabled.ompt_callback_sync_region, 0)) { void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); if (!codeptr) codeptr = OMPT_GET_RETURN_ADDRESS(0); @@ -2013,7 +2018,7 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) { ompt_data_t my_task_data; ompt_data_t my_parallel_data; void *codeptr; - if (UNLIKELY(ompt_enabled.enabled)) { + if (__builtin_expect(ompt_enabled.enabled, 0)) { team = thread->th.th_team; my_task_data = taskdata->ompt_task_info.task_data; // FIXME: I think this is wrong for lwt! @@ -2038,7 +2043,7 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) { #endif /* USE_ITT_BUILD */ #if OMPT_SUPPORT && OMPT_OPTIONAL - if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) { + if (__builtin_expect(ompt_enabled.ompt_callback_sync_region_wait, 0)) { ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data), &(my_task_data), codeptr); @@ -2062,7 +2067,7 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) { } #if OMPT_SUPPORT && OMPT_OPTIONAL - if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) { + if (__builtin_expect(ompt_enabled.ompt_callback_sync_region_wait, 0)) { ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data), &(my_task_data), codeptr); @@ -2090,7 +2095,7 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) { ANNOTATE_HAPPENS_AFTER(taskdata); #if OMPT_SUPPORT && OMPT_OPTIONAL - if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) { + if (__builtin_expect(ompt_enabled.ompt_callback_sync_region, 0)) { ompt_callbacks.ompt_callback(ompt_callback_sync_region)( ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data), &(my_task_data), codeptr); @@ -3484,7 +3489,7 @@ kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) { ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n", thread, taskdata, taskdata->td_parent)); #if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled)) + if (__builtin_expect(ompt_enabled.enabled, 0)) __ompt_task_init(taskdata, thread->th.th_info.ds.ds_gtid); #endif return task; diff --git a/openmp/runtime/src/kmp_threadprivate.cpp b/openmp/runtime/src/kmp_threadprivate.cpp index 362c075..bbcfadd 100644 --- a/openmp/runtime/src/kmp_threadprivate.cpp +++ b/openmp/runtime/src/kmp_threadprivate.cpp @@ -226,13 +226,6 @@ void __kmp_common_destroy_gtid(int gtid) { struct private_common *tn; struct shared_common *d_tn; - if (!TCR_4(__kmp_init_gtid)) { - // This is possible when one of multiple roots initiates early library - // termination in a sequential region while other teams are active, and its - // child threads are about to end. - return; - } - KC_TRACE(10, ("__kmp_common_destroy_gtid: T#%d called\n", gtid)); if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid)) : (!KMP_UBER_GTID(gtid))) { diff --git a/openmp/runtime/src/ompt-event-specific.h b/openmp/runtime/src/ompt-event-specific.h index 5f9a980..86ee699 100644 --- a/openmp/runtime/src/ompt-event-specific.h +++ b/openmp/runtime/src/ompt-event-specific.h @@ -36,70 +36,66 @@ | Mandatory Events +--------------------------------------------------------------------------*/ +#define ompt_callback_thread_begin_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_thread_end_implemented ompt_event_MAY_ALWAYS + #define ompt_callback_parallel_begin_implemented ompt_event_MAY_ALWAYS #define ompt_callback_parallel_end_implemented ompt_event_MAY_ALWAYS #define ompt_callback_task_create_implemented ompt_event_MAY_ALWAYS #define ompt_callback_task_schedule_implemented ompt_event_MAY_ALWAYS -#define ompt_callback_thread_begin_implemented ompt_event_MAY_ALWAYS -#define ompt_callback_thread_end_implemented ompt_event_MAY_ALWAYS - -#define ompt_callback_control_tool_implemented ompt_event_MAY_ALWAYS - #define ompt_callback_implicit_task_implemented ompt_event_MAY_ALWAYS -/*---------------------------------------------------------------------------- - | Target Related Events (not yet implemented) - +--------------------------------------------------------------------------*/ - #define ompt_callback_target_implemented ompt_event_UNIMPLEMENTED #define ompt_callback_target_data_op_implemented ompt_event_UNIMPLEMENTED #define ompt_callback_target_submit_implemented ompt_event_UNIMPLEMENTED + +#define ompt_callback_control_tool_implemented ompt_event_MAY_ALWAYS + #define ompt_callback_device_initialize_implemented ompt_event_UNIMPLEMENTED #define ompt_callback_device_finalize_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_map_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_device_load_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_device_unload_implemented ompt_event_UNIMPLEMENTED /*---------------------------------------------------------------------------- - | Optional Events (blame shifting) + | Optional Events +--------------------------------------------------------------------------*/ -#define ompt_callback_idle_implemented ompt_event_MAY_ALWAYS_OPTIONAL - #define ompt_callback_sync_region_wait_implemented \ ompt_event_MAY_ALWAYS_OPTIONAL #define ompt_callback_mutex_released_implemented ompt_event_MAY_ALWAYS_OPTIONAL -/*---------------------------------------------------------------------------- - | Optional Events (synchronous events) - +--------------------------------------------------------------------------*/ +#if OMP_40_ENABLED +#define ompt_callback_task_dependences_implemented \ + ompt_event_MAY_ALWAYS_OPTIONAL +#define ompt_callback_task_dependence_implemented ompt_event_MAY_ALWAYS_OPTIONAL +#else +#define ompt_callback_task_dependences_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_task_dependence_implemented ompt_event_UNIMPLEMENTED +#endif /* OMP_40_ENABLED */ #define ompt_callback_work_implemented ompt_event_MAY_ALWAYS_OPTIONAL #define ompt_callback_master_implemented ompt_event_MAY_ALWAYS_OPTIONAL +#define ompt_callback_target_map_implemented ompt_event_UNIMPLEMENTED + #define ompt_callback_sync_region_implemented ompt_event_MAY_ALWAYS_OPTIONAL +#define ompt_callback_lock_init_implemented ompt_event_MAY_ALWAYS_OPTIONAL +#define ompt_callback_lock_destroy_implemented ompt_event_MAY_ALWAYS_OPTIONAL + #define ompt_callback_mutex_acquire_implemented ompt_event_MAY_ALWAYS_OPTIONAL #define ompt_callback_mutex_acquired_implemented ompt_event_MAY_ALWAYS_OPTIONAL #define ompt_callback_nest_lock_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_callback_lock_init_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_callback_lock_destroy_implemented ompt_event_MAY_ALWAYS_OPTIONAL - #define ompt_callback_flush_implemented ompt_event_MAY_ALWAYS_OPTIONAL #define ompt_callback_cancel_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#if OMP_40_ENABLED -#define ompt_callback_task_dependences_implemented \ - ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_callback_task_dependence_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#else -#define ompt_callback_task_dependences_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_task_dependence_implemented ompt_event_UNIMPLEMENTED -#endif /* OMP_40_ENABLED */ +#define ompt_callback_idle_implemented ompt_event_MAY_ALWAYS_OPTIONAL #endif diff --git a/openmp/runtime/src/ompt-general.cpp b/openmp/runtime/src/ompt-general.cpp index a6f0647..753fced 100644 --- a/openmp/runtime/src/ompt-general.cpp +++ b/openmp/runtime/src/ompt-general.cpp @@ -74,7 +74,7 @@ ompt_mutex_impl_info_t ompt_mutex_impl_info[] = { ompt_callbacks_internal_t ompt_callbacks; -static ompt_fns_t *ompt_fns = NULL; +static ompt_start_tool_result_t *ompt_start_tool_result = NULL; /***************************************************************************** * forward declarations @@ -97,7 +97,8 @@ OMPT_API_ROUTINE ompt_data_t *ompt_get_thread_data(void); * found, ompt_tool's return value is used to initialize the tool. Otherwise, * NULL is returned and OMPT won't be enabled */ -typedef ompt_fns_t *(*ompt_start_tool_t)(unsigned int, const char *); +typedef ompt_start_tool_result_t *(*ompt_start_tool_t)(unsigned int, + const char *); #if KMP_OS_UNIX @@ -109,13 +110,13 @@ _OMP_EXTERN #else #error Activation of OMPT is not supported on this platform. #endif -ompt_fns_t * +ompt_start_tool_result_t * ompt_start_tool(unsigned int omp_version, const char *runtime_version) { #ifdef KMP_DYNAMIC_LIB - ompt_fns_t *ret = NULL; + ompt_start_tool_result_t *ret = NULL; // Try next symbol in the address space ompt_start_tool_t next_tool = NULL; - next_tool = (ompt_start_tool_t)dlsym(RTLD_NEXT, "ompt_start_tool"); + *(void **)(&next_tool) = dlsym(RTLD_NEXT, "ompt_start_tool"); if (next_tool) ret = (next_tool)(omp_version, runtime_version); return ret; @@ -136,8 +137,8 @@ ompt_start_tool(unsigned int omp_version, const char *runtime_version) { // The number of loaded modules to start enumeration with EnumProcessModules() #define NUM_MODULES 128 -static ompt_fns_t *ompt_tool_windows(unsigned int omp_version, - const char *runtime_version) { +static ompt_start_tool_result_t * +ompt_tool_windows(unsigned int omp_version, const char *runtime_version) { int i; DWORD needed, new_size; HMODULE *modules; @@ -195,9 +196,9 @@ static ompt_fns_t *ompt_tool_windows(unsigned int omp_version, #error Either __attribute__((weak)) or psapi.dll are required for OMPT support #endif // OMPT_HAVE_WEAK_ATTRIBUTE -static ompt_fns_t *ompt_try_start_tool(unsigned int omp_version, - const char *runtime_version) { - ompt_fns_t *ret = NULL; +static ompt_start_tool_result_t * +ompt_try_start_tool(unsigned int omp_version, const char *runtime_version) { + ompt_start_tool_result_t *ret = NULL; ompt_start_tool_t start_tool = NULL; #if KMP_OS_WINDOWS // Cannot use colon to describe a list of absolute paths on Windows @@ -220,7 +221,7 @@ static ompt_fns_t *ompt_try_start_tool(unsigned int omp_version, #if KMP_OS_UNIX void *h = dlopen(fname, RTLD_LAZY); if (h) { - start_tool = (ompt_start_tool_t)dlsym(h, "ompt_start_tool"); + *(void **)(&start_tool) = dlsym(h, "ompt_start_tool"); #elif KMP_OS_WINDOWS HMODULE h = LoadLibrary(fname); if (h) { @@ -275,7 +276,7 @@ void ompt_pre_init() { //-------------------------------------------------- // Load tool iff specified in environment variable //-------------------------------------------------- - ompt_fns = + ompt_start_tool_result = ompt_try_start_tool(__kmp_openmp_version, ompt_get_runtime_version()); memset(&ompt_enabled, 0, sizeof(ompt_enabled)); @@ -307,8 +308,9 @@ void ompt_post_init() { //-------------------------------------------------- // Initialize the tool if so indicated. //-------------------------------------------------- - if (ompt_fns) { - ompt_enabled.enabled = !!ompt_fns->initialize(ompt_fn_lookup, ompt_fns); + if (ompt_start_tool_result) { + ompt_enabled.enabled = !!ompt_start_tool_result->initialize( + ompt_fn_lookup, &(ompt_start_tool_result->tool_data)); ompt_thread_t *root_thread = ompt_get_thread(); @@ -331,7 +333,7 @@ void ompt_post_init() { void ompt_fini() { if (ompt_enabled.enabled) { - ompt_fns->finalize(ompt_fns); + ompt_start_tool_result->finalize(&(ompt_start_tool_result->tool_data)); } memset(&ompt_enabled, 0, sizeof(ompt_enabled)); @@ -577,56 +579,6 @@ OMPT_API_ROUTINE int ompt_get_proc_id(void) { } /***************************************************************************** - * placeholders - ****************************************************************************/ - -// Don't define this as static. The loader may choose to eliminate the symbol -// even though it is needed by tools. -#define OMPT_API_PLACEHOLDER - -// Ensure that placeholders don't have mangled names in the symbol table. -#ifdef __cplusplus -extern "C" { -#endif - -OMPT_API_PLACEHOLDER void ompt_idle(void) { - // This function is a placeholder used to represent the calling context of - // idle OpenMP worker threads. It is not meant to be invoked. - assert(0); -} - -OMPT_API_PLACEHOLDER void ompt_overhead(void) { - // This function is a placeholder used to represent the OpenMP context of - // threads working in the OpenMP runtime. It is not meant to be invoked. - assert(0); -} - -OMPT_API_PLACEHOLDER void ompt_barrier_wait(void) { - // This function is a placeholder used to represent the OpenMP context of - // threads waiting for a barrier in the OpenMP runtime. It is not meant - // to be invoked. - assert(0); -} - -OMPT_API_PLACEHOLDER void ompt_task_wait(void) { - // This function is a placeholder used to represent the OpenMP context of - // threads waiting for a task in the OpenMP runtime. It is not meant - // to be invoked. - assert(0); -} - -OMPT_API_PLACEHOLDER void ompt_mutex_wait(void) { - // This function is a placeholder used to represent the OpenMP context of - // threads waiting for a mutex in the OpenMP runtime. It is not meant - // to be invoked. - assert(0); -} - -#ifdef __cplusplus -}; -#endif - -/***************************************************************************** * compatability ****************************************************************************/ @@ -689,7 +641,5 @@ static ompt_interface_fn_t ompt_fn_lookup(const char *s) { FOREACH_OMPT_INQUIRY_FN(ompt_interface_fn) - FOREACH_OMPT_PLACEHOLDER_FN(ompt_interface_fn) - return (ompt_interface_fn_t)0; } diff --git a/openmp/runtime/src/ompt-internal.h b/openmp/runtime/src/ompt-internal.h index 6139e00..e529572 100644 --- a/openmp/runtime/src/ompt-internal.h +++ b/openmp/runtime/src/ompt-internal.h @@ -31,6 +31,8 @@ typedef struct ompt_callbacks_active_s { #undef ompt_event_macro } ompt_callbacks_active_t; +typedef struct kmp_taskdata kmp_taskdata_t; + #define TASK_TYPE_DETAILS_FORMAT(info) \ ((info->td_flags.task_serial || info->td_flags.tasking_ser) \ ? ompt_task_undeferred \ @@ -42,7 +44,7 @@ typedef struct ompt_callbacks_active_s { typedef struct { ompt_frame_t frame; ompt_data_t task_data; - struct kmp_taskdata *scheduling_parent; + kmp_taskdata_t *scheduling_parent; #if OMP_40_ENABLED int ndeps; ompt_task_dependence_t *deps; diff --git a/openmp/runtime/src/ompt-specific.cpp b/openmp/runtime/src/ompt-specific.cpp index 9631c2b..8097ce4 100644 --- a/openmp/runtime/src/ompt-specific.cpp +++ b/openmp/runtime/src/ompt-specific.cpp @@ -3,6 +3,7 @@ //****************************************************************************** #include "kmp.h" +#include "ompt-internal.h" #include "ompt-specific.h" #if KMP_OS_UNIX diff --git a/openmp/runtime/test/lock/omp_init_lock.c b/openmp/runtime/test/lock/omp_init_lock.c deleted file mode 100644 index 24b60d1..0000000 --- a/openmp/runtime/test/lock/omp_init_lock.c +++ /dev/null @@ -1,42 +0,0 @@ -// RUN: %libomp-compile-and-run -#include "omp_testsuite.h" -#include - -// This should be slightly less than KMP_I_LOCK_CHUNK, which is 1024 -#define LOCKS_PER_ITER 1000 -#define ITERATIONS (REPETITIONS + 1) - -// This tests concurrently using locks on one thread while initializing new -// ones on another thread. This exercises the global lock pool. -int test_omp_init_lock() { - int i; - omp_lock_t lcks[ITERATIONS * LOCKS_PER_ITER]; -#pragma omp parallel for schedule(static) num_threads(NUM_TASKS) - for (i = 0; i < ITERATIONS; i++) { - int j; - omp_lock_t *my_lcks = &lcks[i * LOCKS_PER_ITER]; - for (j = 0; j < LOCKS_PER_ITER; j++) { - omp_init_lock(&my_lcks[j]); - } - for (j = 0; j < LOCKS_PER_ITER * 100; j++) { - omp_set_lock(&my_lcks[j % LOCKS_PER_ITER]); - omp_unset_lock(&my_lcks[j % LOCKS_PER_ITER]); - } - } - // Wait until all repititions are done. The test is exercising growth of - // the global lock pool, which does not shrink when no locks are allocated. - { - int j; - for (j = 0; j < ITERATIONS * LOCKS_PER_ITER; j++) { - omp_destroy_lock(&lcks[j]); - } - } - - return 0; -} - -int main() { - // No use repeating this test, since it's exercising a private global pool - // which is not reset between test iterations. - return test_omp_init_lock(); -} diff --git a/openmp/runtime/test/ompt/callback.h b/openmp/runtime/test/ompt/callback.h index 69efd83..56d763a 100755 --- a/openmp/runtime/test/ompt/callback.h +++ b/openmp/runtime/test/ompt/callback.h @@ -477,8 +477,8 @@ on_ompt_callback_master( static void on_ompt_callback_parallel_begin( - ompt_data_t *parent_task_data, - const ompt_frame_t *parent_task_frame, + ompt_data_t *encountering_task_data, + const ompt_frame_t *encountering_task_frame, ompt_data_t* parallel_data, uint32_t requested_team_size, ompt_invoker_t invoker, @@ -487,27 +487,27 @@ on_ompt_callback_parallel_begin( if(parallel_data->ptr) printf("%s\n", "0: parallel_data initially not null"); parallel_data->value = ompt_get_unique_id(); - printf("%" PRIu64 ": ompt_event_parallel_begin: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, parallel_id=%" PRIu64 ", requested_team_size=%" PRIu32 ", codeptr_ra=%p, invoker=%d\n", ompt_get_thread_data()->value, parent_task_data->value, parent_task_frame->exit_frame, parent_task_frame->enter_frame, parallel_data->value, requested_team_size, codeptr_ra, invoker); + printf("%" PRIu64 ": ompt_event_parallel_begin: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, parallel_id=%" PRIu64 ", requested_team_size=%" PRIu32 ", codeptr_ra=%p, invoker=%d\n", ompt_get_thread_data()->value, encountering_task_data->value, encountering_task_frame->exit_frame, encountering_task_frame->enter_frame, parallel_data->value, requested_team_size, codeptr_ra, invoker); } static void on_ompt_callback_parallel_end( ompt_data_t *parallel_data, - ompt_data_t *task_data, + ompt_data_t *encountering_task_data, ompt_invoker_t invoker, const void *codeptr_ra) { - printf("%" PRIu64 ": ompt_event_parallel_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", invoker=%d, codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, invoker, codeptr_ra); + printf("%" PRIu64 ": ompt_event_parallel_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", invoker=%d, codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, encountering_task_data->value, invoker, codeptr_ra); } static void on_ompt_callback_task_create( - ompt_data_t *parent_task_data, /* id of parent task */ - const ompt_frame_t *parent_frame, /* frame data for parent task */ - ompt_data_t* new_task_data, /* id of created task */ + ompt_data_t *encountering_task_data, + const ompt_frame_t *encountering_task_frame, + ompt_data_t* new_task_data, int type, int has_dependences, - const void *codeptr_ra) /* pointer to outlined function */ + const void *codeptr_ra) { if(new_task_data->ptr) printf("%s\n", "0: new_task_data initially not null"); @@ -516,7 +516,7 @@ on_ompt_callback_task_create( format_task_type(type, buffer); - //there is no paralllel_begin callback for implicit parallel region + //there is no parallel_begin callback for implicit parallel region //thus it is initialized in initial task if(type & ompt_task_initial) { @@ -527,7 +527,7 @@ on_ompt_callback_task_create( parallel_data->value = ompt_get_unique_id(); } - printf("%" PRIu64 ": ompt_event_task_create: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, new_task_id=%" PRIu64 ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n", ompt_get_thread_data()->value, parent_task_data ? parent_task_data->value : 0, parent_frame ? parent_frame->exit_frame : NULL, parent_frame ? parent_frame->enter_frame : NULL, new_task_data->value, codeptr_ra, buffer, type, has_dependences ? "yes" : "no"); + printf("%" PRIu64 ": ompt_event_task_create: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, new_task_id=%" PRIu64 ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n", ompt_get_thread_data()->value, encountering_task_data ? encountering_task_data->value : 0, encountering_task_frame ? encountering_task_frame->exit_frame : NULL, encountering_task_frame ? encountering_task_frame->enter_frame : NULL, new_task_data->value, codeptr_ra, buffer, type, has_dependences ? "yes" : "no"); } static void @@ -603,7 +603,7 @@ do{ \ int ompt_initialize( ompt_function_lookup_t lookup, - ompt_fns_t* fns) + ompt_data_t *tool_data) { ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback"); ompt_get_task_info = (ompt_get_task_info_t) lookup("ompt_get_task_info"); @@ -646,15 +646,15 @@ int ompt_initialize( return 1; //success } -void ompt_finalize(ompt_fns_t* fns) +void ompt_finalize(ompt_data_t *tool_data) { printf("0: ompt_event_runtime_shutdown\n"); } -ompt_fns_t* ompt_start_tool( +ompt_start_tool_result_t* ompt_start_tool( unsigned int omp_version, const char *runtime_version) { - static ompt_fns_t ompt_fns = {&ompt_initialize,&ompt_finalize}; - return &ompt_fns; + static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,&ompt_finalize, 0}; + return &ompt_start_tool_result; } diff --git a/openmp/runtime/test/ompt/cancel/cancel_parallel.c b/openmp/runtime/test/ompt/cancel/cancel_parallel.c index a374801..7339989 100644 --- a/openmp/runtime/test/ompt/cancel/cancel_parallel.c +++ b/openmp/runtime/test/ompt/cancel/cancel_parallel.c @@ -18,7 +18,7 @@ int main() } else { - delay(100); + usleep(100); printf("%" PRIu64 ": fuzzy_address=0x%lx or 0x%lx\n", ompt_get_thread_data()->value, ((uint64_t)(char*)(&& ompt_label_2))/256-1, ((uint64_t)(char*)(&& ompt_label_2))/256); #pragma omp cancellation point parallel print_fuzzy_address(2); //does not actually print the address but provides a label diff --git a/openmp/runtime/test/ompt/cancel/cancel_worksharing.c b/openmp/runtime/test/ompt/cancel/cancel_worksharing.c index 4107336..9e0889a 100644 --- a/openmp/runtime/test/ompt/cancel/cancel_worksharing.c +++ b/openmp/runtime/test/ompt/cancel/cancel_worksharing.c @@ -27,7 +27,7 @@ int main() { x++; OMPT_WAIT(condition,1); - delay(10000); + usleep(10000); #pragma omp cancellation point for } } @@ -44,7 +44,7 @@ int main() #pragma omp section { OMPT_WAIT(condition,2); - delay(10000); + usleep(10000); #pragma omp cancellation point sections } } diff --git a/openmp/runtime/test/ompt/loadtool/tool_available.c b/openmp/runtime/test/ompt/loadtool/tool_available.c index 0205193..22fdade 100644 --- a/openmp/runtime/test/ompt/loadtool/tool_available.c +++ b/openmp/runtime/test/ompt/loadtool/tool_available.c @@ -38,22 +38,22 @@ int main() int ompt_initialize( ompt_function_lookup_t lookup, - ompt_fns_t* fns) + ompt_data_t* tool_data) { printf("0: NULL_POINTER=%p\n", (void*)NULL); return 1; //success } -void ompt_finalize(ompt_fns_t* fns) +void ompt_finalize(ompt_data_t* tool_data) { printf("%d: ompt_event_runtime_shutdown\n", omp_get_thread_num()); } -ompt_fns_t* ompt_start_tool( +ompt_start_tool_result_t* ompt_start_tool( unsigned int omp_version, const char *runtime_version) { - static ompt_fns_t ompt_fns = {&ompt_initialize,&ompt_finalize}; - return &ompt_fns; + static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,&ompt_finalize, 0}; + return &ompt_start_tool_result; } #endif /* TOOL */ diff --git a/openmp/runtime/test/ompt/ompt-signal.h b/openmp/runtime/test/ompt/ompt-signal.h index b5c28cf..76e49ba 100644 --- a/openmp/runtime/test/ompt/ompt-signal.h +++ b/openmp/runtime/test/ompt/ompt-signal.h @@ -1,13 +1,6 @@ -#if defined(WIN32) || defined(_WIN32) -#include -#define delay() Sleep(1); -#else -#include -#define delay(t) usleep(t); -#endif - // These functions are used to provide a signal-wait mechanism to enforce expected scheduling for the test cases. // Conditional variable (s) needs to be shared! Initialize to 0 +#include #define OMPT_SIGNAL(s) ompt_signal(&s) //inline @@ -24,7 +17,7 @@ void ompt_wait(int *s, int v) { int wait=0; do{ - delay(10); + usleep(10); #pragma omp atomic read wait = (*s); }while(wait