Update implementation of OMPT to the specification OpenMP 5.0 Preview 1 (TR4).
author Joachim Protze <protze@itc.rwth-aachen.de>
Wed, 1 Nov 2017 10:08:30 +0000 (10:08 +0000)
committer Joachim Protze <protze@itc.rwth-aachen.de>
Wed, 1 Nov 2017 10:08:30 +0000 (10:08 +0000)
The code has been tested with the latest clang, GNU, and Intel compilers. The implementation
is optimized for low overhead when no tool is attached, shifting the cost to executions with
a tool attached.

This patch does not implement OMPT for libomptarget.

Patch by Simon Convent and Joachim Protze

Differential Revision: https://reviews.llvm.org/D38185

llvm-svn: 317085

91 files changed:
openmp/runtime/CMakeLists.txt
openmp/runtime/src/exports_so.txt
openmp/runtime/src/include/50/omp.h.var
openmp/runtime/src/include/50/omp_lib.f.var
openmp/runtime/src/include/50/omp_lib.f90.var
openmp/runtime/src/include/50/omp_lib.h.var
openmp/runtime/src/include/50/ompt.h.var
openmp/runtime/src/kmp.h
openmp/runtime/src/kmp_atomic.h
openmp/runtime/src/kmp_barrier.cpp
openmp/runtime/src/kmp_cancel.cpp
openmp/runtime/src/kmp_config.h.cmake
openmp/runtime/src/kmp_csupport.cpp
openmp/runtime/src/kmp_dispatch.cpp
openmp/runtime/src/kmp_ftn_entry.h
openmp/runtime/src/kmp_ftn_os.h
openmp/runtime/src/kmp_global.cpp
openmp/runtime/src/kmp_gsupport.cpp
openmp/runtime/src/kmp_lock.cpp
openmp/runtime/src/kmp_lock.h
openmp/runtime/src/kmp_runtime.cpp
openmp/runtime/src/kmp_sched.cpp
openmp/runtime/src/kmp_settings.cpp
openmp/runtime/src/kmp_taskdeps.cpp
openmp/runtime/src/kmp_tasking.cpp
openmp/runtime/src/kmp_wait_release.h
openmp/runtime/src/ompt-event-specific.h
openmp/runtime/src/ompt-general.cpp
openmp/runtime/src/ompt-internal.h
openmp/runtime/src/ompt-specific.cpp
openmp/runtime/src/ompt-specific.h
openmp/runtime/src/z_Linux_util.cpp
openmp/runtime/test/CMakeLists.txt
openmp/runtime/test/lit.cfg
openmp/runtime/test/lit.site.cfg.in
openmp/runtime/test/ompt/callback.h [changed mode: 0644->0755]
openmp/runtime/test/ompt/cancel/cancel_parallel.c [new file with mode: 0644]
openmp/runtime/test/ompt/cancel/cancel_taskgroup.c [new file with mode: 0644]
openmp/runtime/test/ompt/cancel/cancel_worksharing.c [new file with mode: 0644]
openmp/runtime/test/ompt/loadtool/tool_available.c [new file with mode: 0644]
openmp/runtime/test/ompt/misc/api_calls.c [new file with mode: 0644]
openmp/runtime/test/ompt/misc/control_tool.c [new file with mode: 0644]
openmp/runtime/test/ompt/misc/control_tool_no_ompt_support.c [new file with mode: 0644]
openmp/runtime/test/ompt/misc/idle.c [new file with mode: 0644]
openmp/runtime/test/ompt/ompt-signal.h [new file with mode: 0644]
openmp/runtime/test/ompt/parallel/dynamic_enough_threads.c [new file with mode: 0644]
openmp/runtime/test/ompt/parallel/dynamic_not_enough_threads.c [new file with mode: 0644]
openmp/runtime/test/ompt/parallel/max_active_levels_serialized.c [new file with mode: 0644]
openmp/runtime/test/ompt/parallel/nested.c
openmp/runtime/test/ompt/parallel/nested_lwt.c
openmp/runtime/test/ompt/parallel/nested_serialized.c
openmp/runtime/test/ompt/parallel/no_thread_num_clause.c [new file with mode: 0644]
openmp/runtime/test/ompt/parallel/normal.c
openmp/runtime/test/ompt/parallel/not_enough_threads.c [new file with mode: 0644]
openmp/runtime/test/ompt/parallel/parallel_if0.c [new file with mode: 0644]
openmp/runtime/test/ompt/parallel/serialized.c
openmp/runtime/test/ompt/synchronization/barrier/explicit.c [new file with mode: 0644]
openmp/runtime/test/ompt/synchronization/barrier/for_loop.c [new file with mode: 0644]
openmp/runtime/test/ompt/synchronization/barrier/for_simd.c [new file with mode: 0644]
openmp/runtime/test/ompt/synchronization/barrier/parallel_region.c [new file with mode: 0644]
openmp/runtime/test/ompt/synchronization/barrier/sections.c [new file with mode: 0644]
openmp/runtime/test/ompt/synchronization/barrier/single.c [new file with mode: 0644]
openmp/runtime/test/ompt/synchronization/critical.c [new file with mode: 0644]
openmp/runtime/test/ompt/synchronization/flush.c [new file with mode: 0644]
openmp/runtime/test/ompt/synchronization/lock.c [new file with mode: 0644]
openmp/runtime/test/ompt/synchronization/master.c [new file with mode: 0644]
openmp/runtime/test/ompt/synchronization/nest_lock.c [new file with mode: 0644]
openmp/runtime/test/ompt/synchronization/ordered.c [new file with mode: 0644]
openmp/runtime/test/ompt/synchronization/taskgroup.c [new file with mode: 0644]
openmp/runtime/test/ompt/synchronization/taskwait.c [new file with mode: 0644]
openmp/runtime/test/ompt/synchronization/test_lock.c [new file with mode: 0644]
openmp/runtime/test/ompt/synchronization/test_nest_lock.c [new file with mode: 0644]
openmp/runtime/test/ompt/synchronization/test_nest_lock_parallel.c [new file with mode: 0644]
openmp/runtime/test/ompt/tasks/dependences.c [new file with mode: 0644]
openmp/runtime/test/ompt/tasks/explicit_task.c [new file with mode: 0644]
openmp/runtime/test/ompt/tasks/serialized.c [new file with mode: 0644]
openmp/runtime/test/ompt/tasks/task_in_joinbarrier.c [new file with mode: 0644]
openmp/runtime/test/ompt/tasks/task_types.c [new file with mode: 0644]
openmp/runtime/test/ompt/tasks/task_types_serialized.c [new file with mode: 0644]
openmp/runtime/test/ompt/tasks/taskyield.c [new file with mode: 0644]
openmp/runtime/test/ompt/tasks/untied_task.c [new file with mode: 0644]
openmp/runtime/test/ompt/worksharing/for/auto_split.c [new file with mode: 0644]
openmp/runtime/test/ompt/worksharing/for/base.h
openmp/runtime/test/ompt/worksharing/for/base_serialized.h
openmp/runtime/test/ompt/worksharing/for/base_split.h [new file with mode: 0644]
openmp/runtime/test/ompt/worksharing/for/dynamic_split.c [new file with mode: 0644]
openmp/runtime/test/ompt/worksharing/for/guided_split.c [new file with mode: 0644]
openmp/runtime/test/ompt/worksharing/for/runtime_split.c [new file with mode: 0644]
openmp/runtime/test/ompt/worksharing/for/static_split.c [new file with mode: 0644]
openmp/runtime/test/ompt/worksharing/sections.c [new file with mode: 0644]
openmp/runtime/test/ompt/worksharing/single.c [new file with mode: 0644]

index 0b0648a..1adc825 100644 (file)
@@ -321,12 +321,11 @@ endif()
 # OMPT-support
 set(LIBOMP_OMPT_DEBUG FALSE CACHE BOOL
   "Trace OMPT initialization?")
+#after testing: turn on ompt support by default for OpenMP 5.0 and higher
 set(LIBOMP_OMPT_SUPPORT FALSE CACHE BOOL
   "OMPT-support?")
-set(LIBOMP_OMPT_BLAME TRUE CACHE BOOL
-  "OMPT-blame?")
-set(LIBOMP_OMPT_TRACE TRUE CACHE BOOL
-  "OMPT-trace?")
+set(LIBOMP_OMPT_OPTIONAL TRUE CACHE BOOL
+  "OMPT-optional?")
 if(LIBOMP_OMPT_SUPPORT AND (NOT LIBOMP_HAVE_OMPT_SUPPORT))
   libomp_error_say("OpenMP Tools Interface requested but not available in this implementation")
 endif()
@@ -396,8 +395,7 @@ if(${LIBOMP_STANDALONE_BUILD})
   libomp_say("Use ITT notify       -- ${LIBOMP_USE_ITT_NOTIFY}")
   libomp_say("Use OMPT-support     -- ${LIBOMP_OMPT_SUPPORT}")
   if(${LIBOMP_OMPT_SUPPORT})
-    libomp_say("Use OMPT-blame       -- ${LIBOMP_OMPT_BLAME}")
-    libomp_say("Use OMPT-trace       -- ${LIBOMP_OMPT_TRACE}")
+    libomp_say("Use OMPT-optional  -- ${LIBOMP_OMPT_OPTIONAL}")
   endif()
   libomp_say("Use Adaptive locks   -- ${LIBOMP_USE_ADAPTIVE_LOCKS}")
   libomp_say("Use quad precision   -- ${LIBOMP_USE_QUAD_PRECISION}")
index d9a069f..ff16fae 100644 (file)
@@ -25,8 +25,7 @@ VERSION {
         #
         # OMPT API
         #
-        ompt_tool;           # OMPT initialization interface
-        ompt_control;        # OMPT control interface
+        ompt_start_tool;     # OMPT start interface
 
         # icc drops weak attribute at linking step without the following line:
         Annotate*;           # TSAN annotation
index ec6da6f..bd37f32 100644 (file)
     extern void   __KAI_KMPC_CONVENTION  kmp_set_warnings_on(void);
     extern void   __KAI_KMPC_CONVENTION  kmp_set_warnings_off(void);
 
+    /* OpenMP 5.0 Tool Control */
+    typedef enum omp_control_tool_result_t {
+        omp_control_tool_notool = -2,
+        omp_control_tool_nocallback = -1,
+        omp_control_tool_success = 0,
+        omp_control_tool_ignored = 1
+    } omp_control_tool_result_t;
+
+    typedef enum omp_control_tool_t {
+        omp_control_tool_start = 1,
+        omp_control_tool_pause = 2,
+        omp_control_tool_flush = 3,
+        omp_control_tool_end = 4
+    } omp_control_tool_t;
+    
+    extern int __KAI_KMPC_CONVENTION omp_control_tool(int, int, void*);
+
 #   undef __KAI_KMPC_CONVENTION
 
     /* Warning:
index c8a8751..2b85a84 100644 (file)
@@ -32,6 +32,8 @@
         integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind()
         integer, parameter :: kmp_cancel_kind        = omp_integer_kind
         integer, parameter :: omp_lock_hint_kind     = omp_integer_kind
+        integer, parameter :: omp_control_tool_kind  = omp_integer_kind
+        integer, parameter :: omp_control_tool_result_kind = omp_integer_kind
 
       end module omp_lib_kinds
 
             integer (kind=omp_lock_hint_kind) hint
           end subroutine omp_init_nest_lock_with_hint
 
+          function omp_control_tool(command, modifier)
+            use omp_lib_kinds
+            integer (kind=omp_integer_kind) omp_control_tool
+            integer (kind=omp_control_tool_kind) command
+            integer (kind=omp_control_tool_kind) modifier
+          end function omp_control_tool
+
         end interface
 
 !dec$ if defined(_WIN32)
 !dec$ attributes alias:'OMP_GET_CANCELLATION' :: omp_get_cancellation
 !dec$ attributes alias:'OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device
 !dec$ attributes alias:'OMP_GET_MAX_TASK_PRIORITY' :: omp_get_max_task_priority
+!dec$ attributes alias:'OMP_CONTROL_TOOL' :: omp_control_tool
 
 !dec$ attributes alias:'omp_init_lock' :: omp_init_lock
 !dec$ attributes alias:'omp_init_lock_with_hint' :: omp_init_lock_with_hint
 !dec$ attributes alias:'_OMP_GET_CANCELLATION' :: omp_get_cancellation
 !dec$ attributes alias:'_OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device
 !dec$ attributes alias:'_OMP_GET_MAX_TASK_PRIORTY' :: omp_get_max_task_priority
+!dec$ attributes alias:'_OMP_CONTROL_TOOL' :: omp_control_tool
 
 !dec$ attributes alias:'_omp_init_lock' :: omp_init_lock
 !dec$ attributes alias:'_omp_init_lock_with_hint' :: omp_init_lock_with_hint
 !dec$ attributes alias:'omp_set_nest_lock_'::omp_set_nest_lock
 !dec$ attributes alias:'omp_unset_nest_lock_'::omp_unset_nest_lock
 !dec$ attributes alias:'omp_test_nest_lock_'::omp_test_nest_lock
+!dec$ attributes alias:'omp_control_tool_'::omp_control_tool
 
 !dec$ attributes alias:'kmp_set_stacksize_'::kmp_set_stacksize
 !dec$ attributes alias:'kmp_set_stacksize_s_'::kmp_set_stacksize_s
 !dec$ attributes alias:'_omp_set_nest_lock_'::omp_set_nest_lock
 !dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock
 !dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock
+!dec$ attributes alias:'_omp_control_tool_'::omp_control_tool
 
 !dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize
 !dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s
index c3dc5b7..17c56c9 100644 (file)
@@ -28,6 +28,8 @@
         integer, parameter :: kmp_affinity_mask_kind = c_intptr_t
         integer, parameter :: kmp_cancel_kind        = omp_integer_kind
         integer, parameter :: omp_lock_hint_kind     = omp_integer_kind
+        integer, parameter :: omp_control_tool_kind  = omp_integer_kind
+        integer, parameter :: omp_control_tool_result_kind = omp_integer_kind
 
       end module omp_lib_kinds
 
         integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm            = 131072
         integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive       = 262144
 
+        integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_start = 1
+        integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_pause = 2
+        integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_flush = 3
+        integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_end = 4
+
+        integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_notool = -2
+        integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_nocallback = -1
+        integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_success = 0
+        integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_ignored = 1
+
         interface
 
 !         ***
             integer (kind=omp_lock_hint_kind), value :: hint
           end subroutine omp_init_nest_lock_with_hint
 
+          function omp_control_tool(command, modifier) bind(c)
+            use omp_lib_kinds
+            integer (kind=omp_integer_kind) omp_control_tool
+            integer (kind=omp_control_tool_kind), value :: command
+            integer (kind=omp_control_tool_kind), value :: modifier
+          end function omp_control_tool
+
         end interface
 
       end module omp_lib
index 5633251..377c46f 100644 (file)
@@ -29,6 +29,8 @@
       integer, parameter :: kmp_size_t_kind        = int_ptr_kind()
       integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind()
       integer, parameter :: omp_lock_hint_kind     = omp_integer_kind
+      integer, parameter :: omp_control_tool_kind  = omp_integer_kind
+      integer, parameter :: omp_control_tool_result_kind = omp_integer_kind
 
       integer (kind=omp_integer_kind), parameter :: openmp_version    = @LIBOMP_OMP_YEAR_MONTH@
       integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@
       integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm            = 131072
       integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive       = 262144
 
+      integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_start = 1
+      integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_pause = 2
+      integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_flush = 3
+      integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_end = 4
+
+      integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_notool = -2
+      integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_nocallback = -1
+      integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_success = 0
+      integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_ignored = 1
+
       interface
 
 !       ***
           integer (kind=omp_lock_hint_kind), value :: hint
         end subroutine omp_init_nest_lock_with_hint
 
+        function omp_control_tool(command, modifier) bind(c)
+          import
+          integer (kind=omp_integer_kind) omp_control_tool
+          integer (kind=omp_control_tool_kind), value :: command
+          integer (kind=omp_control_tool_kind), value :: modifier
+        end function omp_control_tool
+
       end interface
 
 !DIR$ IF DEFINED (__INTEL_OFFLOAD)
index c5a642e..5b8ad30 100644 (file)
@@ -10,6 +10,7 @@
  *****************************************************************************/
 
 #include <stdint.h>
+#include <stddef.h>
 
 
 
  * iteration macros
  *****************************************************************************/
 
-#define FOREACH_OMPT_INQUIRY_FN(macro)  \
-    macro (ompt_enumerate_state)        \
-                                        \
-    macro (ompt_set_callback)           \
-    macro (ompt_get_callback)           \
-                                        \
-    macro (ompt_get_idle_frame)         \
-    macro (ompt_get_task_frame)         \
-                                        \
-    macro (ompt_get_state)              \
-                                        \
-    macro (ompt_get_parallel_id)        \
-    macro (ompt_get_parallel_team_size) \
-    macro (ompt_get_task_id)            \
-    macro (ompt_get_thread_id)
+#define FOREACH_OMPT_INQUIRY_FN(macro)      \
+    macro (ompt_enumerate_states)           \
+    macro (ompt_enumerate_mutex_impls)      \
+                                            \
+    macro (ompt_set_callback)               \
+    macro (ompt_get_callback)               \
+                                            \
+    macro (ompt_get_state)                  \
+                                            \
+    macro (ompt_get_parallel_info)          \
+    macro (ompt_get_task_info)              \
+    macro (ompt_get_thread_data)            \
+    macro (ompt_get_unique_id)              \
+                                            \
+    macro(ompt_get_num_places)              \
+    macro(ompt_get_place_proc_ids)          \
+    macro(ompt_get_place_num)               \
+    macro(ompt_get_partition_place_nums)    \
+    macro(ompt_get_proc_id)                 \
+                                            \
+    macro(ompt_get_target_info)             \
+    macro(ompt_get_num_devices)
 
 #define FOREACH_OMPT_PLACEHOLDER_FN(macro)  \
     macro (ompt_idle)                       \
     macro (ompt_task_wait)                  \
     macro (ompt_mutex_wait)
 
-#define FOREACH_OMPT_STATE(macro)                                                               \
+#define FOREACH_OMP_STATE(macro)                                                                \
                                                                                                 \
-    /* first */                                                                                 \
-    macro (ompt_state_first, 0x71)          /* initial enumeration state */                     \
+    /* first available state */                                                                 \
+    macro (omp_state_undefined, 0x102)      /* undefined thread state */                        \
                                                                                                 \
     /* work states (0..15) */                                                                   \
-    macro (ompt_state_work_serial, 0x00)    /* working outside parallel */                      \
-    macro (ompt_state_work_parallel, 0x01)  /* working within parallel */                       \
-    macro (ompt_state_work_reduction, 0x02) /* performing a reduction */                        \
+    macro (omp_state_work_serial, 0x000)    /* working outside parallel */                      \
+    macro (omp_state_work_parallel, 0x001)  /* working within parallel */                       \
+    macro (omp_state_work_reduction, 0x002) /* performing a reduction */                        \
                                                                                                 \
-    /* idle (16..31) */                                                                         \
-    macro (ompt_state_idle, 0x10)            /* waiting for work */                             \
+    /* barrier wait states (16..31) */                                                          \
+    macro (omp_state_wait_barrier, 0x010)   /* waiting at a barrier */                          \
+    macro (omp_state_wait_barrier_implicit_parallel, 0x011)                                     \
+                                            /* implicit barrier at the end of parallel region */\
+    macro (omp_state_wait_barrier_implicit_workshare, 0x012)                                    \
+                                            /* implicit barrier at the end of worksharing */    \
+    macro (omp_state_wait_barrier_implicit, 0x013)  /* implicit barrier */                      \
+    macro (omp_state_wait_barrier_explicit, 0x014)  /* explicit barrier */                      \
                                                                                                 \
-    /* overhead states (32..63) */                                                              \
-    macro (ompt_state_overhead, 0x20)        /* overhead excluding wait states */               \
+    /* task wait states (32..63) */                                                             \
+    macro (omp_state_wait_taskwait, 0x020)  /* waiting at a taskwait */                         \
+    macro (omp_state_wait_taskgroup, 0x021) /* waiting at a taskgroup */                        \
                                                                                                 \
-    /* barrier wait states (64..79) */                                                          \
-    macro (ompt_state_wait_barrier, 0x40)    /* waiting at a barrier */                         \
-    macro (ompt_state_wait_barrier_implicit, 0x41)    /* implicit barrier */                    \
-    macro (ompt_state_wait_barrier_explicit, 0x42)    /* explicit barrier */                    \
+    /* mutex wait states (64..127) */                                                           \
+    macro (omp_state_wait_mutex, 0x040)                                                         \
+    macro (omp_state_wait_lock, 0x041)      /* waiting for lock */                              \
+    macro (omp_state_wait_critical, 0x042)  /* waiting for critical */                          \
+    macro (omp_state_wait_atomic, 0x043)    /* waiting for atomic */                            \
+    macro (omp_state_wait_ordered, 0x044)   /* waiting for ordered */                           \
                                                                                                 \
-    /* task wait states (80..95) */                                                             \
-    macro (ompt_state_wait_taskwait, 0x50)   /* waiting at a taskwait */                        \
-    macro (ompt_state_wait_taskgroup, 0x51)  /* waiting at a taskgroup */                       \
+    /* target wait states (128..255) */                                                         \
+    macro (omp_state_wait_target, 0x080)        /* waiting for target region */                 \
+    macro (omp_state_wait_target_map, 0x081)    /* waiting for target data mapping operation */ \
+    macro (omp_state_wait_target_update, 0x082) /* waiting for target update operation */       \
                                                                                                 \
-    /* mutex wait states (96..111) */                                                           \
-    macro (ompt_state_wait_lock, 0x60)       /* waiting for lock */                             \
-    macro (ompt_state_wait_nest_lock, 0x61)  /* waiting for nest lock */                        \
-    macro (ompt_state_wait_critical, 0x62)   /* waiting for critical */                         \
-    macro (ompt_state_wait_atomic, 0x63)     /* waiting for atomic */                           \
-    macro (ompt_state_wait_ordered, 0x64)    /* waiting for ordered */                          \
-    macro (ompt_state_wait_single, 0x6F)     /* waiting for single region (non-standard!) */    \
+    /* misc (256..511) */                                                                       \
+    macro (omp_state_idle, 0x100)           /* waiting for work */                              \
+    macro (omp_state_overhead, 0x101)       /* overhead excluding wait states */                \
                                                                                                 \
-    /* misc (112..127) */                                                                       \
-    macro (ompt_state_undefined, 0x70)       /* undefined thread state */
+    /* implementation-specific states (512..) */
 
 
+#define FOREACH_OMPT_MUTEX_IMPL(macro)                                                \
+    macro (ompt_mutex_impl_unknown, 0)      /* unknown implementation */              \
+    macro (ompt_mutex_impl_spin, 1)         /* based on spin */                       \
+    macro (ompt_mutex_impl_queuing, 2)      /* based on some fair policy */           \
+    macro (ompt_mutex_impl_speculative, 3)  /* based on HW-supported speculation */
+
 #define FOREACH_OMPT_EVENT(macro)                                                                               \
                                                                                                                 \
     /*--- Mandatory Events ---*/                                                                                \
-    macro (ompt_event_parallel_begin,           ompt_new_parallel_callback_t,   1) /* parallel begin */         \
-    macro (ompt_event_parallel_end,             ompt_end_parallel_callback_t,   2) /* parallel end */           \
-                                                                                                                \
-    macro (ompt_event_task_begin,               ompt_new_task_callback_t,       3) /* task begin */             \
-    macro (ompt_event_task_end,                 ompt_task_callback_t,           4) /* task destroy */           \
-                                                                                                                \
-    macro (ompt_event_thread_begin,             ompt_thread_type_callback_t,    5) /* thread begin */           \
-    macro (ompt_event_thread_end,               ompt_thread_type_callback_t,    6) /* thread end */             \
+    macro (ompt_callback_thread_begin,          ompt_callback_thread_begin_t,   1) /* thread begin */           \
+    macro (ompt_callback_thread_end,            ompt_callback_thread_end_t,     2) /* thread end */             \
                                                                                                                 \
-    macro (ompt_event_control,                  ompt_control_callback_t,        7) /* support control calls */  \
+    macro (ompt_callback_parallel_begin,        ompt_callback_parallel_begin_t, 3) /* parallel begin */         \
+    macro (ompt_callback_parallel_end,          ompt_callback_parallel_end_t,   4) /* parallel end */           \
                                                                                                                 \
-    macro (ompt_event_runtime_shutdown,         ompt_callback_t,                8) /* runtime shutdown */       \
-                                                                                                                \
-    /*--- Optional Events (blame shifting, ompt_event_unimplemented) ---*/                                      \
-    macro (ompt_event_idle_begin,               ompt_thread_callback_t,         9) /* begin idle state */       \
-    macro (ompt_event_idle_end,                 ompt_thread_callback_t,        10) /* end idle state */         \
+    macro (ompt_callback_task_create,           ompt_callback_task_create_t,    5) /* task begin */             \
+    macro (ompt_callback_task_schedule,         ompt_callback_task_schedule_t,  6) /* task schedule */          \
+    macro (ompt_callback_implicit_task,         ompt_callback_implicit_task_t,  7) /* implicit task   */        \
                                                                                                                 \
-    macro (ompt_event_wait_barrier_begin,       ompt_parallel_callback_t,      11) /* begin wait at barrier */  \
-    macro (ompt_event_wait_barrier_end,         ompt_parallel_callback_t,      12) /* end wait at barrier */    \
+    macro (ompt_callback_target,                ompt_callback_target_t,         8) /* target */                 \
+    macro (ompt_callback_target_data_op,        ompt_callback_target_data_op_t, 9) /* target data op*/          \
+    macro (ompt_callback_target_submit,         ompt_callback_target_submit_t, 10) /* target  submit*/          \
                                                                                                                 \
-    macro (ompt_event_wait_taskwait_begin,      ompt_parallel_callback_t,      13) /* begin wait at taskwait */ \
-    macro (ompt_event_wait_taskwait_end,        ompt_parallel_callback_t,      14) /* end wait at taskwait */   \
+    macro (ompt_callback_control_tool,          ompt_callback_control_tool_t,  11) /* control tool */           \
                                                                                                                 \
-    macro (ompt_event_wait_taskgroup_begin,     ompt_parallel_callback_t,      15) /* begin wait at taskgroup */\
-    macro (ompt_event_wait_taskgroup_end,       ompt_parallel_callback_t,      16) /* end wait at taskgroup */  \
+    macro (ompt_callback_device_initialize,     ompt_callback_device_initialize_t, 12) /* device initialize */  \
+    macro (ompt_callback_device_finalize,       ompt_callback_device_finalize_t, 13)   /* device finalize   */  \
                                                                                                                 \
-    macro (ompt_event_release_lock,             ompt_wait_callback_t,          17) /* lock release */           \
-    macro (ompt_event_release_nest_lock_last,   ompt_wait_callback_t,          18) /* last nest lock release */ \
-    macro (ompt_event_release_critical,         ompt_wait_callback_t,          19) /* critical release */       \
+    /*--- Optional Events (blame shifting, ompt_event_unimplemented) ---*/                                      \
                                                                                                                 \
-    macro (ompt_event_release_atomic,           ompt_wait_callback_t,          20) /* atomic release */         \
+    macro (ompt_callback_sync_region_wait,      ompt_callback_sync_region_t,   14) /* sync region wait begin or end*/  \
                                                                                                                 \
-    macro (ompt_event_release_ordered,          ompt_wait_callback_t,          21) /* ordered release */        \
+    macro (ompt_callback_mutex_released,        ompt_callback_mutex_t,         15) /* mutex released */         \
                                                                                                                 \
     /*--- Optional Events (synchronous events, ompt_event_unimplemented) --- */                                 \
-    macro (ompt_event_implicit_task_begin,      ompt_parallel_callback_t,      22) /* implicit task begin   */  \
-    macro (ompt_event_implicit_task_end,        ompt_parallel_callback_t,      23) /* implicit task end  */     \
-                                                                                                                \
-    macro (ompt_event_initial_task_begin,       ompt_parallel_callback_t,      24) /* initial task begin   */   \
-    macro (ompt_event_initial_task_end,         ompt_parallel_callback_t,      25) /* initial task end  */      \
-                                                                                                                \
-    macro (ompt_event_task_switch,              ompt_task_pair_callback_t,     26) /* task switch */            \
-                                                                                                                \
-    macro (ompt_event_loop_begin,               ompt_new_workshare_callback_t, 27) /* task at loop begin */     \
-    macro (ompt_event_loop_end,                 ompt_parallel_callback_t,      28) /* task at loop end */       \
-                                                                                                                \
-    macro (ompt_event_sections_begin,           ompt_new_workshare_callback_t, 29) /* task at sections begin  */\
-    macro (ompt_event_sections_end,             ompt_parallel_callback_t,      30) /* task at sections end */   \
                                                                                                                 \
-    macro (ompt_event_single_in_block_begin,    ompt_new_workshare_callback_t, 31) /* task at single begin*/    \
-    macro (ompt_event_single_in_block_end,      ompt_parallel_callback_t,      32) /* task at single end */     \
+    macro (ompt_callback_task_dependences,      ompt_callback_task_dependences_t, 16) /* report task dependences  */\
+    macro (ompt_callback_task_dependence,       ompt_callback_task_dependence_t, 17) /* report task dependence  */\
                                                                                                                 \
-    macro (ompt_event_single_others_begin,      ompt_parallel_callback_t,      33) /* task at single begin */   \
-    macro (ompt_event_single_others_end,        ompt_parallel_callback_t,      34) /* task at single end */     \
+    macro (ompt_callback_work,                  ompt_callback_work_t,          18) /* task at work begin or end*/\
                                                                                                                 \
-    macro (ompt_event_workshare_begin,          ompt_new_workshare_callback_t, 35) /* task at workshare begin */\
-    macro (ompt_event_workshare_end,            ompt_parallel_callback_t,      36) /* task at workshare end */  \
+    macro (ompt_callback_master,                ompt_callback_master_t,        19) /* task at master begin or end */\
                                                                                                                 \
-    macro (ompt_event_master_begin,             ompt_parallel_callback_t,      37) /* task at master begin */   \
-    macro (ompt_event_master_end,               ompt_parallel_callback_t,      38) /* task at master end */     \
+    macro (ompt_callback_target_map,            ompt_callback_target_map_t,    20) /* target map */             \
                                                                                                                 \
-    macro (ompt_event_barrier_begin,            ompt_parallel_callback_t,      39) /* task at barrier begin  */ \
-    macro (ompt_event_barrier_end,              ompt_parallel_callback_t,      40) /* task at barrier end */    \
+    macro (ompt_callback_sync_region,           ompt_callback_sync_region_t,   21) /* sync region begin or end */ \
                                                                                                                 \
-    macro (ompt_event_taskwait_begin,           ompt_parallel_callback_t,      41) /* task at taskwait begin */ \
-    macro (ompt_event_taskwait_end,             ompt_parallel_callback_t,      42) /* task at task wait end */  \
+    macro (ompt_callback_lock_init,             ompt_callback_mutex_acquire_t, 22) /* lock init */              \
+    macro (ompt_callback_lock_destroy,          ompt_callback_mutex_t,         23) /* lock destroy */           \
                                                                                                                 \
-    macro (ompt_event_taskgroup_begin,          ompt_parallel_callback_t,      43) /* task at taskgroup begin */\
-    macro (ompt_event_taskgroup_end,            ompt_parallel_callback_t,      44) /* task at taskgroup end */  \
+    macro (ompt_callback_mutex_acquire,         ompt_callback_mutex_acquire_t, 24) /* mutex acquire */          \
+    macro (ompt_callback_mutex_acquired,        ompt_callback_mutex_t,         25) /* mutex acquired */         \
                                                                                                                 \
-    macro (ompt_event_release_nest_lock_prev,   ompt_wait_callback_t,          45) /* prev nest lock release */ \
+    macro (ompt_callback_nest_lock,             ompt_callback_nest_lock_t,     26) /* nest lock */              \
                                                                                                                 \
-    macro (ompt_event_wait_lock,                ompt_wait_callback_t,          46) /* lock wait */              \
-    macro (ompt_event_wait_nest_lock,           ompt_wait_callback_t,          47) /* nest lock wait */         \
-    macro (ompt_event_wait_critical,            ompt_wait_callback_t,          48) /* critical wait */          \
-    macro (ompt_event_wait_atomic,              ompt_wait_callback_t,          49) /* atomic wait */            \
-    macro (ompt_event_wait_ordered,             ompt_wait_callback_t,          50) /* ordered wait */           \
+    macro (ompt_callback_flush,                 ompt_callback_flush_t,         27) /* after executing flush */  \
                                                                                                                 \
-    macro (ompt_event_acquired_lock,            ompt_wait_callback_t,          51) /* lock acquired */          \
-    macro (ompt_event_acquired_nest_lock_first, ompt_wait_callback_t,          52) /* 1st nest lock acquired */ \
-    macro (ompt_event_acquired_nest_lock_next,  ompt_wait_callback_t,          53) /* next nest lock acquired*/ \
-    macro (ompt_event_acquired_critical,        ompt_wait_callback_t,          54) /* critical acquired */      \
-    macro (ompt_event_acquired_atomic,          ompt_wait_callback_t,          55) /* atomic acquired */        \
-    macro (ompt_event_acquired_ordered,         ompt_wait_callback_t,          56) /* ordered acquired */       \
-                                                                                                                \
-    macro (ompt_event_init_lock,                ompt_wait_callback_t,          57) /* lock init */              \
-    macro (ompt_event_init_nest_lock,           ompt_wait_callback_t,          58) /* nest lock init */         \
-                                                                                                                \
-    macro (ompt_event_destroy_lock,             ompt_wait_callback_t,          59) /* lock destruction */       \
-    macro (ompt_event_destroy_nest_lock,        ompt_wait_callback_t,          60) /* nest lock destruction */  \
-                                                                                                                \
-    macro (ompt_event_flush,                    ompt_callback_t,               61) /* after executing flush */  \
-                                                                                                                \
-    macro (ompt_event_task_dependences,         ompt_task_dependences_callback_t, 69) /* report task dependences  */\
-    macro (ompt_event_task_dependence_pair,     ompt_task_pair_callback_t,     70) /* report task dependence pair */
+    macro (ompt_callback_cancel,                ompt_callback_cancel_t,        28) /*cancel innermost binding region*/\
+    macro (ompt_callback_idle,                  ompt_callback_idle_t,          29) /* begin or end idle state */\
 
 
 
  * identifiers
  *---------------------*/
 
-typedef uint64_t ompt_thread_id_t;
-#define ompt_thread_id_none ((ompt_thread_id_t) 0)     /* non-standard */
+typedef uint64_t ompt_id_t; /* 64-bit id; used below for target_id / host_op_id */
+#define ompt_id_none 0 /* "no id" value for ompt_id_t */
 
-typedef uint64_t ompt_task_id_t;
-#define ompt_task_id_none ((ompt_task_id_t) 0)         /* non-standard */
+typedef union ompt_data_u {
+  uint64_t value; /* data initialized by runtime to unique id */
+  void *ptr;      /* pointer under tool control */
+} ompt_data_t;
 
-typedef uint64_t ompt_parallel_id_t;
-#define ompt_parallel_id_none ((ompt_parallel_id_t) 0) /* non-standard */
+static const ompt_data_t ompt_data_none = {0}; /* {0} initializes the first member, value */
 
 typedef uint64_t ompt_wait_id_t;
-#define ompt_wait_id_none ((ompt_wait_id_t) 0)         /* non-standard */
+static const ompt_wait_id_t ompt_wait_id_none = 0; /* typed constant replacing the former macro */
 
+typedef void ompt_device_t; /* opaque; passed as ompt_device_t * to ompt_callback_device_initialize_t */
 
 /*---------------------
  * ompt_frame_t
@@ -235,35 +211,44 @@ typedef struct ompt_task_dependence_s {
  *---------------------*/
 
 typedef enum {
-#define ompt_state_macro(state, code) state = code,
-    FOREACH_OMPT_STATE(ompt_state_macro)
-#undef ompt_state_macro
-} ompt_state_t;
+#define omp_state_macro(state, code) state = code,
+    FOREACH_OMP_STATE(omp_state_macro)
+#undef omp_state_macro
+} omp_state_t;
 
 
 /*---------------------
  * runtime events
  *---------------------*/
 
-typedef enum {
+typedef enum ompt_callbacks_e{
 #define ompt_event_macro(event, callback, eventid) event = eventid,
     FOREACH_OMPT_EVENT(ompt_event_macro)
 #undef ompt_event_macro
-} ompt_event_t;
+} ompt_callbacks_t;
 
 
 /*---------------------
  * set callback results
  *---------------------*/
-typedef enum {
-    ompt_set_result_registration_error              = 0,
-    ompt_set_result_event_may_occur_no_callback     = 1,
-    ompt_set_result_event_never_occurs              = 2,
-    ompt_set_result_event_may_occur_callback_some   = 3,
-    ompt_set_result_event_may_occur_callback_always = 4,
+typedef enum ompt_set_result_e {
+    ompt_set_error = 0,
+    ompt_set_never = 1,
+    ompt_set_sometimes = 2,
+    ompt_set_sometimes_paired = 3,
+    ompt_set_always = 4
 } ompt_set_result_t;
 
 
+/*----------------------
+ * mutex implementations
+ *----------------------*/
+typedef enum ompt_mutex_impl_e {
+#define ompt_mutex_impl_macro(impl, code) impl = code,
+    FOREACH_OMPT_MUTEX_IMPL(ompt_mutex_impl_macro)
+#undef ompt_mutex_impl_macro
+} ompt_mutex_impl_t;
+
 
 /*****************************************************************************
  * callback signatures
@@ -273,14 +258,10 @@ typedef enum {
 typedef void (*ompt_interface_fn_t)(void);
 
 typedef ompt_interface_fn_t (*ompt_function_lookup_t)(
-    const char *                      /* entry point to look up       */
+    const char *                          /* entry point to look up              */
 );
 
 /* threads */
-typedef void (*ompt_thread_callback_t) (
-    ompt_thread_id_t thread_id        /* ID of thread                 */
-);
-
 typedef enum {
     ompt_thread_initial = 1, // start the enumeration at 1
     ompt_thread_worker  = 2,
@@ -288,78 +269,262 @@ typedef enum {
 } ompt_thread_type_t;
 
 typedef enum {
-    ompt_invoker_program = 0,         /* program invokes master task  */
-    ompt_invoker_runtime = 1          /* runtime invokes master task  */
+    ompt_invoker_program = 1,             /* program invokes master task         */
+    ompt_invoker_runtime = 2              /* runtime invokes master task         */
 } ompt_invoker_t;
 
-typedef void (*ompt_thread_type_callback_t) (
-    ompt_thread_type_t thread_type,   /* type of thread               */
-    ompt_thread_id_t thread_id        /* ID of thread                 */
+typedef void (*ompt_callback_thread_begin_t) (
+    ompt_thread_type_t thread_type,       /* type of thread                      */
+    ompt_data_t *thread_data              /* data of thread                      */
+);
+
+typedef void (*ompt_callback_thread_end_t) (
+    ompt_data_t *thread_data              /* data of thread                      */
 );
 
 typedef void (*ompt_wait_callback_t) (
-    ompt_wait_id_t wait_id            /* wait id                      */
+    ompt_wait_id_t wait_id                /* wait data                           */
 );
 
 /* parallel and workshares */
-typedef void (*ompt_parallel_callback_t) (
-    ompt_parallel_id_t parallel_id,    /* id of parallel region       */
-    ompt_task_id_t task_id             /* id of task                  */
+typedef enum ompt_scope_endpoint_e {
+    ompt_scope_begin = 1,
+    ompt_scope_end = 2
+} ompt_scope_endpoint_t;
+
+
+/* implicit task */
+typedef void (*ompt_callback_implicit_task_t) (
+    ompt_scope_endpoint_t endpoint,       /* endpoint of implicit task           */
+    ompt_data_t *parallel_data,           /* data of parallel region             */
+    ompt_data_t *task_data,               /* data of implicit task               */
+    unsigned int team_size,               /* team size                           */
+    unsigned int thread_num               /* thread number of calling thread     */
 );
 
-typedef void (*ompt_new_workshare_callback_t) (
-    ompt_parallel_id_t parallel_id,   /* id of parallel region        */
-    ompt_task_id_t parent_task_id,    /* id of parent task            */
-    void *workshare_function          /* pointer to outlined function */
+typedef void (*ompt_callback_parallel_begin_t) (
+    ompt_data_t *parent_task_data,        /* data of parent task                 */
+    const ompt_frame_t *parent_frame,     /* frame data of parent task           */
+    ompt_data_t *parallel_data,           /* data of parallel region             */
+    unsigned int requested_team_size,     /* requested number of threads in team */
+    ompt_invoker_t invoker,               /* invoker of master task              */
+    const void *codeptr_ra                /* return address of runtime call      */
 );
 
-typedef void (*ompt_new_parallel_callback_t) (
-    ompt_task_id_t parent_task_id,    /* id of parent task            */
-    ompt_frame_t *parent_task_frame,  /* frame data of parent task    */
-    ompt_parallel_id_t parallel_id,   /* id of parallel region        */
-    uint32_t requested_team_size,     /* number of threads in team    */
-    void *parallel_function,          /* pointer to outlined function */
-    ompt_invoker_t invoker            /* who invokes master task?     */
+typedef void (*ompt_callback_parallel_end_t) (
+    ompt_data_t *parallel_data,           /* data of parallel region             */
+    ompt_data_t *task_data,               /* data of task                        */
+    ompt_invoker_t invoker,               /* invoker of master task              */ 
+    const void *codeptr_ra                /* return address of runtime call      */
 );
 
-typedef void (*ompt_end_parallel_callback_t) (
-    ompt_parallel_id_t parallel_id,   /* id of parallel region       */
-    ompt_task_id_t task_id,           /* id of task                  */
-    ompt_invoker_t invoker            /* who invokes master task?    */
+/* tasks */
+typedef enum ompt_task_type_e {
+    ompt_task_initial    = 0x1,        /* 0x1..0x8: one distinct low bit each */
+    ompt_task_implicit   = 0x2,
+    ompt_task_explicit   = 0x4,
+    ompt_task_target     = 0x8,
+    ompt_task_undeferred = 0x8000000,  /* 0x8000000 and up: one distinct high bit each */
+    ompt_task_untied     = 0x10000000,
+    ompt_task_final      = 0x20000000,
+    ompt_task_mergeable  = 0x40000000,
+    ompt_task_merged     = 0x80000000  /* NOTE(review): does not fit in a 32-bit int; ISO C requires enumerators representable as int, and ompt_callback_task_create_t takes `int type` - confirm */
+} ompt_task_type_t;
+
+typedef enum ompt_task_status_e {
+    ompt_task_complete = 1,
+    ompt_task_yield    = 2,
+    ompt_task_cancel   = 3,
+    ompt_task_others   = 4
+} ompt_task_status_t;
+
+typedef void (*ompt_callback_task_schedule_t) (
+    ompt_data_t *prior_task_data,         /* data of prior task                  */
+    ompt_task_status_t prior_task_status, /* status of prior task                */
+    ompt_data_t *next_task_data           /* data of next task                   */
 );
 
-/* tasks */
-typedef void (*ompt_task_callback_t) (
-    ompt_task_id_t task_id            /* id of task                   */
+typedef void (*ompt_callback_task_create_t) (
+    ompt_data_t *parent_task_data,        /* data of parent task                 */
+    const ompt_frame_t *parent_frame,     /* frame data for parent task          */
+    ompt_data_t *new_task_data,           /* data of created task                */
+    int type,                             /* type of created task                */
+    int has_dependences,                  /* created task has dependences        */
+    const void *codeptr_ra                /* return address of runtime call      */
 );
 
-typedef void (*ompt_task_pair_callback_t) (
-    ompt_task_id_t first_task_id,
-    ompt_task_id_t second_task_id
+/* task dependences */
+typedef void (*ompt_callback_task_dependences_t) (
+    ompt_data_t *task_data,               /* data of task                        */
+    const ompt_task_dependence_t *deps,   /* dependences of task                 */
+    int ndeps                             /* dependences count of task           */
 );
 
-typedef void (*ompt_new_task_callback_t) (
-    ompt_task_id_t parent_task_id,    /* id of parent task            */
-    ompt_frame_t *parent_task_frame,  /* frame data for parent task   */
-    ompt_task_id_t  new_task_id,      /* id of created task           */
-    void *task_function               /* pointer to outlined function */
+typedef void (*ompt_callback_task_dependence_t) (
+    ompt_data_t *src_task_data,           /* data of source task                 */
+    ompt_data_t *sink_task_data           /* data of sink task                   */
 );
 
-/* task dependences */
-typedef void (*ompt_task_dependences_callback_t) (
-    ompt_task_id_t task_id,            /* ID of task with dependences */
-    const ompt_task_dependence_t *deps,/* vector of task dependences  */
-    int ndeps                          /* number of dependences       */
+/* target and device */
+typedef enum ompt_target_type_e {
+    ompt_target = 1,
+    ompt_target_enter_data = 2,
+    ompt_target_exit_data = 3,
+    ompt_target_update = 4
+} ompt_target_type_t;
+
+typedef void (*ompt_callback_target_t) (
+    ompt_target_type_t kind,              /* one of the ompt_target_type_t values */
+    ompt_scope_endpoint_t endpoint,       /* ompt_scope_begin or ompt_scope_end  */
+    uint64_t device_num,                  /* device number                       */
+    ompt_data_t *task_data,               /* data of task                        */
+    ompt_id_t target_id,                  /* id of target region - TODO confirm  */
+    const void *codeptr_ra                /* return address of runtime call      */
 );
 
-/* program */
-typedef void (*ompt_control_callback_t) (
-    uint64_t command,                 /* command of control call      */
-    uint64_t modifier                 /* modifier of control call     */
+typedef enum ompt_target_data_op_e {
+    ompt_target_data_alloc = 1,
+    ompt_target_data_transfer_to_dev = 2,
+    ompt_target_data_transfer_from_dev = 3,
+    ompt_target_data_delete = 4
+} ompt_target_data_op_t;
+
+typedef void (*ompt_callback_target_data_op_t) (
+    ompt_id_t target_id,                  /* id of target region - TODO confirm  */
+    ompt_id_t host_op_id,                 /* id of this operation - TODO confirm */
+    ompt_target_data_op_t optype,         /* one of the ompt_target_data_op_t values */
+    void *host_addr,                      /* host-side address                   */
+    void *device_addr,                    /* device-side address                 */
+    size_t bytes                          /* size in bytes                       */
 );
 
-typedef void (*ompt_callback_t)(void);
+typedef void (*ompt_callback_target_submit_t) (
+    ompt_id_t target_id,                  /* id of target region - TODO confirm  */
+    ompt_id_t host_op_id                  /* id of this operation - TODO confirm */
+);
 
+typedef void (*ompt_callback_target_map_t) (
+    ompt_id_t target_id,                  /* id of target region - TODO confirm  */
+    unsigned int nitems,                  /* number of items                     */
+    void **host_addr,                     /* presumably nitems host addresses - confirm   */
+    void **device_addr,                   /* presumably nitems device addresses - confirm */
+    size_t *bytes,                        /* presumably nitems sizes in bytes - confirm   */
+    unsigned int *mapping_flags           /* presumably nitems flag words - confirm       */
+);
+
+typedef void (*ompt_callback_device_initialize_t) (
+    uint64_t device_num,                  /* device number                       */
+    const char *type,                     /* device type string - TODO confirm contents */
+    ompt_device_t *device,                /* opaque device handle                */
+    ompt_function_lookup_t lookup,        /* looks up an entry point by name     */
+    const char *documentation             /* documentation string - TODO confirm contents */
+);
+
+typedef void (*ompt_callback_device_finalize_t) (
+    uint64_t device_num                   /* device number                       */
+);
+
+/* control_tool */
+typedef int (*ompt_callback_control_tool_t) (
+    uint64_t command,                     /* command of control call             */
+    uint64_t modifier,                    /* modifier of control call            */
+    void *arg,                            /* argument of control call            */
+    const void *codeptr_ra                /* return address of runtime call      */
+);
+
+typedef enum ompt_mutex_kind_e {
+    ompt_mutex = 0x10,
+    ompt_mutex_lock = 0x11,
+    ompt_mutex_nest_lock = 0x12,
+    ompt_mutex_critical = 0x13,
+    ompt_mutex_atomic = 0x14,
+    ompt_mutex_ordered = 0x20
+} ompt_mutex_kind_t;
+
+typedef void (*ompt_callback_mutex_acquire_t) (
+    ompt_mutex_kind_t kind,               /* mutex kind                          */
+    unsigned int hint,                    /* mutex hint                          */
+    unsigned int impl,                    /* mutex implementation                */
+    ompt_wait_id_t wait_id,               /* id of object being awaited          */
+    const void *codeptr_ra                /* return address of runtime call      */
+);
+
+typedef void (*ompt_callback_mutex_t) (
+    ompt_mutex_kind_t kind,               /* mutex kind                          */
+    ompt_wait_id_t wait_id,               /* id of object being awaited          */
+    const void *codeptr_ra                /* return address of runtime call      */
+);
+
+typedef void (*ompt_callback_nest_lock_t) (
+    ompt_scope_endpoint_t endpoint,       /* endpoint of nested lock             */
+    ompt_wait_id_t wait_id,               /* id of object being awaited          */
+    const void *codeptr_ra                /* return address of runtime call      */
+);
+
+typedef void (*ompt_callback_master_t) (
+    ompt_scope_endpoint_t endpoint,       /* endpoint of master region           */
+    ompt_data_t *parallel_data,           /* data of parallel region             */
+    ompt_data_t *task_data,               /* data of task                        */
+    const void *codeptr_ra                /* return address of runtime call      */
+);
+
+typedef void (*ompt_callback_idle_t) (
+    ompt_scope_endpoint_t endpoint        /* endpoint of idle time               */
+);
+
+typedef enum ompt_work_type_e {
+    ompt_work_loop = 1,
+    ompt_work_sections = 2,
+    ompt_work_single_executor = 3,
+    ompt_work_single_other = 4,
+    ompt_work_workshare = 5,
+    ompt_work_distribute = 6,
+    ompt_work_taskloop = 7
+} ompt_work_type_t;
+
+typedef void (*ompt_callback_work_t) (
+    ompt_work_type_t wstype,              /* type of work region                 */
+    ompt_scope_endpoint_t endpoint,       /* endpoint of work region             */
+    ompt_data_t *parallel_data,           /* data of parallel region             */
+    ompt_data_t *task_data,               /* data of task                        */
+    uint64_t count,                       /* quantity of work                    */
+    const void *codeptr_ra                /* return address of runtime call      */
+);
+
+typedef enum ompt_sync_region_kind_e {
+    ompt_sync_region_barrier = 1,
+    ompt_sync_region_taskwait = 2,
+    ompt_sync_region_taskgroup = 3
+} ompt_sync_region_kind_t;
+
+typedef void (*ompt_callback_sync_region_t) (
+    ompt_sync_region_kind_t kind,         /* kind of sync region                 */
+    ompt_scope_endpoint_t endpoint,       /* endpoint of sync region             */
+    ompt_data_t *parallel_data,           /* data of parallel region             */
+    ompt_data_t *task_data,               /* data of task                        */
+    const void *codeptr_ra                /* return address of runtime call      */
+);
+
+typedef enum ompt_cancel_flag_e {
+    ompt_cancel_parallel       = 0x1,
+    ompt_cancel_sections       = 0x2,
+    ompt_cancel_do             = 0x4,
+    ompt_cancel_taskgroup      = 0x8,
+    ompt_cancel_activated      = 0x10,
+    ompt_cancel_detected       = 0x20,
+    ompt_cancel_discarded_task = 0x40
+} ompt_cancel_flag_t;
+
+typedef void (*ompt_callback_cancel_t) (
+    ompt_data_t *task_data,               /* data of task                        */
+    int flags,                            /* cancel flags                        */
+    const void *codeptr_ra                /* return address of runtime call      */
+);
+
+typedef void (*ompt_callback_flush_t) (
+    ompt_data_t *thread_data,             /* data of thread                      */
+    const void *codeptr_ra                /* return address of runtime call      */
+);
 
 /****************************************************************************
  * ompt API
@@ -381,33 +546,48 @@ extern "C" {
  ***************************************************************************/
 
 /* state */
-OMPT_API_FUNCTION(ompt_state_t, ompt_get_state, (
-    ompt_wait_id_t *ompt_wait_id
+OMPT_API_FUNCTION(omp_state_t, ompt_get_state, (
+    ompt_wait_id_t *wait_id
 ));
 
 /* thread */
-OMPT_API_FUNCTION(ompt_thread_id_t, ompt_get_thread_id, (void));
-
-OMPT_API_FUNCTION(void *, ompt_get_idle_frame, (void));
+OMPT_API_FUNCTION(ompt_data_t*, ompt_get_thread_data, (void));
 
 /* parallel region */
-OMPT_API_FUNCTION(ompt_parallel_id_t, ompt_get_parallel_id, (
-    int ancestor_level
+OMPT_API_FUNCTION(int, ompt_get_parallel_info, (
+    int ancestor_level,
+    ompt_data_t **parallel_data,
+    int *team_size
 ));
 
-OMPT_API_FUNCTION(int, ompt_get_parallel_team_size, (
-    int ancestor_level
+/* task */
+OMPT_API_FUNCTION(int, ompt_get_task_info, (
+    int ancestor_level,
+    int *type,
+    ompt_data_t **task_data,
+    ompt_frame_t **task_frame,
+    ompt_data_t **parallel_data,
+    int *thread_num
 ));
 
-/* task */
-OMPT_API_FUNCTION(ompt_task_id_t, ompt_get_task_id, (
-    int depth
+/* places */
+OMPT_API_FUNCTION(int, ompt_get_num_places, (void));
+
+OMPT_API_FUNCTION(int, ompt_get_place_proc_ids, (
+    int place_num,
+    int ids_size,
+    int *ids
 ));
 
-OMPT_API_FUNCTION(ompt_frame_t *, ompt_get_task_frame, (
-    int depth
+OMPT_API_FUNCTION(int, ompt_get_place_num, (void));
+
+OMPT_API_FUNCTION(int, ompt_get_partition_place_nums, (
+    int place_nums_size,
+    int *place_nums
 ));
 
+/* proc_id */
+OMPT_API_FUNCTION(int, ompt_get_proc_id, (void));
 
 
 /****************************************************************************
@@ -445,25 +625,35 @@ OMPT_API_FUNCTION(void, ompt_mutex_wait, (
  * INITIALIZATION FUNCTIONS
  ***************************************************************************/
 
-OMPT_API_FUNCTION(void, ompt_initialize, (
+typedef struct ompt_fns_t ompt_fns_t;
+
+OMPT_API_FUNCTION(int, ompt_initialize, (
     ompt_function_lookup_t ompt_fn_lookup,
-    const char *runtime_version,
-    unsigned int ompt_version
+    ompt_fns_t *fns
 ));
 
+OMPT_API_FUNCTION(void, ompt_finalize, (
+    ompt_fns_t *fns
+));
+
+struct ompt_fns_t { /* pair of tool entry points; returned by ompt_start_tool */
+    ompt_initialize_t initialize; /* type presumably generated by OMPT_API_FUNCTION(int, ompt_initialize, ...) - confirm */
+    ompt_finalize_t finalize;     /* type presumably generated by OMPT_API_FUNCTION(void, ompt_finalize, ...) - confirm */
+};
 
 /* initialization interface to be defined by tool */
-ompt_initialize_t ompt_tool(void);
+#ifdef _WIN32
+__declspec(dllexport) /* on Windows the tool-defined entry point is exported */
+#endif
+ompt_fns_t * ompt_start_tool(     /* returns the tool's initialize/finalize pair */
+    unsigned int omp_version,     /* version value passed by the caller - NOTE(review): confirm encoding */
+    const char * runtime_version  /* runtime version string */
+);
 
-typedef enum opt_init_mode_e {
-    ompt_init_mode_never  = 0,
-    ompt_init_mode_false  = 1,
-    ompt_init_mode_true   = 2,
-    ompt_init_mode_always = 3
-} ompt_init_mode_t;
+typedef void (*ompt_callback_t)(void);
 
 OMPT_API_FUNCTION(int, ompt_set_callback, (
-    ompt_event_t event,
+    ompt_callbacks_t which,
     ompt_callback_t callback
 ));
 
@@ -477,7 +667,7 @@ typedef enum ompt_set_callback_rc_e {  /* non-standard */
 
 
 OMPT_API_FUNCTION(int, ompt_get_callback, (
-    ompt_event_t event,
+    ompt_callbacks_t which,
     ompt_callback_t *callback
 ));
 
@@ -487,29 +677,37 @@ OMPT_API_FUNCTION(int, ompt_get_callback, (
  * MISCELLANEOUS FUNCTIONS
  ***************************************************************************/
 
-/* control */
-// FIXME: remove workaround for clang
-#if !defined(__clang__) && defined(_OPENMP) && (_OPENMP >= 201307)
-#pragma omp declare target
-#endif
-void ompt_control(
-    uint64_t command,
-    uint64_t modifier
-);
-#if !defined(__clang__) && defined(_OPENMP) && (_OPENMP >= 201307)
-#pragma omp end declare target
-#endif
-
 /* state enumeration */
-OMPT_API_FUNCTION(int, ompt_enumerate_state, (
+OMPT_API_FUNCTION(int, ompt_enumerate_states, (
     int current_state,
     int *next_state,
     const char **next_state_name
 ));
 
+/* mutex implementation enumeration */
+OMPT_API_FUNCTION(int, ompt_enumerate_mutex_impls, (
+    int current_impl,
+    int *next_impl,
+    const char **next_impl_name
+));
+
+/* get_unique_id */
+OMPT_API_FUNCTION(uint64_t, ompt_get_unique_id, (void));
+
 #ifdef  __cplusplus
 };
 #endif
 
-#endif
+/****************************************************************************
+ * TARGET -- NOTE(review): declared after the extern "C" block is closed above; confirm that is intended
+ ***************************************************************************/
+
+ OMPT_API_FUNCTION(int, ompt_get_target_info, (
+    uint64_t *device_num,   /* presumably out-parameter: device number - confirm */
+    ompt_id_t *target_id,   /* presumably out-parameter: target id - confirm */
+    ompt_id_t *host_op_id   /* presumably out-parameter: host operation id - confirm */
+));
+
+ OMPT_API_FUNCTION(int, ompt_get_num_devices, (void));
 
+#endif /* __OMPT__ */
index cafe182..9cabcbc 100644 (file)
@@ -200,6 +200,10 @@ Values for bit flags used in the ident_t to describe the fields.
 #define KMP_IDENT_BARRIER_IMPL_SINGLE 0x0140
 #define KMP_IDENT_BARRIER_IMPL_WORKSHARE 0x01C0
 
+#define KMP_IDENT_WORK_LOOP 0x200 // static loop
+#define KMP_IDENT_WORK_SECTIONS 0x400 // sections
+#define KMP_IDENT_WORK_DISTRIBUTE 0x800 // distribute
+
 /*!
  * The ident structure that describes a source location.
  */
@@ -798,6 +802,10 @@ extern kmp_hws_item_t __kmp_hws_proc;
 extern int __kmp_hws_requested;
 extern int __kmp_hws_abs_flag; // absolute or per-item number requested
 
+#if OMP_50_ENABLED && LIBOMP_OMPT_SUPPORT
+extern char const *__kmp_tool_libraries;
+#endif // OMP_50_ENABLED && LIBOMP_OMPT_SUPPORT
+
 /* ------------------------------------------------------------------------ */
 
 #define KMP_PAD(type, sz)                                                      \
@@ -3314,7 +3322,7 @@ extern kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
 extern kmp_team_t *
 __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
 #if OMPT_SUPPORT
-                    ompt_parallel_id_t ompt_parallel_id,
+                    ompt_data_t ompt_parallel_data,
 #endif
                     kmp_proc_bind_t proc_bind, kmp_internal_control_t *new_icvs,
                     int argc USE_NESTED_HOT_ARG(kmp_info_t *thr));
@@ -3322,7 +3330,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
 extern kmp_team_t *
 __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
 #if OMPT_SUPPORT
-                    ompt_parallel_id_t ompt_parallel_id,
+                    ompt_id_t ompt_parallel_id,
 #endif
                     kmp_internal_control_t *new_icvs,
                     int argc USE_NESTED_HOT_ARG(kmp_info_t *thr));
@@ -3362,9 +3370,6 @@ enum fork_context_e {
 };
 extern int __kmp_fork_call(ident_t *loc, int gtid,
                            enum fork_context_e fork_context, kmp_int32 argc,
-#if OMPT_SUPPORT
-                           void *unwrapped_task,
-#endif
                            microtask_t microtask, launch_t invoker,
 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
 #if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64) && KMP_OS_LINUX
index 7037ff4..77df975 100644 (file)
@@ -361,19 +361,20 @@ typedef kmp_queuing_lock_t kmp_atomic_lock_t;
 
 static inline void __kmp_acquire_atomic_lock(kmp_atomic_lock_t *lck,
                                              kmp_int32 gtid) {
-#if OMPT_SUPPORT && OMPT_TRACE
-  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_wait_atomic)) {
-    ompt_callbacks.ompt_callback(ompt_event_wait_atomic)((ompt_wait_id_t)lck);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.ompt_callback_mutex_acquire) { // tool hook before acquire
+    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
+        ompt_mutex_atomic, 0, ompt_mutex_impl_queuing, (ompt_wait_id_t)lck,
+        OMPT_GET_RETURN_ADDRESS(0)); // args: kind, hint=0, impl, wait_id, ra
   }
 #endif
 
   __kmp_acquire_queuing_lock(lck, gtid);
 
-#if OMPT_SUPPORT && OMPT_TRACE
-  if (ompt_enabled &&
-      ompt_callbacks.ompt_callback(ompt_event_acquired_atomic)) {
-    ompt_callbacks.ompt_callback(ompt_event_acquired_atomic)(
-        (ompt_wait_id_t)lck);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.ompt_callback_mutex_acquired) { // after acquire returned
+    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
+        ompt_mutex_atomic, (ompt_wait_id_t)lck, OMPT_GET_RETURN_ADDRESS(0));
   }
 #endif
 }
@@ -386,10 +387,10 @@ static inline int __kmp_test_atomic_lock(kmp_atomic_lock_t *lck,
 static inline void __kmp_release_atomic_lock(kmp_atomic_lock_t *lck,
                                              kmp_int32 gtid) {
   __kmp_release_queuing_lock(lck, gtid);
-#if OMPT_SUPPORT && OMPT_BLAME
-  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_release_atomic)) {
-    ompt_callbacks.ompt_callback(ompt_event_release_atomic)(
-        (ompt_wait_id_t)lck);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.ompt_callback_mutex_released) { // after release call
+    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
+        ompt_mutex_atomic, (ompt_wait_id_t)lck, OMPT_GET_RETURN_ADDRESS(0));
   }
 #endif
 }
index 93e3fb8..db372b5 100644 (file)
@@ -16,6 +16,9 @@
 #include "kmp_itt.h"
 #include "kmp_os.h"
 #include "kmp_stats.h"
+#if OMPT_SUPPORT
+#include "ompt-specific.h"
+#endif
 
 #if KMP_MIC
 #include <immintrin.h>
@@ -1224,8 +1227,9 @@ int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
   int status = 0;
   ident_t *loc = __kmp_threads[gtid]->th.th_ident;
 #if OMPT_SUPPORT
-  ompt_task_id_t my_task_id;
-  ompt_parallel_id_t my_parallel_id;
+  ompt_data_t *my_task_data;
+  ompt_data_t *my_parallel_data;
+  void *return_address;
 #endif
 
   KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n", gtid,
@@ -1233,28 +1237,26 @@ int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
 
   ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
 #if OMPT_SUPPORT
-  if (ompt_enabled) {
-#if OMPT_BLAME
-    my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
-    my_parallel_id = team->t.ompt_team_info.parallel_id;
-
-#if OMPT_TRACE
-    if (this_thr->th.ompt_thread_info.state == ompt_state_wait_single) {
-      if (ompt_callbacks.ompt_callback(ompt_event_single_others_end)) {
-        ompt_callbacks.ompt_callback(ompt_event_single_others_end)(
-            my_parallel_id, my_task_id);
-      }
+  if (ompt_enabled.enabled) {
+#if OMPT_OPTIONAL
+    my_task_data = OMPT_CUR_TASK_DATA(this_thr);
+    my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
+    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
+    if (ompt_enabled.ompt_callback_sync_region) {
+      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
+          ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data,
+          my_task_data, return_address);
     }
-#endif
-    if (ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
-      ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(my_parallel_id,
-                                                             my_task_id);
+    if (ompt_enabled.ompt_callback_sync_region_wait) {
+      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
+          ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data,
+          my_task_data, return_address);
     }
 #endif
     // It is OK to report the barrier state after the barrier begin callback.
     // According to the OMPT specification, a compliant implementation may
     // even delay reporting this state until the barrier begins to wait.
-    this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
+    this_thr->th.ompt_thread_info.state = omp_state_wait_barrier;
   }
 #endif
 
@@ -1489,14 +1491,20 @@ int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
                 __kmp_tid_from_gtid(gtid), status));
 
 #if OMPT_SUPPORT
-  if (ompt_enabled) {
-#if OMPT_BLAME
-    if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
-      ompt_callbacks.ompt_callback(ompt_event_barrier_end)(my_parallel_id,
-                                                           my_task_id);
+  if (ompt_enabled.enabled) {
+#if OMPT_OPTIONAL
+    if (ompt_enabled.ompt_callback_sync_region_wait) {
+      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
+          ompt_sync_region_barrier, ompt_scope_end, my_parallel_data,
+          my_task_data, return_address);
+    }
+    if (ompt_enabled.ompt_callback_sync_region) {
+      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
+          ompt_sync_region_barrier, ompt_scope_end, my_parallel_data,
+          my_task_data, return_address);
     }
 #endif
-    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
+    this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
   }
 #endif
   ANNOTATE_BARRIER_END(&team->t.t_bar);
@@ -1593,14 +1601,31 @@ void __kmp_join_barrier(int gtid) {
 
   ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
 #if OMPT_SUPPORT
-#if OMPT_TRACE
-  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
-    ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(
-        team->t.ompt_team_info.parallel_id,
-        team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
-  }
+  ompt_data_t *my_task_data;
+  ompt_data_t *my_parallel_data;
+  if (ompt_enabled.enabled) {
+#if OMPT_OPTIONAL
+    void *codeptr = NULL;
+    int ds_tid = this_thr->th.th_info.ds.ds_tid;
+    if (KMP_MASTER_TID(ds_tid) &&
+        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
+         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
+      codeptr = team->t.ompt_team_info.master_return_address;
+    my_task_data = OMPT_CUR_TASK_DATA(this_thr);
+    my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
+    if (ompt_enabled.ompt_callback_sync_region) {
+      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
+          ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data,
+          my_task_data, codeptr);
+    }
+    if (ompt_enabled.ompt_callback_sync_region_wait) {
+      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
+          ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data,
+          my_task_data, codeptr);
+    }
 #endif
-  this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
+    this_thr->th.ompt_thread_info.state = omp_state_wait_barrier_implicit;
+  }
 #endif
 
   if (__kmp_tasking_mode == tskm_extra_barrier) {
@@ -1758,20 +1783,6 @@ void __kmp_join_barrier(int gtid) {
   KA_TRACE(10,
            ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid));
 
-#if OMPT_SUPPORT
-  if (ompt_enabled) {
-#if OMPT_BLAME
-    if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
-      ompt_callbacks.ompt_callback(ompt_event_barrier_end)(
-          team->t.ompt_team_info.parallel_id,
-          team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
-    }
-#endif
-
-    // return to default state
-    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
-  }
-#endif
   ANNOTATE_BARRIER_END(&team->t.t_bar);
 }
 
@@ -1869,6 +1880,39 @@ void __kmp_fork_barrier(int gtid, int tid) {
   }
   }
 
+#if OMPT_SUPPORT
+  if (ompt_enabled.enabled) {
+    if (this_thr->th.ompt_thread_info.state ==
+        omp_state_wait_barrier_implicit) {
+      int ds_tid = this_thr->th.th_info.ds.ds_tid;
+      ompt_data_t *tId = (team) ? OMPT_CUR_TASK_DATA(this_thr)
+                                : &(this_thr->th.ompt_thread_info.task_data);
+      this_thr->th.ompt_thread_info.state = omp_state_overhead;
+#if OMPT_OPTIONAL
+      void *codeptr = NULL;
+      if (KMP_MASTER_TID(ds_tid) &&
+          (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
+           ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
+        codeptr = team->t.ompt_team_info.master_return_address;
+      if (ompt_enabled.ompt_callback_sync_region_wait) {
+        ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
+            ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
+      }
+      if (ompt_enabled.ompt_callback_sync_region) {
+        ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
+            ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
+      }
+#endif
+      if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
+        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+            ompt_scope_end, NULL, tId, 0, ds_tid);
+      }
+      // return to the default (overhead) state
+      this_thr->th.ompt_thread_info.state = omp_state_overhead;
+    }
+  }
+#endif
+
   // Early exit for reaping threads releasing forkjoin barrier
   if (TCR_4(__kmp_global.g.g_done)) {
     this_thr->th.th_task_team = NULL;
index efc67fc..71c71ec 100644 (file)
@@ -12,6 +12,9 @@
 #include "kmp_i18n.h"
 #include "kmp_io.h"
 #include "kmp_str.h"
+#if OMPT_SUPPORT
+#include "ompt-specific.h"
+#endif
 
 #if OMP_40_ENABLED
 
@@ -51,11 +54,25 @@ kmp_int32 __kmpc_cancel(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) {
         kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(
             &(this_team->t.t_cancel_request), cancel_noreq, cncl_kind);
         if (old == cancel_noreq || old == cncl_kind) {
-          // printf("__kmpc_cancel: this_team->t.t_cancel_request=%d @ %p\n",
-          //       this_team->t.t_cancel_request,
-          //       &(this_team->t.t_cancel_request));
-          // we do not have a cancellation request in this team or we do have
-          // one that matches the current request -> cancel
+// we do not have a cancellation request in this team or we do have
+// one that matches the current request -> cancel
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+          if (ompt_enabled.ompt_callback_cancel) {
+            ompt_data_t *task_data;
+            __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL,
+                                          NULL);
+            ompt_cancel_flag_t type = ompt_cancel_parallel;
+            if (cncl_kind == cancel_parallel)
+              type = ompt_cancel_parallel;
+            else if (cncl_kind == cancel_loop)
+              type = ompt_cancel_do;
+            else if (cncl_kind == cancel_sections)
+              type = ompt_cancel_sections;
+            ompt_callbacks.ompt_callback(ompt_callback_cancel)(
+                task_data, type | ompt_cancel_activated,
+                OMPT_GET_RETURN_ADDRESS(0));
+          }
+#endif
           return 1 /* true */;
         }
         break;
@@ -75,8 +92,18 @@ kmp_int32 __kmpc_cancel(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) {
           kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(
               &(taskgroup->cancel_request), cancel_noreq, cncl_kind);
           if (old == cancel_noreq || old == cncl_kind) {
-            // we do not have a cancellation request in this taskgroup or we do
-            // have one that matches the current request -> cancel
+// we do not have a cancellation request in this taskgroup or we do
+// have one that matches the current request -> cancel
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+            if (ompt_enabled.ompt_callback_cancel) {
+              ompt_data_t *task_data;
+              __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL,
+                                            NULL);
+              ompt_callbacks.ompt_callback(ompt_callback_cancel)(
+                  task_data, ompt_cancel_taskgroup | ompt_cancel_activated,
+                  OMPT_GET_RETURN_ADDRESS(0));
+            }
+#endif
             return 1 /* true */;
           }
         } else {
@@ -134,8 +161,25 @@ kmp_int32 __kmpc_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid,
         KMP_DEBUG_ASSERT(this_team);
         if (this_team->t.t_cancel_request) {
           if (cncl_kind == this_team->t.t_cancel_request) {
-            // the request in the team structure matches the type of
-            // cancellation point so we can cancel
+// the request in the team structure matches the type of
+// cancellation point so we can cancel
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+            if (ompt_enabled.ompt_callback_cancel) {
+              ompt_data_t *task_data;
+              __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL,
+                                            NULL);
+              ompt_cancel_flag_t type = ompt_cancel_parallel;
+              if (cncl_kind == cancel_parallel)
+                type = ompt_cancel_parallel;
+              else if (cncl_kind == cancel_loop)
+                type = ompt_cancel_do;
+              else if (cncl_kind == cancel_sections)
+                type = ompt_cancel_sections;
+              ompt_callbacks.ompt_callback(ompt_callback_cancel)(
+                  task_data, type | ompt_cancel_detected,
+                  OMPT_GET_RETURN_ADDRESS(0));
+            }
+#endif
             return 1 /* true */;
           }
           KMP_ASSERT(0 /* false */);
@@ -158,7 +202,18 @@ kmp_int32 __kmpc_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid,
 
         taskgroup = task->td_taskgroup;
         if (taskgroup) {
-          // return the current status of cancellation for the taskgroup
+// return the current status of cancellation for the taskgroup
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+          if (ompt_enabled.ompt_callback_cancel &&
+              !!taskgroup->cancel_request) {
+            ompt_data_t *task_data;
+            __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL,
+                                          NULL);
+            ompt_callbacks.ompt_callback(ompt_callback_cancel)(
+                task_data, ompt_cancel_taskgroup | ompt_cancel_detected,
+                OMPT_GET_RETURN_ADDRESS(0));
+          }
+#endif
           return !!taskgroup->cancel_request;
         } else {
           // if a cancellation point is encountered by a task that does not
index 03f8cfd..571658a 100644 (file)
 #define OMPT_DEBUG LIBOMP_OMPT_DEBUG
 #cmakedefine01 LIBOMP_OMPT_SUPPORT
 #define OMPT_SUPPORT LIBOMP_OMPT_SUPPORT
-#cmakedefine01 LIBOMP_OMPT_BLAME
-#define OMPT_BLAME LIBOMP_OMPT_BLAME
-#cmakedefine01 LIBOMP_OMPT_TRACE
-#define OMPT_TRACE LIBOMP_OMPT_TRACE
+#cmakedefine01 LIBOMP_OMPT_OPTIONAL
+#define OMPT_OPTIONAL LIBOMP_OMPT_OPTIONAL
 #cmakedefine01 LIBOMP_USE_ADAPTIVE_LOCKS
 #define KMP_USE_ADAPTIVE_LOCKS LIBOMP_USE_ADAPTIVE_LOCKS
 #define KMP_DEBUG_ADAPTIVE_LOCKS 0
index 585e961..52440c9 100644 (file)
@@ -20,7 +20,6 @@
 #include "kmp_stats.h"
 
 #if OMPT_SUPPORT
-#include "ompt-internal.h"
 #include "ompt-specific.h"
 #endif
 
@@ -278,7 +277,7 @@ void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
 
 #if OMPT_SUPPORT
     ompt_frame_t *ompt_frame;
-    if (ompt_enabled) {
+    if (ompt_enabled.enabled) {
       kmp_info_t *master_th = __kmp_threads[gtid];
       kmp_team_t *parent_team = master_th->th.th_team;
       ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
@@ -289,7 +288,8 @@ void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
         ompt_frame = &(
             parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
       }
-      ompt_frame->reenter_runtime_frame = __builtin_frame_address(1);
+      ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+      OMPT_STORE_RETURN_ADDRESS(gtid);
     }
 #endif
 
@@ -297,9 +297,6 @@ void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
     SSC_MARK_FORKING();
 #endif
     __kmp_fork_call(loc, gtid, fork_context_intel, argc,
-#if OMPT_SUPPORT
-                    VOLATILE_CAST(void *) microtask, // "unwrapped" task
-#endif
                     VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                     VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
@@ -371,11 +368,11 @@ void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
 #if OMPT_SUPPORT
   kmp_team_t *parent_team = this_thr->th.th_team;
   int tid = __kmp_tid_from_gtid(gtid);
-  if (ompt_enabled) {
+  if (ompt_enabled.enabled) {
     parent_team->t.t_implicit_task_taskdata[tid]
-        .ompt_task_info.frame.reenter_runtime_frame =
-        __builtin_frame_address(1);
+        .ompt_task_info.frame.reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
   }
+  OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
 
   // check if __kmpc_push_num_teams called, set default number of teams
@@ -388,9 +385,6 @@ void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
   KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
 
   __kmp_fork_call(loc, gtid, fork_context_intel, argc,
-#if OMPT_SUPPORT
-                  VOLATILE_CAST(void *) microtask, // "unwrapped" task
-#endif
                   VOLATILE_CAST(microtask_t)
                       __kmp_teams_master, // "wrapped" task
                   VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
@@ -433,9 +427,12 @@ conditional parallel region, like this,
 when the condition is false.
 */
 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
-  // The implementation is now in kmp_runtime.cpp so that it can share static
-  // functions with kmp_fork_call since the tasks to be done are similar in
-  // each case.
+// The implementation is now in kmp_runtime.cpp so that it can share static
+// functions with kmp_fork_call since the tasks to be done are similar in
+// each case.
+#if OMPT_SUPPORT
+  OMPT_STORE_RETURN_ADDRESS(global_tid); // NOTE(review): presumably saved for OMPT callbacks raised downstream - confirm
+#endif
   __kmp_serialized_parallel(loc, global_tid);
 }
 
@@ -482,6 +479,30 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
   KMP_DEBUG_ASSERT(serial_team->t.t_threads);
   KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
 
+#if OMPT_SUPPORT
+  if (ompt_enabled.enabled &&
+      this_thr->th.ompt_thread_info.state != omp_state_overhead) {
+    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_runtime_frame = NULL;
+    if (ompt_enabled.ompt_callback_implicit_task) {
+      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
+          __kmp_tid_from_gtid(global_tid));
+    }
+
+    // reset/clear the task id only after unlinking the task
+    ompt_data_t *parent_task_data;
+    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
+
+    if (ompt_enabled.ompt_callback_parallel_end) {
+      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
+          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
+          ompt_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
+    }
+    __ompt_lw_taskteam_unlink(this_thr);
+    this_thr->th.ompt_thread_info.state = omp_state_overhead;
+  }
+#endif
+
   /* If necessary, pop the internal control stack values and replace the team
    * values */
   top = serial_team->t.t_control_stack_top;
@@ -554,6 +575,12 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
 
   if (__kmp_env_consistency_check)
     __kmp_pop_parallel(global_tid, NULL);
+#if OMPT_SUPPORT
+  if (ompt_enabled.enabled)
+    this_thr->th.ompt_thread_info.state =
+        ((this_thr->th.th_team_serialized) ? omp_state_work_serial
+                                           : omp_state_work_parallel);
+#endif
 }
 
 /*!
@@ -617,6 +644,13 @@ void __kmpc_flush(ident_t *loc) {
 #else
 #error Unknown or unsupported architecture
 #endif
+
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.ompt_callback_flush) {
+    ompt_callbacks.ompt_callback(ompt_callback_flush)(
+        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
+  }
+#endif
 }
 
 /* -------------------------------------------------------------------------- */
@@ -642,12 +676,13 @@ void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
     __kmp_check_barrier(global_tid, ct_barrier, loc);
   }
 
-#if OMPT_SUPPORT && OMPT_TRACE
+#if OMPT_SUPPORT
   ompt_frame_t *ompt_frame;
-  if (ompt_enabled) {
-    ompt_frame = __ompt_get_task_frame_internal(0);
+  if (ompt_enabled.enabled) {
+    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
     if (ompt_frame->reenter_runtime_frame == NULL)
-      ompt_frame->reenter_runtime_frame = __builtin_frame_address(1);
+      ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+    OMPT_STORE_RETURN_ADDRESS(global_tid);
   }
 #endif
   __kmp_threads[global_tid]->th.th_ident = loc;
@@ -659,8 +694,8 @@ void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
   // 4) no sync is required
 
   __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
-#if OMPT_SUPPORT && OMPT_TRACE
-  if (ompt_enabled) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.enabled) {
     ompt_frame->reenter_runtime_frame = NULL;
   }
 #endif
@@ -687,16 +722,17 @@ kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
     status = 1;
   }
 
-#if OMPT_SUPPORT && OMPT_TRACE
+#if OMPT_SUPPORT && OMPT_OPTIONAL
   if (status) {
-    if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_master_begin)) {
+    if (ompt_enabled.ompt_callback_master) {
       kmp_info_t *this_thr = __kmp_threads[global_tid];
       kmp_team_t *team = this_thr->th.th_team;
 
       int tid = __kmp_tid_from_gtid(global_tid);
-      ompt_callbacks.ompt_callback(ompt_event_master_begin)(
-          team->t.ompt_team_info.parallel_id,
-          team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
+      ompt_callbacks.ompt_callback(ompt_callback_master)(
+          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
+          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
+          OMPT_GET_RETURN_ADDRESS(0));
     }
   }
 #endif
@@ -732,14 +768,15 @@ void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
   KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
   KMP_POP_PARTITIONED_TIMER();
 
-#if OMPT_SUPPORT && OMPT_TRACE
+#if OMPT_SUPPORT && OMPT_OPTIONAL
   kmp_info_t *this_thr = __kmp_threads[global_tid];
   kmp_team_t *team = this_thr->th.th_team;
-  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_master_end)) {
+  if (ompt_enabled.ompt_callback_master) {
     int tid = __kmp_tid_from_gtid(global_tid);
-    ompt_callbacks.ompt_callback(ompt_event_master_end)(
-        team->t.ompt_team_info.parallel_id,
-        team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
+    ompt_callbacks.ompt_callback(ompt_callback_master)(
+        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
+        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
+        OMPT_GET_RETURN_ADDRESS(0));
   }
 #endif
 
@@ -776,16 +813,24 @@ void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
 
   th = __kmp_threads[gtid];
 
-#if OMPT_SUPPORT && OMPT_TRACE
-  if (ompt_enabled) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  kmp_team_t *team;
+  ompt_wait_id_t lck;
+  void *codeptr_ra;
+  if (ompt_enabled.enabled) {
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+    team = __kmp_team_from_gtid(gtid);
+    lck = (ompt_wait_id_t)&team->t.t_ordered.dt.t_value;
     /* OMPT state update */
-    th->th.ompt_thread_info.wait_id = (uint64_t)loc;
-    th->th.ompt_thread_info.state = ompt_state_wait_ordered;
+    th->th.ompt_thread_info.wait_id = lck;
+    th->th.ompt_thread_info.state = omp_state_wait_ordered;
 
     /* OMPT event callback */
-    if (ompt_callbacks.ompt_callback(ompt_event_wait_ordered)) {
-      ompt_callbacks.ompt_callback(ompt_event_wait_ordered)(
-          th->th.ompt_thread_info.wait_id);
+    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
+    if (ompt_enabled.ompt_callback_mutex_acquire) {
+      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
+          ompt_mutex_ordered, omp_lock_hint_none, ompt_mutex_impl_spin,
+          (ompt_wait_id_t)lck, codeptr_ra);
     }
   }
 #endif
@@ -795,16 +840,16 @@ void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
   else
     __kmp_parallel_deo(&gtid, &cid, loc);
 
-#if OMPT_SUPPORT && OMPT_TRACE
-  if (ompt_enabled) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.enabled) {
     /* OMPT state update */
-    th->th.ompt_thread_info.state = ompt_state_work_parallel;
+    th->th.ompt_thread_info.state = omp_state_work_parallel;
     th->th.ompt_thread_info.wait_id = 0;
 
     /* OMPT event callback */
-    if (ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)) {
-      ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)(
-          th->th.ompt_thread_info.wait_id);
+    if (ompt_enabled.ompt_callback_mutex_acquired) {
+      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
+          ompt_mutex_ordered, (ompt_wait_id_t)lck, codeptr_ra);
     }
   }
 #endif
@@ -839,11 +884,13 @@ void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
   else
     __kmp_parallel_dxo(&gtid, &cid, loc);
 
-#if OMPT_SUPPORT && OMPT_BLAME
-  if (ompt_enabled &&
-      ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
-    ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
-        th->th.ompt_thread_info.wait_id);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+  if (ompt_enabled.ompt_callback_mutex_released) {
+    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
+        ompt_mutex_ordered,
+        (ompt_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
+        OMPT_LOAD_RETURN_ADDRESS(gtid));
   }
 #endif
 }
@@ -1063,11 +1110,18 @@ This function blocks until the executing thread can enter the critical section.
 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                      kmp_critical_name *crit) {
 #if KMP_USE_DYNAMIC_LOCK
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(global_tid);
+#endif // OMPT_SUPPORT
   __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
 #else
   KMP_COUNT_BLOCK(OMP_CRITICAL);
   KMP_TIME_PARTITIONED_BLOCK(
       OMP_critical_wait); /* Time spent waiting to enter the critical section */
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  omp_state_t prev_state = omp_state_undefined;
+  ompt_thread_info_t ti;
+#endif
   kmp_user_lock_p lck;
 
   KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
@@ -1101,6 +1155,25 @@ void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
 #if USE_ITT_BUILD
   __kmp_itt_critical_acquiring(lck);
 #endif /* USE_ITT_BUILD */
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+  void *codeptr_ra = NULL;
+  if (ompt_enabled.enabled) {
+    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
+    /* OMPT state update */
+    prev_state = ti.state;
+    ti.wait_id = (ompt_wait_id_t)lck;
+    ti.state = omp_state_wait_critical;
+
+    /* OMPT event callback */
+    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
+    if (ompt_enabled.ompt_callback_mutex_acquire) {
+      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
+          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
+          (ompt_wait_id_t)crit, codeptr_ra);
+    }
+  }
+#endif
   // Value of 'crit' should be good for using as a critical_id of the critical
   // section directive.
   __kmp_acquire_user_lock_with_checks(lck, global_tid);
@@ -1108,6 +1181,19 @@ void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
 #if USE_ITT_BUILD
   __kmp_itt_critical_acquired(lck);
 #endif /* USE_ITT_BUILD */
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.enabled) {
+    /* OMPT state update */
+    ti.state = prev_state;
+    ti.wait_id = 0;
+
+    /* OMPT event callback */
+    if (ompt_enabled.ompt_callback_mutex_acquired) {
+      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
+          ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr_ra);
+    }
+  }
+#endif
 
   KMP_START_EXPLICIT_TIMER(OMP_critical);
   KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
@@ -1160,6 +1246,76 @@ static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
   return __kmp_user_lock_seq;
 }
 
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+static ompt_mutex_impl_t // OMPT implementation kind of a user lock (by tag) or of a given indirect lock
+__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
+  if (user_lock) { // classify by the tag extracted from user_lock
+    switch (KMP_EXTRACT_D_TAG(user_lock)) {
+    case 0: // tag 0 is not classified here; resolved through the lookup below
+      break;
+#if KMP_USE_FUTEX
+    case locktag_futex:
+      return ompt_mutex_impl_queuing;
+#endif
+    case locktag_tas:
+      return ompt_mutex_impl_spin;
+#if KMP_USE_TSX
+    case locktag_hle:
+      return ompt_mutex_impl_speculative;
+#endif
+    default:
+      return ompt_mutex_impl_unknown;
+    }
+    ilock = KMP_LOOKUP_I_LOCK(user_lock); // replaces any ilock passed by the caller
+  }
+  KMP_ASSERT(ilock); // either looked up above or supplied by the caller
+  switch (ilock->type) {
+#if KMP_USE_TSX
+  case locktag_adaptive:
+  case locktag_rtm:
+    return ompt_mutex_impl_speculative;
+#endif
+  case locktag_nested_tas:
+    return ompt_mutex_impl_spin;
+#if KMP_USE_FUTEX
+  case locktag_nested_futex: // shares the queuing result of the cases below
+#endif
+  case locktag_ticket:
+  case locktag_queuing:
+  case locktag_drdpa:
+  case locktag_nested_ticket:
+  case locktag_nested_queuing:
+  case locktag_nested_drdpa:
+    return ompt_mutex_impl_queuing;
+  default:
+    return ompt_mutex_impl_unknown;
+  }
+}
+
+// For locks without dynamic binding: maps the global __kmp_user_lock_kind to an OMPT mutex implementation kind.
+static ompt_mutex_impl_t __ompt_get_mutex_impl_type() { // NOTE(review): __kmpc_critical calls this earlier in the file - confirm a declaration is visible there
+  switch (__kmp_user_lock_kind) {
+  case lk_tas:
+    return ompt_mutex_impl_spin;
+#if KMP_USE_FUTEX
+  case lk_futex: // shares the queuing result of the cases below
+#endif
+  case lk_ticket:
+  case lk_queuing:
+  case lk_drdpa:
+    return ompt_mutex_impl_queuing;
+#if KMP_USE_TSX
+  case lk_hle:
+  case lk_rtm:
+  case lk_adaptive:
+    return ompt_mutex_impl_speculative;
+#endif
+  default:
+    return ompt_mutex_impl_unknown;
+  }
+}
+#endif
+
 /*!
 @ingroup WORK_SHARING
 @param loc  source location information.
@@ -1177,6 +1333,14 @@ void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                                kmp_critical_name *crit, uintptr_t hint) {
   KMP_COUNT_BLOCK(OMP_CRITICAL);
   kmp_user_lock_p lck;
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  omp_state_t prev_state = omp_state_undefined;
+  ompt_thread_info_t ti;
+  // This is the case, if called from __kmpc_critical:
+  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
+  if (!codeptr)
+    codeptr = OMPT_GET_RETURN_ADDRESS(0);
+#endif
 
   KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
 
@@ -1203,6 +1367,22 @@ void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
 #if USE_ITT_BUILD
     __kmp_itt_critical_acquiring(lck);
 #endif
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    if (ompt_enabled.enabled) {
+      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
+      /* OMPT state update */
+      prev_state = ti.state;
+      ti.wait_id = (ompt_wait_id_t)lck;
+      ti.state = omp_state_wait_critical;
+
+      /* OMPT event callback */
+      if (ompt_enabled.ompt_callback_mutex_acquire) {
+        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
+            ompt_mutex_critical, (unsigned int)hint,
+            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)crit, codeptr);
+      }
+    }
+#endif
 #if KMP_USE_INLINED_TAS
     if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
       KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
@@ -1225,12 +1405,41 @@ void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
 #if USE_ITT_BUILD
     __kmp_itt_critical_acquiring(lck);
 #endif
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    if (ompt_enabled.enabled) {
+      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
+      /* OMPT state update */
+      prev_state = ti.state;
+      ti.wait_id = (ompt_wait_id_t)lck;
+      ti.state = omp_state_wait_critical;
+
+      /* OMPT event callback */
+      if (ompt_enabled.ompt_callback_mutex_acquire) {
+        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
+            ompt_mutex_critical, (unsigned int)hint,
+            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)crit, codeptr);
+      }
+    }
+#endif
     KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
   }
 
 #if USE_ITT_BUILD
   __kmp_itt_critical_acquired(lck);
 #endif /* USE_ITT_BUILD */
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.enabled) {
+    /* OMPT state update */
+    ti.state = prev_state;
+    ti.wait_id = 0;
+
+    /* OMPT event callback */
+    if (ompt_enabled.ompt_callback_mutex_acquired) {
+      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
+          ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr);
+    }
+  }
+#endif
 
   KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
   KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
@@ -1317,14 +1526,18 @@ void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
   // section directive.
   __kmp_release_user_lock_with_checks(lck, global_tid);
 
-#if OMPT_SUPPORT && OMPT_BLAME
-  if (ompt_enabled &&
-      ompt_callbacks.ompt_callback(ompt_event_release_critical)) {
-    ompt_callbacks.ompt_callback(ompt_event_release_critical)((uint64_t)lck);
+#endif // KMP_USE_DYNAMIC_LOCK
+
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  /* OMPT release event: fired after the unlock, here, for all #if branches */
+  OMPT_STORE_RETURN_ADDRESS(global_tid);
+  if (ompt_enabled.ompt_callback_mutex_released) {
+    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
+        ompt_mutex_critical, (ompt_wait_id_t)crit,
+        OMPT_LOAD_RETURN_ADDRESS(global_tid)); // same thread as the store above
+  }
+#endif
 
-#endif // KMP_USE_DYNAMIC_LOCK
   KMP_POP_PARTITIONED_TIMER();
   KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
 }
@@ -1349,10 +1562,24 @@ kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
   if (__kmp_env_consistency_check)
     __kmp_check_barrier(global_tid, ct_barrier, loc);
 
+#if OMPT_SUPPORT
+  ompt_frame_t *ompt_frame;
+  if (ompt_enabled.enabled) {
+    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
+    if (ompt_frame->reenter_runtime_frame == NULL)
+      ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+    OMPT_STORE_RETURN_ADDRESS(global_tid);
+  }
+#endif
 #if USE_ITT_NOTIFY
   __kmp_threads[global_tid]->th.th_ident = loc;
 #endif
   status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.enabled) {
+    ompt_frame->reenter_runtime_frame = NULL;
+  }
+#endif
 
   return (status != 0) ? 0 : 1;
 }
@@ -1397,10 +1624,24 @@ kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
     __kmp_check_barrier(global_tid, ct_barrier, loc);
   }
 
+#if OMPT_SUPPORT
+  ompt_frame_t *ompt_frame;
+  if (ompt_enabled.enabled) {
+    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
+    if (ompt_frame->reenter_runtime_frame == NULL)
+      ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+    OMPT_STORE_RETURN_ADDRESS(global_tid);
+  }
+#endif
 #if USE_ITT_NOTIFY
   __kmp_threads[global_tid]->th.th_ident = loc;
 #endif
   __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.enabled) {
+    ompt_frame->reenter_runtime_frame = NULL;
+  }
+#endif
 
   ret = __kmpc_master(loc, global_tid);
 
@@ -1443,26 +1684,33 @@ kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
     KMP_PUSH_PARTITIONED_TIMER(OMP_single);
   }
 
-#if OMPT_SUPPORT && OMPT_TRACE
+#if OMPT_SUPPORT && OMPT_OPTIONAL
   kmp_info_t *this_thr = __kmp_threads[global_tid];
   kmp_team_t *team = this_thr->th.th_team;
   int tid = __kmp_tid_from_gtid(global_tid);
 
-  if (ompt_enabled) {
+  if (ompt_enabled.enabled) {
     if (rc) {
-      if (ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)) {
-        ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)(
-            team->t.ompt_team_info.parallel_id,
-            team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id,
-            team->t.ompt_team_info.microtask);
+      if (ompt_enabled.ompt_callback_work) {
+        ompt_callbacks.ompt_callback(ompt_callback_work)(
+            ompt_work_single_executor, ompt_scope_begin,
+            &(team->t.ompt_team_info.parallel_data),
+            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
+            1, OMPT_GET_RETURN_ADDRESS(0));
       }
     } else {
-      if (ompt_callbacks.ompt_callback(ompt_event_single_others_begin)) {
-        ompt_callbacks.ompt_callback(ompt_event_single_others_begin)(
-            team->t.ompt_team_info.parallel_id,
-            team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
+      if (ompt_enabled.ompt_callback_work) {
+        ompt_callbacks.ompt_callback(ompt_callback_work)(
+            ompt_work_single_other, ompt_scope_begin,
+            &(team->t.ompt_team_info.parallel_data),
+            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
+            1, OMPT_GET_RETURN_ADDRESS(0));
+        ompt_callbacks.ompt_callback(ompt_callback_work)(
+            ompt_work_single_other, ompt_scope_end,
+            &(team->t.ompt_team_info.parallel_data),
+            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
+            1, OMPT_GET_RETURN_ADDRESS(0));
       }
-      this_thr->th.ompt_thread_info.state = ompt_state_wait_single;
     }
   }
 #endif
@@ -1483,16 +1731,17 @@ void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
   __kmp_exit_single(global_tid);
   KMP_POP_PARTITIONED_TIMER();
 
-#if OMPT_SUPPORT && OMPT_TRACE
+#if OMPT_SUPPORT && OMPT_OPTIONAL
   kmp_info_t *this_thr = __kmp_threads[global_tid];
   kmp_team_t *team = this_thr->th.th_team;
   int tid = __kmp_tid_from_gtid(global_tid);
 
-  if (ompt_enabled &&
-      ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)) {
-    ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)(
-        team->t.ompt_team_info.parallel_id,
-        team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
+  if (ompt_enabled.ompt_callback_work) {
+    ompt_callbacks.ompt_callback(ompt_callback_work)(
+        ompt_work_single_executor, ompt_scope_end,
+        &(team->t.ompt_team_info.parallel_data),
+        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
+        OMPT_GET_RETURN_ADDRESS(0));
   }
 #endif
 }
@@ -1507,12 +1756,28 @@ Mark the end of a statically scheduled loop.
 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
   KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
 
-#if OMPT_SUPPORT && OMPT_TRACE
-  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_end)) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.ompt_callback_work) {
+    ompt_work_type_t ompt_work_type = ompt_work_loop;
     ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
-    ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
-    ompt_callbacks.ompt_callback(ompt_event_loop_end)(team_info->parallel_id,
-                                                      task_info->task_id);
+    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+    // Determine workshare type from loc->flags; stays "loop" when loc is NULL
+    if (loc != NULL) {
+      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
+        ompt_work_type = ompt_work_loop;
+      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
+        ompt_work_type = ompt_work_sections;
+      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
+        ompt_work_type = ompt_work_distribute;
+      } else {
+        KMP_ASSERT2(0,
+                    "__kmpc_for_static_fini: can't determine workshare type");
+      }
+      KMP_DEBUG_ASSERT(ompt_work_type);
+    }
+    ompt_callbacks.ompt_callback(ompt_callback_work)(
+        ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
+        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
   }
 #endif
 
@@ -1709,6 +1974,15 @@ void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
   if (didit)
     *data_ptr = cpy_data;
 
+#if OMPT_SUPPORT
+  ompt_frame_t *ompt_frame;
+  if (ompt_enabled.enabled) {
+    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
+    if (ompt_frame->reenter_runtime_frame == NULL)
+      ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+  }
+#endif
 /* This barrier is not a barrier region boundary */
 #if USE_ITT_NOTIFY
   __kmp_threads[gtid]->th.th_ident = loc;
@@ -1721,11 +1995,21 @@ void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
 // Consider next barrier a user-visible barrier for barrier region boundaries
 // Nesting checks are already handled by the single construct checks
 
+#if OMPT_SUPPORT
+  if (ompt_enabled.enabled) {
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+  }
+#endif
 #if USE_ITT_NOTIFY
   __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
 // tasks can overwrite the location)
 #endif
   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.enabled) {
+    ompt_frame->reenter_runtime_frame = NULL;
+  }
+#endif
 }
 
 /* -------------------------------------------------------------------------- */
@@ -1812,6 +2096,19 @@ void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
   }
 
   __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
+
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  // This is the case, if called from omp_init_lock_with_hint:
+  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
+  if (!codeptr)
+    codeptr = OMPT_GET_RETURN_ADDRESS(0);
+  if (ompt_enabled.ompt_callback_lock_init) {
+    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
+        ompt_mutex_lock, (omp_lock_hint_t)hint,
+        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
+        codeptr);
+  }
+#endif
 }
 
 /* initialize the lock with a hint */
@@ -1823,6 +2120,19 @@ void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
   }
 
   __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
+
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  // This is the case, if called from omp_init_lock_with_hint:
+  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
+  if (!codeptr)
+    codeptr = OMPT_GET_RETURN_ADDRESS(0);
+  if (ompt_enabled.ompt_callback_lock_init) {
+    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
+        ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
+        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
+        codeptr);
+  }
+#endif
 }
 
 #endif // KMP_USE_DYNAMIC_LOCK
@@ -1837,6 +2147,19 @@ void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
   }
   __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
 
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  // This is the case, if called from omp_init_lock_with_hint:
+  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
+  if (!codeptr)
+    codeptr = OMPT_GET_RETURN_ADDRESS(0);
+  if (ompt_enabled.ompt_callback_lock_init) {
+    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
+        ompt_mutex_lock, omp_lock_hint_none,
+        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
+        codeptr);
+  }
+#endif
+
 #else // KMP_USE_DYNAMIC_LOCK
 
   static char const *const func = "omp_init_lock";
@@ -1867,9 +2190,15 @@ void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
   INIT_LOCK(lck);
   __kmp_set_user_lock_location(lck, loc);
 
-#if OMPT_SUPPORT && OMPT_TRACE
-  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_init_lock)) {
-    ompt_callbacks.ompt_callback(ompt_event_init_lock)((uint64_t)lck);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  // This is the case, if called from omp_init_lock_with_hint:
+  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
+  if (!codeptr)
+    codeptr = OMPT_GET_RETURN_ADDRESS(0);
+  if (ompt_enabled.ompt_callback_lock_init) {
+    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
+        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
+        (ompt_wait_id_t)user_lock, codeptr);
   }
 #endif
 
@@ -1890,6 +2219,19 @@ void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
   }
   __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
 
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  // This is the case, if called from omp_init_lock_with_hint:
+  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
+  if (!codeptr)
+    codeptr = OMPT_GET_RETURN_ADDRESS(0);
+  if (ompt_enabled.ompt_callback_lock_init) {
+    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
+        ompt_mutex_nest_lock, omp_lock_hint_none,
+        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
+        codeptr);
+  }
+#endif
+
 #else // KMP_USE_DYNAMIC_LOCK
 
   static char const *const func = "omp_init_nest_lock";
@@ -1923,9 +2265,15 @@ void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
   INIT_NESTED_LOCK(lck);
   __kmp_set_user_lock_location(lck, loc);
 
-#if OMPT_SUPPORT && OMPT_TRACE
-  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)) {
-    ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)((uint64_t)lck);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  // This is the case, if called from omp_init_lock_with_hint:
+  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
+  if (!codeptr)
+    codeptr = OMPT_GET_RETURN_ADDRESS(0);
+  if (ompt_enabled.ompt_callback_lock_init) {
+    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
+        ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
+        (ompt_wait_id_t)user_lock, codeptr);
   }
 #endif
 
@@ -1948,6 +2296,22 @@ void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
   }
   __kmp_itt_lock_destroyed(lck);
 #endif
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  // This is the case, if called from omp_init_lock_with_hint:
+  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
+  if (!codeptr)
+    codeptr = OMPT_GET_RETURN_ADDRESS(0);
+  if (ompt_enabled.ompt_callback_lock_destroy) {
+    kmp_user_lock_p lck;
+    if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
+      lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
+    } else {
+      lck = (kmp_user_lock_p)user_lock;
+    }
+    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
+        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
+  }
+#endif
   KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
 #else
   kmp_user_lock_p lck;
@@ -1966,9 +2330,14 @@ void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
     lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
   }
 
-#if OMPT_SUPPORT && OMPT_TRACE
-  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_destroy_lock)) {
-    ompt_callbacks.ompt_callback(ompt_event_destroy_lock)((uint64_t)lck);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  // This is the case, if called from omp_init_lock_with_hint:
+  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
+  if (!codeptr)
+    codeptr = OMPT_GET_RETURN_ADDRESS(0);
+  if (ompt_enabled.ompt_callback_lock_destroy) {
+    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
+        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
   }
 #endif
 
@@ -2001,6 +2370,16 @@ void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
   kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
   __kmp_itt_lock_destroyed(ilk->lock);
 #endif
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  // This is the case, if called from omp_init_lock_with_hint:
+  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
+  if (!codeptr)
+    codeptr = OMPT_GET_RETURN_ADDRESS(0);
+  if (ompt_enabled.ompt_callback_lock_destroy) {
+    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
+        ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
+  }
+#endif
   KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
 
 #else // KMP_USE_DYNAMIC_LOCK
@@ -2023,10 +2402,14 @@ void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
     lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
   }
 
-#if OMPT_SUPPORT && OMPT_TRACE
-  if (ompt_enabled &&
-      ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)) {
-    ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)((uint64_t)lck);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  // This is the case, if called from omp_init_lock_with_hint:
+  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
+  if (!codeptr)
+    codeptr = OMPT_GET_RETURN_ADDRESS(0);
+  if (ompt_enabled.ompt_callback_lock_destroy) {
+    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
+        ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
   }
 #endif
 
@@ -2063,6 +2446,18 @@ void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
       (kmp_user_lock_p)
           user_lock); // itt function will get to the right lock object.
 #endif
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  // This is the case, if called from omp_init_lock_with_hint:
+  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
+  if (!codeptr)
+    codeptr = OMPT_GET_RETURN_ADDRESS(0);
+  if (ompt_enabled.ompt_callback_mutex_acquire) {
+    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
+        ompt_mutex_lock, omp_lock_hint_none,
+        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
+        codeptr);
+  }
+#endif
 #if KMP_USE_INLINED_TAS
   if (tag == locktag_tas && !__kmp_env_consistency_check) {
     KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
@@ -2078,6 +2473,12 @@ void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
 #if USE_ITT_BUILD
   __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
 #endif
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.ompt_callback_mutex_acquired) {
+    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
+        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
+  }
+#endif
 
 #else // KMP_USE_DYNAMIC_LOCK
 
@@ -2100,6 +2501,17 @@ void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
 #if USE_ITT_BUILD
   __kmp_itt_lock_acquiring(lck);
 #endif /* USE_ITT_BUILD */
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  // This is the case, if called from omp_init_lock_with_hint:
+  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
+  if (!codeptr)
+    codeptr = OMPT_GET_RETURN_ADDRESS(0);
+  if (ompt_enabled.ompt_callback_mutex_acquire) {
+    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
+        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
+        (ompt_wait_id_t)lck, codeptr);
+  }
+#endif
 
   ACQUIRE_LOCK(lck, gtid);
 
@@ -2107,9 +2519,10 @@ void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
   __kmp_itt_lock_acquired(lck);
 #endif /* USE_ITT_BUILD */
 
-#if OMPT_SUPPORT && OMPT_TRACE
-  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_acquired_lock)) {
-    ompt_callbacks.ompt_callback(ompt_event_acquired_lock)((uint64_t)lck);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.ompt_callback_mutex_acquired) {
+    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
+        ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
   }
 #endif
 
@@ -2122,14 +2535,41 @@ void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
 #if USE_ITT_BUILD
   __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
 #endif
-  KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  // This is the case, if called from omp_init_lock_with_hint:
+  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
+  if (!codeptr)
+    codeptr = OMPT_GET_RETURN_ADDRESS(0);
+  if (ompt_enabled.enabled) {
+    if (ompt_enabled.ompt_callback_mutex_acquire) {
+      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
+          ompt_mutex_nest_lock, omp_lock_hint_none,
+          __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
+          codeptr);
+    }
+  }
+#endif
+  int acquire_status =
+      KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
 #if USE_ITT_BUILD
   __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
 #endif
 
-#if OMPT_SUPPORT && OMPT_TRACE
-  if (ompt_enabled) {
-    // missing support here: need to know whether acquired first or not
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.enabled) {
+    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
+      if (ompt_enabled.ompt_callback_mutex_acquired) {
+        // lock_first
+        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
+            ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
+      }
+    } else {
+      if (ompt_enabled.ompt_callback_nest_lock) {
+        // lock_next
+        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
+            ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
+      }
+    }
   }
 #endif
 
@@ -2156,6 +2596,19 @@ void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
 #if USE_ITT_BUILD
   __kmp_itt_lock_acquiring(lck);
 #endif /* USE_ITT_BUILD */
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  // This is the case, if called from omp_init_lock_with_hint:
+  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
+  if (!codeptr)
+    codeptr = OMPT_GET_RETURN_ADDRESS(0);
+  if (ompt_enabled.enabled) {
+    if (ompt_enabled.ompt_callback_mutex_acquire) {
+      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
+          ompt_mutex_nest_lock, omp_lock_hint_none,
+          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
+    }
+  }
+#endif
 
   ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
 
@@ -2163,16 +2616,20 @@ void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
   __kmp_itt_lock_acquired(lck);
 #endif /* USE_ITT_BUILD */
 
-#if OMPT_SUPPORT && OMPT_TRACE
-  if (ompt_enabled) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.enabled) {
     if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
-      if (ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first))
-        ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first)(
-            (uint64_t)lck);
+      if (ompt_enabled.ompt_callback_mutex_acquired) {
+        // lock_first
+        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
+            ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
+      }
     } else {
-      if (ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next))
-        ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next)(
-            (uint64_t)lck);
+      if (ompt_enabled.ompt_callback_nest_lock) {
+        // lock_next
+        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
+            ompt_scope_begin, (ompt_wait_id_t)lck, codeptr);
+      }
     }
   }
 #endif
@@ -2200,6 +2657,17 @@ void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
     __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
   }
 
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  // This is the case, if called from omp_init_lock_with_hint:
+  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
+  if (!codeptr)
+    codeptr = OMPT_GET_RETURN_ADDRESS(0);
+  if (ompt_enabled.ompt_callback_mutex_released) {
+    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
+        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
+  }
+#endif
+
 #else // KMP_USE_DYNAMIC_LOCK
 
   kmp_user_lock_p lck;
@@ -2217,6 +2685,18 @@ void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
 #endif /* USE_ITT_BUILD */
     TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
     KMP_MB();
+
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    // lck is only assigned under #else below; use user_lock as the wait id.
+    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
+    if (!codeptr)
+      codeptr = OMPT_GET_RETURN_ADDRESS(0);
+    if (ompt_enabled.ompt_callback_mutex_released) {
+      ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
+          ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
+    }
+#endif
+
     return;
 #else
     lck = (kmp_user_lock_p)user_lock;
@@ -2238,9 +2718,14 @@ void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
 
   RELEASE_LOCK(lck, gtid);
 
-#if OMPT_SUPPORT && OMPT_BLAME
-  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_release_lock)) {
-    ompt_callbacks.ompt_callback(ompt_event_release_lock)((uint64_t)lck);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  // This is the case, if called from omp_init_lock_with_hint:
+  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
+  if (!codeptr)
+    codeptr = OMPT_GET_RETURN_ADDRESS(0);
+  if (ompt_enabled.ompt_callback_mutex_released) {
+    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
+        ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
   }
 #endif
 
@@ -2254,7 +2739,28 @@ void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
 #if USE_ITT_BUILD
   __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
 #endif
-  KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
+  int release_status =
+      KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
+
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  // This is the case, if called from omp_init_lock_with_hint:
+  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
+  if (!codeptr)
+    codeptr = OMPT_GET_RETURN_ADDRESS(0);
+  if (ompt_enabled.enabled) {
+    if (release_status == KMP_LOCK_RELEASED) {
+      if (ompt_enabled.ompt_callback_mutex_released) {
+        // release_lock_last
+        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
+            ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
+      }
+    } else if (ompt_enabled.ompt_callback_nest_lock) {
+      // release_lock_prev
+      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
+          ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr);
+    }
+  }
+#endif
 
 #else // KMP_USE_DYNAMIC_LOCK
 
@@ -2272,10 +2778,39 @@ void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
 #if USE_ITT_BUILD
     __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
 #endif /* USE_ITT_BUILD */
+
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    int release_status = KMP_LOCK_STILL_HELD;
+#endif
+
     if (--(tl->lk.depth_locked) == 0) {
       TCW_4(tl->lk.poll, 0);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+      release_status = KMP_LOCK_RELEASED;
+#endif
     }
     KMP_MB();
+
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    // lck is only assigned under #else below; use user_lock as the wait id.
+    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
+    if (!codeptr)
+      codeptr = OMPT_GET_RETURN_ADDRESS(0);
+    if (ompt_enabled.enabled) {
+      if (release_status == KMP_LOCK_RELEASED) {
+        if (ompt_enabled.ompt_callback_mutex_released) {
+          // release_lock_last
+          ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
+              ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
+        }
+      } else if (ompt_enabled.ompt_callback_nest_lock) {
+        // release_lock_previous
+        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
+            ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr);
+      }
+    }
+#endif
+
     return;
 #else
     lck = (kmp_user_lock_p)user_lock;
@@ -2298,17 +2833,22 @@ void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
 
   int release_status;
   release_status = RELEASE_NESTED_LOCK(lck, gtid);
-#if OMPT_SUPPORT && OMPT_BLAME
-  if (ompt_enabled) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  // Take the return address recorded for gtid, or ours if none was recorded:
+  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
+  if (!codeptr)
+    codeptr = OMPT_GET_RETURN_ADDRESS(0);
+  if (ompt_enabled.enabled) {
     if (release_status == KMP_LOCK_RELEASED) {
-      if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)) {
-        ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)(
-            (uint64_t)lck);
+      if (ompt_enabled.ompt_callback_mutex_released) {
+        // release_lock_last
+        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
+            ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
       }
-    } else if (ompt_callbacks.ompt_callback(
-                   ompt_event_release_nest_lock_prev)) {
-      ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)(
-          (uint64_t)lck);
+    } else if (ompt_enabled.ompt_callback_nest_lock) {
+      // release_lock_previous
+      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
+          ompt_scope_end, (ompt_wait_id_t)lck, codeptr);
     }
   }
 #endif
@@ -2326,6 +2866,18 @@ int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
 #if USE_ITT_BUILD
   __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
 #endif
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  // This is the case, if called from omp_init_lock_with_hint:
+  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
+  if (!codeptr)
+    codeptr = OMPT_GET_RETURN_ADDRESS(0);
+  if (ompt_enabled.ompt_callback_mutex_acquire) {
+    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
+        ompt_mutex_lock, omp_lock_hint_none,
+        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
+        codeptr);
+  }
+#endif
 #if KMP_USE_INLINED_TAS
   if (tag == locktag_tas && !__kmp_env_consistency_check) {
     KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
@@ -2342,6 +2894,12 @@ int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
 #if USE_ITT_BUILD
     __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
 #endif
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    if (ompt_enabled.ompt_callback_mutex_acquired) {
+      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
+          ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
+    }
+#endif
     return FTN_TRUE;
   } else {
 #if USE_ITT_BUILD
@@ -2372,6 +2930,17 @@ int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
 #if USE_ITT_BUILD
   __kmp_itt_lock_acquiring(lck);
 #endif /* USE_ITT_BUILD */
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  // This is the case, if called from omp_init_lock_with_hint:
+  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
+  if (!codeptr)
+    codeptr = OMPT_GET_RETURN_ADDRESS(0);
+  if (ompt_enabled.ompt_callback_mutex_acquire) {
+    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
+        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
+        (ompt_wait_id_t)lck, codeptr);
+  }
+#endif
 
   rc = TEST_LOCK(lck, gtid);
 #if USE_ITT_BUILD
@@ -2381,6 +2950,13 @@ int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
     __kmp_itt_lock_cancelled(lck);
   }
 #endif /* USE_ITT_BUILD */
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
+    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
+        ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
+  }
+#endif
+
   return (rc ? FTN_TRUE : FTN_FALSE);
 
 /* Can't use serial interval since not block structured */
@@ -2395,6 +2971,18 @@ int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
 #if USE_ITT_BUILD
   __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
 #endif
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  // This is the case, if called from omp_init_lock_with_hint:
+  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
+  if (!codeptr)
+    codeptr = OMPT_GET_RETURN_ADDRESS(0);
+  if (ompt_enabled.ompt_callback_mutex_acquire) {
+    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
+        ompt_mutex_nest_lock, omp_lock_hint_none,
+        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
+        codeptr);
+  }
+#endif
   rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
 #if USE_ITT_BUILD
   if (rc) {
@@ -2403,6 +2991,23 @@ int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
     __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
   }
 #endif
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.enabled && rc) {
+    if (rc == 1) {
+      if (ompt_enabled.ompt_callback_mutex_acquired) {
+        // lock_first
+        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
+            ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
+      }
+    } else {
+      if (ompt_enabled.ompt_callback_nest_lock) {
+        // lock_next
+        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
+            ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
+      }
+    }
+  }
+#endif
   return rc;
 
 #else // KMP_USE_DYNAMIC_LOCK
@@ -2430,6 +3035,19 @@ int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
   __kmp_itt_lock_acquiring(lck);
 #endif /* USE_ITT_BUILD */
 
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  // Take the return address recorded for gtid, or ours if none was recorded:
+  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
+  if (!codeptr)
+    codeptr = OMPT_GET_RETURN_ADDRESS(0);
+  if (ompt_enabled.enabled &&
+      ompt_enabled.ompt_callback_mutex_acquire) {
+    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
+        ompt_mutex_nest_lock, omp_lock_hint_none,
+        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
+  }
+#endif
+
   rc = TEST_NESTED_LOCK(lck, gtid);
 #if USE_ITT_BUILD
   if (rc) {
@@ -2438,6 +3056,23 @@ int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
     __kmp_itt_lock_cancelled(lck);
   }
 #endif /* USE_ITT_BUILD */
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.enabled && rc) {
+    if (rc == 1) {
+      if (ompt_enabled.ompt_callback_mutex_acquired) {
+        // lock_first
+        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
+            ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
+      }
+    } else {
+      if (ompt_enabled.ompt_callback_nest_lock) {
+        // lock_next
+        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
+            ompt_scope_begin, (ompt_wait_id_t)lck, codeptr);
+      }
+    }
+  }
+#endif
   return rc;
 
 /* Can't use serial interval since not block structured */
@@ -2697,6 +3332,19 @@ __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
 // this barrier should be invisible to a customer and to the threading profile
 // tool (it's neither a terminating barrier nor customer's code, it's
 // used for an internal purpose)
+#if OMPT_SUPPORT
+    // JP: can this barrier potentially lead to task scheduling?
+    // JP: as long as there is a barrier in the implementation, OMPT should and
+    // will provide the barrier events,
+    // so we set up the necessary frame/return addresses.
+    ompt_frame_t *ompt_frame;
+    if (ompt_enabled.enabled) {
+      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
+      if (ompt_frame->reenter_runtime_frame == NULL)
+        ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+      OMPT_STORE_RETURN_ADDRESS(global_tid);
+    }
+#endif
 #if USE_ITT_NOTIFY
     __kmp_threads[global_tid]->th.th_ident = loc;
 #endif
@@ -2704,6 +3352,11 @@ __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
         __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                       global_tid, FALSE, reduce_size, reduce_data, reduce_func);
     retval = (retval != 0) ? (0) : (1);
+#if OMPT_SUPPORT // same guard as the frame set-up ahead of the barrier
+    if (ompt_enabled.enabled) {
+      ompt_frame->reenter_runtime_frame = NULL;
+    }
+#endif
 
     // all other workers except master should do this pop here
     //     ( none of other workers will get to __kmpc_end_reduce_nowait() )
@@ -2859,6 +3512,15 @@ kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
 // case tree_reduce_block:
 // this barrier should be visible to a customer and to the threading profile
 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
+#if OMPT_SUPPORT
+    ompt_frame_t *ompt_frame;
+    if (ompt_enabled.enabled) {
+      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
+      if (ompt_frame->reenter_runtime_frame == NULL)
+        ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+      OMPT_STORE_RETURN_ADDRESS(global_tid);
+    }
+#endif
 #if USE_ITT_NOTIFY
     __kmp_threads[global_tid]->th.th_ident =
         loc; // needed for correct notification of frames
@@ -2867,6 +3529,11 @@ kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
         __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                       global_tid, TRUE, reduce_size, reduce_data, reduce_func);
     retval = (retval != 0) ? (0) : (1);
+#if OMPT_SUPPORT // same guard as the frame set-up ahead of the barrier
+    if (ompt_enabled.enabled) {
+      ompt_frame->reenter_runtime_frame = NULL;
+    }
+#endif
 
     // all other workers except master should do this pop here
     // ( none of other workers except master will enter __kmpc_end_reduce() )
@@ -2916,28 +3583,70 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
     __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
 
 // TODO: implicit barrier: should be exposed
+#if OMPT_SUPPORT
+    ompt_frame_t *ompt_frame;
+    if (ompt_enabled.enabled) {
+      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
+      if (ompt_frame->reenter_runtime_frame == NULL)
+        ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+      OMPT_STORE_RETURN_ADDRESS(global_tid);
+    }
+#endif
 #if USE_ITT_NOTIFY
     __kmp_threads[global_tid]->th.th_ident = loc;
 #endif
     __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
+#if OMPT_SUPPORT // same guard as the frame set-up ahead of the barrier
+    if (ompt_enabled.enabled) {
+      ompt_frame->reenter_runtime_frame = NULL;
+    }
+#endif
 
   } else if (packed_reduction_method == empty_reduce_block) {
 
 // usage: if team size==1, no synchronization is required (Intel platforms only)
 
 // TODO: implicit barrier: should be exposed
+#if OMPT_SUPPORT
+    ompt_frame_t *ompt_frame;
+    if (ompt_enabled.enabled) {
+      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
+      if (ompt_frame->reenter_runtime_frame == NULL)
+        ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+      OMPT_STORE_RETURN_ADDRESS(global_tid);
+    }
+#endif
 #if USE_ITT_NOTIFY
     __kmp_threads[global_tid]->th.th_ident = loc;
 #endif
     __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
+#if OMPT_SUPPORT // same guard as the frame set-up ahead of the barrier
+    if (ompt_enabled.enabled) {
+      ompt_frame->reenter_runtime_frame = NULL;
+    }
+#endif
 
   } else if (packed_reduction_method == atomic_reduce_block) {
 
+#if OMPT_SUPPORT
+    ompt_frame_t *ompt_frame;
+    if (ompt_enabled.enabled) {
+      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
+      if (ompt_frame->reenter_runtime_frame == NULL)
+        ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+      OMPT_STORE_RETURN_ADDRESS(global_tid);
+    }
+#endif
 // TODO: implicit barrier: should be exposed
 #if USE_ITT_NOTIFY
     __kmp_threads[global_tid]->th.th_ident = loc;
 #endif
     __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
+#if OMPT_SUPPORT // same guard as the frame set-up ahead of the barrier
+    if (ompt_enabled.enabled) {
+      ompt_frame->reenter_runtime_frame = NULL;
+    }
+#endif
 
   } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                    tree_reduce_block)) {
index 9a57613..cb2bc3f 100644 (file)
@@ -36,7 +36,6 @@
 #endif
 
 #if OMPT_SUPPORT
-#include "ompt-internal.h"
 #include "ompt-specific.h"
 #endif
 
@@ -1230,12 +1229,14 @@ __kmp_dispatch_init(ident_t *loc, int gtid, enum sched_type schedule, T lb,
   }
 #endif // ( KMP_STATIC_STEAL_ENABLED )
 
-#if OMPT_SUPPORT && OMPT_TRACE
-  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.ompt_callback_work) {
     ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
-    ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
-    ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
-        team_info->parallel_id, task_info->task_id, team_info->microtask);
+    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+    kmp_info_t *thr = __kmp_threads[gtid];
+    ompt_callbacks.ompt_callback(ompt_callback_work)(
+        ompt_work_loop, ompt_scope_begin, &(team_info->parallel_data),
+        &(task_info->task_data), tc, OMPT_LOAD_RETURN_ADDRESS(gtid));
   }
 #endif
 }
@@ -1390,16 +1391,18 @@ static void __kmp_dispatch_finish_chunk(int gtid, ident_t *loc) {
 /* Define a macro for exiting __kmp_dispatch_next(). If status is 0 (no more
    work), then tell OMPT the loop is over. In some cases kmp_dispatch_fini()
    is not called. */
-#if OMPT_SUPPORT && OMPT_TRACE
+#if OMPT_SUPPORT && OMPT_OPTIONAL
 #define OMPT_LOOP_END                                                          \
   if (status == 0) {                                                           \
-    if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_end)) {   \
+    if (ompt_enabled.ompt_callback_work) {                                     \
       ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);              \
-      ompt_task_info_t *task_info = __ompt_get_taskinfo(0);                    \
-      ompt_callbacks.ompt_callback(ompt_event_loop_end)(                       \
-          team_info->parallel_id, task_info->task_id);                         \
+      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);            \
+      ompt_callbacks.ompt_callback(ompt_callback_work)(                        \
+          ompt_work_loop, ompt_scope_end, &(team_info->parallel_data),         \
+          &(task_info->task_data), 0, codeptr);                                \
     }                                                                          \
   }
+// TODO: implement count
 #else
 #define OMPT_LOOP_END // no-op
 #endif
@@ -1407,7 +1410,12 @@ static void __kmp_dispatch_finish_chunk(int gtid, ident_t *loc) {
 template <typename T>
 static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last,
                                T *p_lb, T *p_ub,
-                               typename traits_t<T>::signed_t *p_st) {
+                               typename traits_t<T>::signed_t *p_st
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+                               ,
+                               void *codeptr
+#endif
+                               ) {
 
   typedef typename traits_t<T>::unsigned_t UT;
   typedef typename traits_t<T>::signed_t ST;
@@ -2527,6 +2535,9 @@ void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
                             enum sched_type schedule, kmp_int32 lb,
                             kmp_int32 ub, kmp_int32 st, kmp_int32 chunk) {
   KMP_DEBUG_ASSERT(__kmp_init_serial);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
   __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
 }
 /*!
@@ -2536,6 +2547,9 @@ void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
                              enum sched_type schedule, kmp_uint32 lb,
                              kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk) {
   KMP_DEBUG_ASSERT(__kmp_init_serial);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
   __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true);
 }
 
@@ -2546,6 +2560,9 @@ void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
                             enum sched_type schedule, kmp_int64 lb,
                             kmp_int64 ub, kmp_int64 st, kmp_int64 chunk) {
   KMP_DEBUG_ASSERT(__kmp_init_serial);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
   __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
 }
 
@@ -2556,6 +2573,9 @@ void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
                              enum sched_type schedule, kmp_uint64 lb,
                              kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk) {
   KMP_DEBUG_ASSERT(__kmp_init_serial);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
   __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true);
 }
 
@@ -2573,6 +2593,9 @@ void __kmpc_dist_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
                                  kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
                                  kmp_int32 chunk) {
   KMP_DEBUG_ASSERT(__kmp_init_serial);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
   __kmp_dist_get_bounds<kmp_int32>(loc, gtid, p_last, &lb, &ub, st);
   __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
 }
@@ -2582,6 +2605,9 @@ void __kmpc_dist_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
                                   kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
                                   kmp_int32 chunk) {
   KMP_DEBUG_ASSERT(__kmp_init_serial);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
   __kmp_dist_get_bounds<kmp_uint32>(loc, gtid, p_last, &lb, &ub, st);
   __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true);
 }
@@ -2591,6 +2617,9 @@ void __kmpc_dist_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
                                  kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
                                  kmp_int64 chunk) {
   KMP_DEBUG_ASSERT(__kmp_init_serial);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
   __kmp_dist_get_bounds<kmp_int64>(loc, gtid, p_last, &lb, &ub, st);
   __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
 }
@@ -2600,6 +2629,9 @@ void __kmpc_dist_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
                                   kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
                                   kmp_int64 chunk) {
   KMP_DEBUG_ASSERT(__kmp_init_serial);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
   __kmp_dist_get_bounds<kmp_uint64>(loc, gtid, p_last, &lb, &ub, st);
   __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true);
 }
@@ -2619,7 +2651,15 @@ If there is no more work, then the lb,ub and stride need not be modified.
 */
 int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                            kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st) {
-  return __kmp_dispatch_next<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  return __kmp_dispatch_next<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+                                        ,
+                                        OMPT_LOAD_RETURN_ADDRESS(gtid)
+#endif
+                                            );
 }
 
 /*!
@@ -2628,7 +2668,15 @@ See @ref __kmpc_dispatch_next_4
 int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                             kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                             kmp_int32 *p_st) {
-  return __kmp_dispatch_next<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  return __kmp_dispatch_next<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+                                         ,
+                                         OMPT_LOAD_RETURN_ADDRESS(gtid)
+#endif
+                                             );
 }
 
 /*!
@@ -2636,7 +2684,15 @@ See @ref __kmpc_dispatch_next_4
 */
 int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                            kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st) {
-  return __kmp_dispatch_next<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  return __kmp_dispatch_next<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+                                        ,
+                                        OMPT_LOAD_RETURN_ADDRESS(gtid)
+#endif
+                                            );
 }
 
 /*!
@@ -2645,7 +2701,15 @@ See @ref __kmpc_dispatch_next_4
 int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                             kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                             kmp_int64 *p_st) {
-  return __kmp_dispatch_next<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  return __kmp_dispatch_next<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+                                         ,
+                                         OMPT_LOAD_RETURN_ADDRESS(gtid)
+#endif
+                                             );
 }
 
 /*!
index fc05f69..7efb622 100644 (file)
 
 #include "kmp_i18n.h"
 
+#if OMPT_SUPPORT
+#include "ompt-specific.h"
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif // __cplusplus
@@ -340,6 +344,26 @@ int FTN_STDCALL xexpand(FTN_GET_MAX_THREADS)(void) {
 #endif
 }
 
+#if OMP_50_ENABLED
+int FTN_STDCALL FTN_CONTROL_TOOL(uint64_t command, uint64_t modifier,
+                                 void *arg) {
+#if defined(KMP_STUB) || !OMPT_SUPPORT
+  return -2; // stub library or no OMPT support compiled in
+#else
+  int gtid = __kmp_entry_gtid(); // resolve once, like the lock entry points
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+  if (!TCR_4(__kmp_init_middle))
+    return -2; // __kmp_init_middle is not set yet
+  kmp_info_t *this_thr = __kmp_threads[gtid];
+  ompt_task_info_t *parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
+  parent_task_info->frame.reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+  int ret = __kmp_control_tool(command, modifier, arg);
+  parent_task_info->frame.reenter_runtime_frame = 0; // cleared after the call
+  return ret;
+#endif
+}
+#endif
+
 int FTN_STDCALL xexpand(FTN_GET_THREAD_NUM)(void) {
 #ifdef KMP_STUB
   return 0;
@@ -873,8 +897,11 @@ void FTN_STDCALL FTN_INIT_LOCK_WITH_HINT(void **user_lock,
 #ifdef KMP_STUB
   *((kmp_stub_lock_t *)user_lock) = UNLOCKED;
 #else
-  __kmpc_init_lock_with_hint(NULL, __kmp_entry_gtid(), user_lock,
-                             KMP_DEREF hint);
+  int gtid = __kmp_entry_gtid();
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  __kmpc_init_lock_with_hint(NULL, gtid, user_lock, KMP_DEREF hint);
 #endif
 }
 
@@ -883,8 +910,11 @@ void FTN_STDCALL FTN_INIT_NEST_LOCK_WITH_HINT(void **user_lock,
 #ifdef KMP_STUB
   *((kmp_stub_lock_t *)user_lock) = UNLOCKED;
 #else
-  __kmpc_init_nest_lock_with_hint(NULL, __kmp_entry_gtid(), user_lock,
-                                  KMP_DEREF hint);
+  int gtid = __kmp_entry_gtid();
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  __kmpc_init_nest_lock_with_hint(NULL, gtid, user_lock, KMP_DEREF hint);
 #endif
 }
 #endif
@@ -894,7 +924,11 @@ void FTN_STDCALL xexpand(FTN_INIT_LOCK)(void **user_lock) {
 #ifdef KMP_STUB
   *((kmp_stub_lock_t *)user_lock) = UNLOCKED;
 #else
-  __kmpc_init_lock(NULL, __kmp_entry_gtid(), user_lock);
+  int gtid = __kmp_entry_gtid();
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  __kmpc_init_lock(NULL, gtid, user_lock);
 #endif
 }
 
@@ -903,7 +937,11 @@ void FTN_STDCALL xexpand(FTN_INIT_NEST_LOCK)(void **user_lock) {
 #ifdef KMP_STUB
   *((kmp_stub_lock_t *)user_lock) = UNLOCKED;
 #else
-  __kmpc_init_nest_lock(NULL, __kmp_entry_gtid(), user_lock);
+  int gtid = __kmp_entry_gtid();
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  __kmpc_init_nest_lock(NULL, gtid, user_lock);
 #endif
 }
 
@@ -911,7 +949,11 @@ void FTN_STDCALL xexpand(FTN_DESTROY_LOCK)(void **user_lock) {
 #ifdef KMP_STUB
   *((kmp_stub_lock_t *)user_lock) = UNINIT;
 #else
-  __kmpc_destroy_lock(NULL, __kmp_entry_gtid(), user_lock);
+  int gtid = __kmp_entry_gtid();
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  __kmpc_destroy_lock(NULL, gtid, user_lock);
 #endif
 }
 
@@ -919,7 +961,11 @@ void FTN_STDCALL xexpand(FTN_DESTROY_NEST_LOCK)(void **user_lock) {
 #ifdef KMP_STUB
   *((kmp_stub_lock_t *)user_lock) = UNINIT;
 #else
-  __kmpc_destroy_nest_lock(NULL, __kmp_entry_gtid(), user_lock);
+  int gtid = __kmp_entry_gtid();
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  __kmpc_destroy_nest_lock(NULL, gtid, user_lock);
 #endif
 }
 
@@ -933,7 +979,11 @@ void FTN_STDCALL xexpand(FTN_SET_LOCK)(void **user_lock) {
   }
   *((kmp_stub_lock_t *)user_lock) = LOCKED;
 #else
-  __kmpc_set_lock(NULL, __kmp_entry_gtid(), user_lock);
+  int gtid = __kmp_entry_gtid();
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  __kmpc_set_lock(NULL, gtid, user_lock);
 #endif
 }
 
@@ -944,7 +994,11 @@ void FTN_STDCALL xexpand(FTN_SET_NEST_LOCK)(void **user_lock) {
   }
   (*((int *)user_lock))++;
 #else
-  __kmpc_set_nest_lock(NULL, __kmp_entry_gtid(), user_lock);
+  int gtid = __kmp_entry_gtid();
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  __kmpc_set_nest_lock(NULL, gtid, user_lock);
 #endif
 }
 
@@ -958,7 +1012,11 @@ void FTN_STDCALL xexpand(FTN_UNSET_LOCK)(void **user_lock) {
   }
   *((kmp_stub_lock_t *)user_lock) = UNLOCKED;
 #else
-  __kmpc_unset_lock(NULL, __kmp_entry_gtid(), user_lock);
+  int gtid = __kmp_entry_gtid();
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  __kmpc_unset_lock(NULL, gtid, user_lock);
 #endif
 }
 
@@ -972,7 +1030,11 @@ void FTN_STDCALL xexpand(FTN_UNSET_NEST_LOCK)(void **user_lock) {
   }
   (*((int *)user_lock))--;
 #else
-  __kmpc_unset_nest_lock(NULL, __kmp_entry_gtid(), user_lock);
+  int gtid = __kmp_entry_gtid();
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  __kmpc_unset_nest_lock(NULL, gtid, user_lock);
 #endif
 }
 
@@ -987,7 +1049,11 @@ int FTN_STDCALL xexpand(FTN_TEST_LOCK)(void **user_lock) {
   *((kmp_stub_lock_t *)user_lock) = LOCKED;
   return 1;
 #else
-  return __kmpc_test_lock(NULL, __kmp_entry_gtid(), user_lock);
+  int gtid = __kmp_entry_gtid();
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  return __kmpc_test_lock(NULL, gtid, user_lock);
 #endif
 }
 
@@ -998,7 +1064,11 @@ int FTN_STDCALL xexpand(FTN_TEST_NEST_LOCK)(void **user_lock) {
   }
   return ++(*((int *)user_lock));
 #else
-  return __kmpc_test_nest_lock(NULL, __kmp_entry_gtid(), user_lock);
+  int gtid = __kmp_entry_gtid();
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  return __kmpc_test_nest_lock(NULL, gtid, user_lock);
 #endif
 }
 
index 93c0228..3cbbed0 100644 (file)
 #endif
 #endif
 
+#if OMP_50_ENABLED
+#define FTN_CONTROL_TOOL omp_control_tool
+#endif
+
 #endif /* KMP_FTN_PLAIN */
 
 /* ------------------------------------------------------------------------ */
 #endif
 #endif
 
+#if OMP_50_ENABLED
+#define FTN_CONTROL_TOOL OMP_CONTROL_TOOL
+#endif
+
 #endif /* KMP_FTN_APPEND */
 
 /* ------------------------------------------------------------------------ */
 #endif
 #endif
 
+#if OMP_50_ENABLED
+#define FTN_CONTROL_TOOL OMP_CONTROL_TOOL
+#endif
+
 #endif /* KMP_FTN_UPPER */
 
 /* ------------------------------------------------------------------------ */
 #endif
 #endif
 
+#if OMP_50_ENABLED
+#define FTN_CONTROL_TOOL OMP_CONTROL_TOOL_
+#endif
+
 #endif /* KMP_FTN_UAPPEND */
 
 /* -------------------------- GOMP API NAMES ------------------------ */
index f97abaa..f8a2cde 100644 (file)
@@ -303,6 +303,10 @@ kmp_int32 __kmp_max_task_priority = 0;
 kmp_uint64 __kmp_taskloop_min_tasks = 0;
 #endif
 
+#if OMP_50_ENABLED && OMPT_SUPPORT
+char const *__kmp_tool_libraries = NULL;
+#endif
+
 /* This check ensures that the compiler is passing the correct data type for the
    flags formal parameter of the function kmpc_omp_task_alloc(). If the type is
    not a 4-byte type, then give an error message about a non-positive length
index b7045cf..6658259 100644 (file)
@@ -31,14 +31,20 @@ void xexpand(KMP_API_NAME_GOMP_BARRIER)(void) {
   int gtid = __kmp_entry_gtid();
   MKLOC(loc, "GOMP_barrier");
   KA_TRACE(20, ("GOMP_barrier: T#%d\n", gtid));
-#if OMPT_SUPPORT && OMPT_TRACE
+#if OMPT_SUPPORT && OMPT_OPTIONAL
   ompt_frame_t *ompt_frame;
-  if (ompt_enabled) {
-    ompt_frame = __ompt_get_task_frame_internal(0);
-    ompt_frame->reenter_runtime_frame = __builtin_frame_address(1);
+  if (ompt_enabled.enabled) {
+    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
+    ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+    OMPT_STORE_RETURN_ADDRESS(gtid);
   }
 #endif
   __kmpc_barrier(&loc, gtid);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.enabled) {
+    ompt_frame->reenter_runtime_frame = NULL;
+  }
+#endif
 }
 
 // Mutual exclusion
@@ -56,6 +62,9 @@ void xexpand(KMP_API_NAME_GOMP_CRITICAL_START)(void) {
   int gtid = __kmp_entry_gtid();
   MKLOC(loc, "GOMP_critical_start");
   KA_TRACE(20, ("GOMP_critical_start: T#%d\n", gtid));
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
   __kmpc_critical(&loc, gtid, __kmp_unnamed_critical_addr);
 }
 
@@ -63,6 +72,9 @@ void xexpand(KMP_API_NAME_GOMP_CRITICAL_END)(void) {
   int gtid = __kmp_get_gtid();
   MKLOC(loc, "GOMP_critical_end");
   KA_TRACE(20, ("GOMP_critical_end: T#%d\n", gtid));
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
   __kmpc_end_critical(&loc, gtid, __kmp_unnamed_critical_addr);
 }
 
@@ -111,7 +123,40 @@ int xexpand(KMP_API_NAME_GOMP_SINGLE_START)(void) {
   // 3rd parameter == FALSE prevents kmp_enter_single from pushing a
   // workshare when USE_CHECKS is defined.  We need to avoid the push,
   // as there is no corresponding GOMP_single_end() call.
-  return __kmp_enter_single(gtid, &loc, FALSE);
+  kmp_int32 rc = __kmp_enter_single(gtid, &loc, FALSE);
+
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  kmp_info_t *this_thr = __kmp_threads[gtid];
+  kmp_team_t *team = this_thr->th.th_team;
+  int tid = __kmp_tid_from_gtid(gtid);
+
+  if (ompt_enabled.enabled) {
+    if (rc) {
+      if (ompt_enabled.ompt_callback_work) {
+        ompt_callbacks.ompt_callback(ompt_callback_work)(
+            ompt_work_single_executor, ompt_scope_begin,
+            &(team->t.ompt_team_info.parallel_data),
+            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
+            1, OMPT_GET_RETURN_ADDRESS(0));
+      }
+    } else {
+      if (ompt_enabled.ompt_callback_work) {
+        ompt_callbacks.ompt_callback(ompt_callback_work)(
+            ompt_work_single_other, ompt_scope_begin,
+            &(team->t.ompt_team_info.parallel_data),
+            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
+            1, OMPT_GET_RETURN_ADDRESS(0));
+        ompt_callbacks.ompt_callback(ompt_callback_work)(
+            ompt_work_single_other, ompt_scope_end,
+            &(team->t.ompt_team_info.parallel_data),
+            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
+            1, OMPT_GET_RETURN_ADDRESS(0));
+      }
+    }
+  }
+#endif
+
+  return rc;
 }
 
 void *xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void) {
@@ -129,14 +174,33 @@ void *xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void) {
   if (__kmp_enter_single(gtid, &loc, FALSE))
     return NULL;
 
-  // Wait for the first thread to set the copyprivate data pointer,
-  // and for all other threads to reach this point.
+// Wait for the first thread to set the copyprivate data pointer,
+// and for all other threads to reach this point.
+
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  ompt_frame_t *ompt_frame;
+  if (ompt_enabled.enabled) {
+    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
+    ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+  }
+#endif
   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
 
   // Retrieve the value of the copyprivate data point, and wait for all
   // threads to do likewise, then return.
   retval = __kmp_team_from_gtid(gtid)->t.t_copypriv_data;
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.enabled) {
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+  }
+#endif
   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.enabled) {
+    ompt_frame->reenter_runtime_frame = NULL;
+  }
+#endif
   return retval;
 }
 
@@ -149,14 +213,35 @@ void xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_END)(void *data) {
   // continuing, so that the know that the copyprivate data pointer has been
   // propagated to all threads before trying to reuse the t_copypriv_data field.
   __kmp_team_from_gtid(gtid)->t.t_copypriv_data = data;
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  ompt_frame_t *ompt_frame;
+  if (ompt_enabled.enabled) {
+    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
+    ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+  }
+#endif
   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.enabled) {
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+  }
+#endif
   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.enabled) {
+    ompt_frame->reenter_runtime_frame = NULL;
+  }
+#endif
 }
 
 void xexpand(KMP_API_NAME_GOMP_ORDERED_START)(void) {
   int gtid = __kmp_entry_gtid();
   MKLOC(loc, "GOMP_ordered_start");
   KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid));
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
   __kmpc_ordered(&loc, gtid);
 }
 
@@ -164,6 +249,9 @@ void xexpand(KMP_API_NAME_GOMP_ORDERED_END)(void) {
   int gtid = __kmp_get_gtid();
   MKLOC(loc, "GOMP_ordered_end");
   KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid));
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
   __kmpc_end_ordered(&loc, gtid);
 }
 
@@ -197,26 +285,26 @@ static
 #if OMPT_SUPPORT
   kmp_info_t *thr;
   ompt_frame_t *ompt_frame;
-  ompt_state_t enclosing_state;
+  omp_state_t enclosing_state;
 
-  if (ompt_enabled) {
+  if (ompt_enabled.enabled) {
     // get pointer to thread data structure
     thr = __kmp_threads[*gtid];
 
     // save enclosing task state; set current state for task
     enclosing_state = thr->th.ompt_thread_info.state;
-    thr->th.ompt_thread_info.state = ompt_state_work_parallel;
+    thr->th.ompt_thread_info.state = omp_state_work_parallel;
 
     // set task frame
-    ompt_frame = __ompt_get_task_frame_internal(0);
-    ompt_frame->exit_runtime_frame = __builtin_frame_address(0);
+    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
+    ompt_frame->exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(0);
   }
 #endif
 
   task(data);
 
 #if OMPT_SUPPORT
-  if (ompt_enabled) {
+  if (ompt_enabled.enabled) {
     // clear task frame
     ompt_frame->exit_runtime_frame = NULL;
 
@@ -236,24 +324,29 @@ static
                                           enum sched_type schedule, long start,
                                           long end, long incr,
                                           long chunk_size) {
-  // Intialize the loop worksharing construct.
+// Initialize the loop worksharing construct.
+
+#if OMPT_SUPPORT
+  if (ompt_enabled.enabled)
+    OMPT_STORE_RETURN_ADDRESS(*gtid);
+#endif
   KMP_DISPATCH_INIT(loc, *gtid, schedule, start, end, incr, chunk_size,
                     schedule != kmp_sch_static);
 
 #if OMPT_SUPPORT
   kmp_info_t *thr;
   ompt_frame_t *ompt_frame;
-  ompt_state_t enclosing_state;
+  omp_state_t enclosing_state;
 
-  if (ompt_enabled) {
+  if (ompt_enabled.enabled) {
     thr = __kmp_threads[*gtid];
     // save enclosing task state; set current state for task
     enclosing_state = thr->th.ompt_thread_info.state;
-    thr->th.ompt_thread_info.state = ompt_state_work_parallel;
+    thr->th.ompt_thread_info.state = omp_state_work_parallel;
 
     // set task frame
-    ompt_frame = __ompt_get_task_frame_internal(0);
-    ompt_frame->exit_runtime_frame = __builtin_frame_address(0);
+    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
+    ompt_frame->exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(0);
   }
 #endif
 
@@ -261,7 +354,7 @@ static
   task(data);
 
 #if OMPT_SUPPORT
-  if (ompt_enabled) {
+  if (ompt_enabled.enabled) {
     // clear task frame
     ompt_frame->exit_runtime_frame = NULL;
 
@@ -285,11 +378,8 @@ static
   va_list ap;
   va_start(ap, argc);
 
-  rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc,
-#if OMPT_SUPPORT
-                       VOLATILE_CAST(void *) unwrapped_task,
-#endif
-                       wrapper, __kmp_invoke_task_func,
+  rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc, wrapper,
+                       __kmp_invoke_task_func,
 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                        &ap
 #else
@@ -304,18 +394,19 @@ static
   }
 
 #if OMPT_SUPPORT
-  if (ompt_enabled) {
-#if OMPT_TRACE
+  int ompt_team_size;
+  if (ompt_enabled.enabled) {
     ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
-    ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
+    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
 
     // implicit task callback
-    if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
-      ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
-          team_info->parallel_id, task_info->task_id);
+    if (ompt_enabled.ompt_callback_implicit_task) {
+      ompt_team_size = __kmp_team_from_gtid(gtid)->t.t_nproc;
+      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+          ompt_scope_begin, &(team_info->parallel_data),
+          &(task_info->task_data), ompt_team_size, __kmp_tid_from_gtid(gtid));
     }
-#endif
-    thr->th.ompt_thread_info.state = ompt_state_work_parallel;
+    thr->th.ompt_thread_info.state = omp_state_work_parallel;
   }
 #endif
 }
@@ -323,47 +414,9 @@ static
 static void __kmp_GOMP_serialized_parallel(ident_t *loc, kmp_int32 gtid,
                                            void (*task)(void *)) {
 #if OMPT_SUPPORT
-  ompt_parallel_id_t ompt_parallel_id;
-  if (ompt_enabled) {
-    ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
-
-    ompt_parallel_id = __ompt_parallel_id_new(gtid);
-
-    // parallel region callback
-    if (ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
-      int team_size = 1;
-      ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
-          task_info->task_id, &task_info->frame, ompt_parallel_id, team_size,
-          (void *)task, OMPT_INVOKER(fork_context_gnu));
-    }
-  }
+  OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
-
   __kmp_serialized_parallel(loc, gtid);
-
-#if OMPT_SUPPORT
-  if (ompt_enabled) {
-    kmp_info_t *thr = __kmp_threads[gtid];
-
-    ompt_task_id_t my_ompt_task_id = __ompt_task_id_new(gtid);
-
-    // set up lightweight task
-    ompt_lw_taskteam_t *lwt =
-        (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t));
-    __ompt_lw_taskteam_init(lwt, thr, gtid, (void *)task, ompt_parallel_id);
-    lwt->ompt_task_info.task_id = my_ompt_task_id;
-    __ompt_lw_taskteam_link(lwt, thr);
-
-#if OMPT_TRACE
-    // implicit task callback
-    if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
-      ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
-          ompt_parallel_id, my_ompt_task_id);
-    }
-    thr->th.ompt_thread_info.state = ompt_state_work_parallel;
-#endif
-  }
-#endif
 }
 
 void xexpand(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data,
@@ -373,10 +426,11 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data,
 #if OMPT_SUPPORT
   ompt_frame_t *parent_frame, *frame;
 
-  if (ompt_enabled) {
-    parent_frame = __ompt_get_task_frame_internal(0);
-    parent_frame->reenter_runtime_frame = __builtin_frame_address(1);
+  if (ompt_enabled.enabled) {
+    __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL);
+    parent_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
   }
+  OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
 
   MKLOC(loc, "GOMP_parallel_start");
@@ -394,9 +448,9 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data,
   }
 
 #if OMPT_SUPPORT
-  if (ompt_enabled) {
-    frame = __ompt_get_task_frame_internal(0);
-    frame->exit_runtime_frame = __builtin_frame_address(1);
+  if (ompt_enabled.enabled) {
+    __ompt_get_task_info_internal(0, NULL, NULL, &frame, NULL, NULL);
+    frame->exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
   }
 #endif
 }
@@ -404,44 +458,23 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data,
 void xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void) {
   int gtid = __kmp_get_gtid();
   kmp_info_t *thr;
+  int ompt_team_size = __kmp_team_from_gtid(gtid)->t.t_nproc;
 
   thr = __kmp_threads[gtid];
 
   MKLOC(loc, "GOMP_parallel_end");
   KA_TRACE(20, ("GOMP_parallel_end: T#%d\n", gtid));
 
-#if OMPT_SUPPORT
-  ompt_parallel_id_t parallel_id;
-  ompt_task_id_t serialized_task_id;
-  ompt_frame_t *ompt_frame = NULL;
-
-  if (ompt_enabled) {
-    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
-    parallel_id = team_info->parallel_id;
-
-    ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
-    serialized_task_id = task_info->task_id;
-
-    // unlink if necessary. no-op if there is not a lightweight task.
-    ompt_lw_taskteam_t *lwt = __ompt_lw_taskteam_unlink(thr);
-    // GOMP allocates/frees lwt since it can't be kept on the stack
-    if (lwt) {
-      __kmp_free(lwt);
-    }
-  }
-#endif
-
   if (!thr->th.th_team->t.t_serialized) {
     __kmp_run_after_invoked_task(gtid, __kmp_tid_from_gtid(gtid), thr,
                                  thr->th.th_team);
 
 #if OMPT_SUPPORT
-    if (ompt_enabled) {
+    if (ompt_enabled.enabled) {
       // Implicit task is finished here, in the barrier we might schedule
       // deferred tasks,
       // these don't see the implicit task on the stack
-      ompt_frame = __ompt_get_task_frame_internal(0);
-      ompt_frame->exit_runtime_frame = NULL;
+      OMPT_CUR_TASK_INFO(thr)->frame.exit_runtime_frame = NULL;
     }
 #endif
 
@@ -452,35 +485,7 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void) {
 #endif
                     );
   } else {
-#if OMPT_SUPPORT && OMPT_TRACE
-    if (ompt_enabled &&
-        ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
-      ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
-          parallel_id, serialized_task_id);
-    }
-#endif
-
     __kmpc_end_serialized_parallel(&loc, gtid);
-
-#if OMPT_SUPPORT
-    if (ompt_enabled) {
-      // Record that we re-entered the runtime system in the frame that
-      // created the parallel region.
-      ompt_task_info_t *parent_task_info = __ompt_get_taskinfo(0);
-
-      if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
-        ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
-            parallel_id, parent_task_info->task_id,
-            OMPT_INVOKER(fork_context_gnu));
-      }
-
-      parent_task_info->frame.reenter_runtime_frame = NULL;
-
-      thr->th.ompt_thread_info.state =
-          (((thr->th.th_team)->t.t_serialized) ? ompt_state_work_serial
-                                               : ompt_state_work_parallel);
-    }
-#endif
   }
 }
 
@@ -508,6 +513,12 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void) {
 // num and calculate the iteration space using the result.  It doesn't do this
 // with ordered static loop, so they can be checked.
 
+#if OMPT_SUPPORT
+#define IF_OMPT_SUPPORT(code) code
+#else
+#define IF_OMPT_SUPPORT(code)
+#endif
+
 #define LOOP_START(func, schedule)                                             \
   int func(long lb, long ub, long str, long chunk_sz, long *p_lb,              \
            long *p_ub) {                                                       \
@@ -520,9 +531,11 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void) {
               gtid, lb, ub, str, chunk_sz));                                   \
                                                                                \
     if ((str > 0) ? (lb < ub) : (lb > ub)) {                                   \
+      IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                        \
       KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                            \
                         (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz,        \
                         (schedule) != kmp_sch_static);                         \
+      IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                        \
       status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb,            \
                                  (kmp_int *)p_ub, (kmp_int *)&stride);         \
       if (status) {                                                            \
@@ -551,8 +564,10 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void) {
               gtid, lb, ub, str, chunk_sz));                                   \
                                                                                \
     if ((str > 0) ? (lb < ub) : (lb > ub)) {                                   \
+      IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                        \
       KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                            \
                         (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \
+      IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                        \
       status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb,            \
                                  (kmp_int *)p_ub, (kmp_int *)&stride);         \
       if (status) {                                                            \
@@ -577,6 +592,7 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void) {
     MKLOC(loc, #func);                                                         \
     KA_TRACE(20, (#func ": T#%d\n", gtid));                                    \
                                                                                \
+    IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                          \
     fini_code status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb,    \
                                          (kmp_int *)p_ub, (kmp_int *)&stride); \
     if (status) {                                                              \
@@ -621,7 +637,20 @@ void xexpand(KMP_API_NAME_GOMP_LOOP_END)(void) {
   int gtid = __kmp_get_gtid();
   KA_TRACE(20, ("GOMP_loop_end: T#%d\n", gtid))
 
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  ompt_frame_t *ompt_frame;
+  if (ompt_enabled.enabled) {
+    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
+    ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+  }
+#endif
   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.enabled) {
+    ompt_frame->reenter_runtime_frame = NULL;
+  }
+#endif
 
   KA_TRACE(20, ("GOMP_loop_end exit: T#%d\n", gtid))
 }
@@ -796,17 +825,18 @@ LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT),
     KA_TRACE(20, (#func " exit: T#%d\n", gtid));                               \
   }
 
-#if OMPT_SUPPORT
+#if OMPT_SUPPORT && OMPT_OPTIONAL
 
 #define OMPT_LOOP_PRE()                                                        \
   ompt_frame_t *parent_frame;                                                  \
-  if (ompt_enabled) {                                                          \
-    parent_frame = __ompt_get_task_frame_internal(0);                          \
-    parent_frame->reenter_runtime_frame = __builtin_frame_address(1);          \
-  }
+  if (ompt_enabled.enabled) {                                                  \
+    __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL);   \
+    parent_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);           \
+  }                                                                            \
+  OMPT_STORE_RETURN_ADDRESS(gtid);
 
 #define OMPT_LOOP_POST()                                                       \
-  if (ompt_enabled) {                                                          \
+  if (ompt_enabled.enabled) {                                                  \
     parent_frame->reenter_runtime_frame = NULL;                                \
   }
 
@@ -878,6 +908,16 @@ void xexpand(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data,
     }
   }
 
+#if OMPT_SUPPORT
+  kmp_taskdata_t *current_task;
+  if (ompt_enabled.enabled) {
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+    current_task = __kmp_threads[gtid]->th.th_current_task;
+    current_task->ompt_task_info.frame.reenter_runtime_frame =
+        OMPT_GET_FRAME_ADDRESS(1);
+  }
+#endif
+
   if (if_cond) {
 #if OMP_40_ENABLED
     if (gomp_flags & 8) {
@@ -893,23 +933,29 @@ void xexpand(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data,
         dep_list[i].flags.out = (i < nout);
       }
       __kmpc_omp_task_with_deps(&loc, gtid, task, ndeps, dep_list, 0, NULL);
-    } else
+    } else {
 #endif
       __kmpc_omp_task(&loc, gtid, task);
+// The brace opened by "} else {" exists only in OMP_40_ENABLED builds, so it
+// must be closed under the same guard to keep other builds balanced.
+#if OMP_40_ENABLED
+    }
+#endif
   } else {
 #if OMPT_SUPPORT
     ompt_thread_info_t oldInfo;
     kmp_info_t *thread;
     kmp_taskdata_t *taskdata;
-    if (ompt_enabled) {
+    if (ompt_enabled.enabled) {
       // Store the threads states and restore them after the task
       thread = __kmp_threads[gtid];
       taskdata = KMP_TASK_TO_TASKDATA(task);
       oldInfo = thread->th.ompt_thread_info;
       thread->th.ompt_thread_info.wait_id = 0;
-      thread->th.ompt_thread_info.state = ompt_state_work_parallel;
+      thread->th.ompt_thread_info.state = omp_state_work_parallel;
       taskdata->ompt_task_info.frame.exit_runtime_frame =
-          __builtin_frame_address(0);
+          OMPT_GET_FRAME_ADDRESS(0);
+      OMPT_STORE_RETURN_ADDRESS(gtid);
     }
 #endif
 
@@ -918,12 +961,17 @@ void xexpand(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data,
     __kmpc_omp_task_complete_if0(&loc, gtid, task);
 
 #if OMPT_SUPPORT
-    if (ompt_enabled) {
+    if (ompt_enabled.enabled) {
       thread->th.ompt_thread_info = oldInfo;
       taskdata->ompt_task_info.frame.exit_runtime_frame = NULL;
     }
 #endif
   }
+#if OMPT_SUPPORT
+  if (ompt_enabled.enabled) {
+    current_task->ompt_task_info.frame.reenter_runtime_frame = NULL;
+  }
+#endif
 
   KA_TRACE(20, ("GOMP_task exit: T#%d\n", gtid));
 }
@@ -932,6 +980,11 @@ void xexpand(KMP_API_NAME_GOMP_TASKWAIT)(void) {
   MKLOC(loc, "GOMP_taskwait");
   int gtid = __kmp_entry_gtid();
 
+#if OMPT_SUPPORT
+  if (ompt_enabled.enabled)
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+
   KA_TRACE(20, ("GOMP_taskwait: T#%d\n", gtid));
 
   __kmpc_omp_taskwait(&loc, gtid);
@@ -1001,10 +1054,11 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(void (*task)(void *),
 #if OMPT_SUPPORT
   ompt_frame_t *parent_frame;
 
-  if (ompt_enabled) {
-    parent_frame = __ompt_get_task_frame_internal(0);
-    parent_frame->reenter_runtime_frame = __builtin_frame_address(1);
+  if (ompt_enabled.enabled) {
+    __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL);
+    parent_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
   }
+  OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
 
   MKLOC(loc, "GOMP_parallel_sections_start");
@@ -1023,7 +1077,7 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(void (*task)(void *),
   }
 
 #if OMPT_SUPPORT
-  if (ompt_enabled) {
+  if (ompt_enabled.enabled) {
     parent_frame->reenter_runtime_frame = NULL;
   }
 #endif
@@ -1037,7 +1091,20 @@ void xexpand(KMP_API_NAME_GOMP_SECTIONS_END)(void) {
   int gtid = __kmp_get_gtid();
   KA_TRACE(20, ("GOMP_sections_end: T#%d\n", gtid))
 
+#if OMPT_SUPPORT
+  ompt_frame_t *ompt_frame;
+  if (ompt_enabled.enabled) {
+    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
+    ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+  }
+#endif
   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
+#if OMPT_SUPPORT
+  if (ompt_enabled.enabled) {
+    ompt_frame->reenter_runtime_frame = NULL;
+  }
+#endif
 
   KA_TRACE(20, ("GOMP_sections_end exit: T#%d\n", gtid))
 }
@@ -1063,10 +1130,11 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *), void *data,
 
 #if OMPT_SUPPORT
   ompt_task_info_t *parent_task_info, *task_info;
-  if (ompt_enabled) {
-    parent_task_info = __ompt_get_taskinfo(0);
-    parent_task_info->frame.reenter_runtime_frame = __builtin_frame_address(1);
+  if (ompt_enabled.enabled) {
+    parent_task_info = __ompt_get_task_info_object(0);
+    parent_task_info->frame.reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
   }
+  OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
   if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
     if (num_threads != 0) {
@@ -1082,15 +1150,20 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *), void *data,
     __kmp_GOMP_serialized_parallel(&loc, gtid, task);
   }
 #if OMPT_SUPPORT
-  if (ompt_enabled) {
-    task_info = __ompt_get_taskinfo(0);
-    task_info->frame.exit_runtime_frame = __builtin_frame_address(0);
+  if (ompt_enabled.enabled) {
+    task_info = __ompt_get_task_info_object(0);
+    task_info->frame.exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(0);
   }
 #endif
   task(data);
+#if OMPT_SUPPORT
+  if (ompt_enabled.enabled) {
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+  }
+#endif
   xexpand(KMP_API_NAME_GOMP_PARALLEL_END)();
 #if OMPT_SUPPORT
-  if (ompt_enabled) {
+  if (ompt_enabled.enabled) {
     task_info->frame.exit_runtime_frame = NULL;
     parent_task_info->frame.reenter_runtime_frame = NULL;
   }
@@ -1106,6 +1179,10 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task)(void *),
   MKLOC(loc, "GOMP_parallel_sections");
   KA_TRACE(20, ("GOMP_parallel_sections: T#%d\n", gtid));
 
+#if OMPT_SUPPORT
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+
   if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
     if (num_threads != 0) {
       __kmp_push_num_threads(&loc, gtid, num_threads);
@@ -1153,6 +1230,7 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task)(void *),
       __kmp_GOMP_serialized_parallel(&loc, gtid, task);                        \
     }                                                                          \
                                                                                \
+    IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                          \
     KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                              \
                       (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz,          \
                       (schedule) != kmp_sch_static);                           \
@@ -1177,6 +1255,11 @@ void xexpand(KMP_API_NAME_GOMP_TASKGROUP_START)(void) {
   MKLOC(loc, "GOMP_taskgroup_start");
   KA_TRACE(20, ("GOMP_taskgroup_start: T#%d\n", gtid));
 
+#if OMPT_SUPPORT
+  if (ompt_enabled.enabled)
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+
   __kmpc_taskgroup(&loc, gtid);
 
   return;
@@ -1187,6 +1270,11 @@ void xexpand(KMP_API_NAME_GOMP_TASKGROUP_END)(void) {
   MKLOC(loc, "GOMP_taskgroup_end");
   KA_TRACE(20, ("GOMP_taskgroup_end: T#%d\n", gtid));
 
+#if OMPT_SUPPORT
+  if (ompt_enabled.enabled)
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+
   __kmpc_end_taskgroup(&loc, gtid);
 
   return;
index 1adfec6..bb18a6d 100644 (file)
@@ -1135,7 +1135,7 @@ __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
   kmp_int32 need_mf = 1;
 
 #if OMPT_SUPPORT
-  ompt_state_t prev_state = ompt_state_undefined;
+  omp_state_t prev_state = omp_state_undefined;
 #endif
 
   KA_TRACE(1000,
@@ -1243,7 +1243,7 @@ __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
 #endif
 
 #if OMPT_SUPPORT
-        if (ompt_enabled && prev_state != ompt_state_undefined) {
+        if (ompt_enabled.enabled && prev_state != omp_state_undefined) {
           /* change the state before clearing wait_id */
           this_thr->th.ompt_thread_info.state = prev_state;
           this_thr->th.ompt_thread_info.wait_id = 0;
@@ -1258,11 +1258,11 @@ __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
     }
 
 #if OMPT_SUPPORT
-    if (ompt_enabled && prev_state == ompt_state_undefined) {
+    if (ompt_enabled.enabled && prev_state == omp_state_undefined) {
       /* this thread will spin; set wait_id before entering wait state */
       prev_state = this_thr->th.ompt_thread_info.state;
       this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;
-      this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
+      this_thr->th.ompt_thread_info.state = omp_state_wait_lock;
     }
 #endif
 
@@ -2911,11 +2911,11 @@ static int __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck,
 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l,
                                      kmp_dyna_lockseq_t tag);
 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock);
-static void __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
+static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
-static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
-                                                kmp_int32);
+static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
+                                               kmp_int32);
 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
                                                  kmp_int32);
 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
@@ -2938,14 +2938,13 @@ void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *) = {
 
 // set/acquire functions
 #define expand(l, op)                                                          \
-  0, (void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
-static void (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = {
+  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
+static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = {
     __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)};
 #undef expand
 #define expand(l, op)                                                          \
-  0, (void (*)(kmp_dyna_lock_t *,                                              \
-               kmp_int32))__kmp_##op##_##l##_lock_with_checks,
-static void (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = {
+  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
+static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = {
     __kmp_set_indirect_lock_with_checks, 0,
     KMP_FOREACH_D_LOCK(expand, acquire)};
 #undef expand
@@ -2968,7 +2967,7 @@ static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = {
 #undef expand
 
 // Exposes only one set of jump tables (*lock or *lock_with_checks).
-void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0;
+int (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0;
 int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0;
 int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0;
 
@@ -2982,13 +2981,13 @@ void (*__kmp_indirect_destroy[])(kmp_user_lock_p) = {
 
 // set/acquire functions
 #define expand(l, op)                                                          \
-  (void (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
-static void (*indirect_set[])(kmp_user_lock_p, kmp_int32) = {
-    KMP_FOREACH_I_LOCK(expand, acquire)};
+  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
+static int (*indirect_set[])(kmp_user_lock_p,
+                             kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)};
 #undef expand
 #define expand(l, op)                                                          \
-  (void (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
-static void (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = {
+  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
+static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = {
     KMP_FOREACH_I_LOCK(expand, acquire)};
 #undef expand
 
@@ -3009,7 +3008,7 @@ static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {
 #undef expand
 
 // Exposes only one jump tables (*lock or *lock_with_checks).
-void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0;
+int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0;
 int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0;
 int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0;
 
@@ -3164,9 +3163,9 @@ static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) {
   __kmp_release_lock(&__kmp_global_lock, gtid);
 }
 
-static void __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
+static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
   kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
-  KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
+  return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
 }
 
 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
@@ -3179,11 +3178,11 @@ static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
   return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
 }
 
-static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
-                                                kmp_int32 gtid) {
+static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
+                                               kmp_int32 gtid) {
   kmp_indirect_lock_t *l =
       __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
-  KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
+  return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
 }
 
 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
index e65aa98..76f1968 100644 (file)
@@ -1142,7 +1142,7 @@ typedef struct {
 // with/without consistency checking.
 extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t);
 extern void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *);
-extern void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32);
+extern int (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32);
 extern int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32);
 extern int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32);
 
@@ -1150,7 +1150,7 @@ extern int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32);
 // with/withuot consistency checking.
 extern void (*__kmp_indirect_init[])(kmp_user_lock_p);
 extern void (*__kmp_indirect_destroy[])(kmp_user_lock_p);
-extern void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32);
+extern int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32);
 extern int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32);
 extern int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32);
 
index 3c40873..bd3b8fe 100644 (file)
@@ -722,16 +722,6 @@ void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
     /* TODO replace with general release procedure */
     team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
 
-#if OMPT_SUPPORT && OMPT_BLAME
-    if (ompt_enabled &&
-        ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
-      /* accept blame for "ordered" waiting */
-      kmp_info_t *this_thread = __kmp_threads[gtid];
-      ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
-          this_thread->th.ompt_thread_info.wait_id);
-    }
-#endif
-
     KMP_MB(); /* Flush all pending memory write invalidates.  */
   }
 #endif /* BUILD_PARALLEL_ORDERED */
@@ -1204,6 +1194,28 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
   this_thr->th.th_set_proc_bind = proc_bind_default;
 #endif /* OMP_40_ENABLED */
 
+#if OMPT_SUPPORT
+  ompt_data_t ompt_parallel_data;
+  ompt_parallel_data.ptr = NULL;
+  ompt_data_t *implicit_task_data;
+  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
+  if (ompt_enabled.enabled &&
+      this_thr->th.ompt_thread_info.state != omp_state_overhead) {
+
+    ompt_task_info_t *parent_task_info;
+    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
+
+    parent_task_info->frame.reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+    if (ompt_enabled.ompt_callback_parallel_begin) {
+      int team_size = 1;
+
+      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
+          &(parent_task_info->task_data), &(parent_task_info->frame),
+          &ompt_parallel_data, team_size, ompt_invoker_program, codeptr);
+    }
+  }
+#endif // OMPT_SUPPORT
+
   if (this_thr->th.th_team != serial_team) {
     // Nested level will be an index in the nested nthreads array
     int level = this_thr->th.th_team->t.t_level;
@@ -1215,13 +1227,9 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
 
       __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
 
-#if OMPT_SUPPORT
-      ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
-#endif
-
       new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
 #if OMPT_SUPPORT
-                                     ompt_parallel_id,
+                                     ompt_parallel_data,
 #endif
 #if OMP_40_ENABLED
                                      proc_bind,
@@ -1316,11 +1324,6 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
     }
     this_thr->th.th_dispatch = serial_team->t.t_dispatch;
 
-#if OMPT_SUPPORT
-    ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
-    __ompt_team_assign_id(serial_team, ompt_parallel_id);
-#endif
-
     KMP_MB();
 
   } else {
@@ -1364,17 +1367,41 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
 
   if (__kmp_env_consistency_check)
     __kmp_push_parallel(global_tid, NULL);
+#if OMPT_SUPPORT
+  serial_team->t.ompt_team_info.master_return_address = codeptr;
+  if (ompt_enabled.enabled &&
+      this_thr->th.ompt_thread_info.state != omp_state_overhead) {
+    OMPT_CUR_TASK_INFO(this_thr)
+        ->frame.exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+
+    ompt_lw_taskteam_t lw_taskteam;
+    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
+                            &ompt_parallel_data, codeptr);
+
+    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
+    // don't use lw_taskteam after linking. content was swapped
+
+    /* OMPT implicit task begin: reported with a team size of 1 */
+    implicit_task_data = OMPT_CUR_TASK_DATA(this_thr);
+    if (ompt_enabled.ompt_callback_implicit_task) {
+      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
+          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid));
+    }
+
+    /* OMPT state: mark the thread as working in a parallel region */
+    this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
+    OMPT_CUR_TASK_INFO(this_thr)
+        ->frame.exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+  }
+#endif
 }
 
 /* most of the work for a fork */
 /* return true if we really went parallel, false if serialized */
 int __kmp_fork_call(ident_t *loc, int gtid,
                     enum fork_context_e call_context, // Intel, GNU, ...
-                    kmp_int32 argc,
-#if OMPT_SUPPORT
-                    void *unwrapped_task,
-#endif
-                    microtask_t microtask, launch_t invoker,
+                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                     va_list *ap
@@ -1432,16 +1459,17 @@ int __kmp_fork_call(ident_t *loc, int gtid,
     master_set_numthreads = master_th->th.th_set_nproc;
 
 #if OMPT_SUPPORT
-    ompt_parallel_id_t ompt_parallel_id;
-    ompt_task_id_t ompt_task_id;
+    ompt_data_t ompt_parallel_data;
+    ompt_parallel_data.ptr = NULL;
+    ompt_data_t *parent_task_data;
     ompt_frame_t *ompt_frame;
-    ompt_task_id_t my_task_id;
-    ompt_parallel_id_t my_parallel_id;
+    ompt_data_t *implicit_task_data;
+    void *return_address = NULL;
 
-    if (ompt_enabled) {
-      ompt_parallel_id = __ompt_parallel_id_new(gtid);
-      ompt_task_id = __ompt_get_task_id_internal(0);
-      ompt_frame = __ompt_get_task_frame_internal(0);
+    if (ompt_enabled.enabled) {
+      __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
+                                    NULL, NULL);
+      return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
     }
 #endif
 
@@ -1465,13 +1493,16 @@ int __kmp_fork_call(ident_t *loc, int gtid,
 #endif
 
 #if OMPT_SUPPORT
-    if (ompt_enabled &&
-        ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
-      int team_size = master_set_numthreads;
-
-      ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
-          ompt_task_id, ompt_frame, ompt_parallel_id, team_size, unwrapped_task,
-          OMPT_INVOKER(call_context));
+    if (ompt_enabled.enabled) {
+      if (ompt_enabled.ompt_callback_parallel_begin) {
+        int team_size = master_set_numthreads
+                            ? master_set_numthreads
+                            : get__nproc_2(parent_team, master_tid);
+        ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
+            parent_task_data, ompt_frame, &ompt_parallel_data, team_size,
+            OMPT_INVOKER(call_context), return_address);
+      }
+      master_th->th.ompt_thread_info.state = omp_state_overhead;
     }
 #endif
 
@@ -1508,27 +1539,25 @@ int __kmp_fork_call(ident_t *loc, int gtid,
 
         ompt_lw_taskteam_t lw_taskteam;
 
-        if (ompt_enabled) {
-          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, unwrapped_task,
-                                  ompt_parallel_id);
-          lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
+        if (ompt_enabled.enabled) {
+          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
+                                  &ompt_parallel_data, return_address);
           exit_runtime_p =
               &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
 
-          __ompt_lw_taskteam_link(&lw_taskteam, master_th);
+          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
+          // don't use lw_taskteam after linking; its content was swapped
 
-#if OMPT_TRACE
           /* OMPT implicit task begin */
-          my_task_id = lw_taskteam.ompt_task_info.task_id;
-          my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
-          if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
-            ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
-                my_parallel_id, my_task_id);
+          implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
+          if (ompt_enabled.ompt_callback_implicit_task) {
+            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
+                implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
           }
-#endif
 
           /* OMPT state */
-          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
+          master_th->th.ompt_thread_info.state = omp_state_work_parallel;
         } else {
           exit_runtime_p = &dummy;
         }
@@ -1547,34 +1576,27 @@ int __kmp_fork_call(ident_t *loc, int gtid,
 
 #if OMPT_SUPPORT
         *exit_runtime_p = NULL;
-        if (ompt_enabled) {
-#if OMPT_TRACE
-          lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
-
-          if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
-            ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
-                ompt_parallel_id, ompt_task_id);
+        if (ompt_enabled.enabled) {
+          OMPT_CUR_TASK_INFO(master_th)->frame.exit_runtime_frame = NULL;
+          if (ompt_enabled.ompt_callback_implicit_task) {
+            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+                ompt_scope_end, NULL, implicit_task_data, 1,
+                __kmp_tid_from_gtid(gtid));
           }
-
           __ompt_lw_taskteam_unlink(master_th);
-          // reset clear the task id only after unlinking the task
-          lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
-#endif
 
-          if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
-            ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
-                ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
+          if (ompt_enabled.ompt_callback_parallel_end) {
+            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
+                OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th),
+                OMPT_INVOKER(call_context), return_address);
           }
-          master_th->th.ompt_thread_info.state = ompt_state_overhead;
+          master_th->th.ompt_thread_info.state = omp_state_overhead;
         }
 #endif
         return TRUE;
       }
 
       parent_team->t.t_pkfn = microtask;
-#if OMPT_SUPPORT
-      parent_team->t.ompt_team_info.microtask = unwrapped_task;
-#endif
       parent_team->t.t_invoke = invoker;
       KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel);
       parent_team->t.t_active_level++;
@@ -1726,28 +1748,27 @@ int __kmp_fork_call(ident_t *loc, int gtid,
 #if OMPT_SUPPORT
           void *dummy;
           void **exit_runtime_p;
+          ompt_task_info_t *task_info;
 
           ompt_lw_taskteam_t lw_taskteam;
 
-          if (ompt_enabled) {
+          if (ompt_enabled.enabled) {
             __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
-                                    unwrapped_task, ompt_parallel_id);
-            lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
-            exit_runtime_p =
-                &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
-
-            __ompt_lw_taskteam_link(&lw_taskteam, master_th);
-
-#if OMPT_TRACE
-            my_task_id = lw_taskteam.ompt_task_info.task_id;
-            if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
-              ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
-                  ompt_parallel_id, my_task_id);
+                                    &ompt_parallel_data, return_address);
+
+            __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
+            // don't use lw_taskteam after linking; its content was swapped
+
+            task_info = OMPT_CUR_TASK_INFO(master_th);
+            exit_runtime_p = &(task_info->frame.exit_runtime_frame);
+            if (ompt_enabled.ompt_callback_implicit_task) {
+              ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+                  ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
+                  &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid));
             }
-#endif
 
             /* OMPT state */
-            master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
+            master_th->th.ompt_thread_info.state = omp_state_work_parallel;
           } else {
             exit_runtime_p = &dummy;
           }
@@ -1766,26 +1787,21 @@ int __kmp_fork_call(ident_t *loc, int gtid,
           }
 
 #if OMPT_SUPPORT
-          *exit_runtime_p = NULL;
-          if (ompt_enabled) {
-            lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
-
-#if OMPT_TRACE
-            if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
-              ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
-                  ompt_parallel_id, ompt_task_id);
+          if (ompt_enabled.enabled) {
+            exit_runtime_p = NULL;
+            if (ompt_enabled.ompt_callback_implicit_task) {
+              ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+                  ompt_scope_end, NULL, &(task_info->task_data), 1,
+                  __kmp_tid_from_gtid(gtid));
             }
-#endif
 
             __ompt_lw_taskteam_unlink(master_th);
-            // reset clear the task id only after unlinking the task
-            lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
-
-            if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
-              ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
-                  ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
+            if (ompt_enabled.ompt_callback_parallel_end) {
+              ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
+                  OMPT_CUR_TEAM_DATA(master_th), parent_task_data,
+                  OMPT_INVOKER(call_context), return_address);
             }
-            master_th->th.ompt_thread_info.state = ompt_state_overhead;
+            master_th->th.ompt_thread_info.state = omp_state_overhead;
           }
 #endif
         } else if (microtask == (microtask_t)__kmp_teams_master) {
@@ -1834,30 +1850,28 @@ int __kmp_fork_call(ident_t *loc, int gtid,
 #if OMPT_SUPPORT
           void *dummy;
           void **exit_runtime_p;
+          ompt_task_info_t *task_info;
 
           ompt_lw_taskteam_t lw_taskteam;
 
-          if (ompt_enabled) {
+          if (ompt_enabled.enabled) {
             __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
-                                    unwrapped_task, ompt_parallel_id);
-            lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
-            exit_runtime_p =
-                &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
-
-            __ompt_lw_taskteam_link(&lw_taskteam, master_th);
+                                    &ompt_parallel_data, return_address);
+            __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
+            // don't use lw_taskteam after linking; its content was swapped
+            task_info = OMPT_CUR_TASK_INFO(master_th);
+            exit_runtime_p = &(task_info->frame.exit_runtime_frame);
 
-#if OMPT_TRACE
             /* OMPT implicit task begin */
-            my_task_id = lw_taskteam.ompt_task_info.task_id;
-            my_parallel_id = ompt_parallel_id;
-            if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
-              ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
-                  my_parallel_id, my_task_id);
+            implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
+            if (ompt_enabled.ompt_callback_implicit_task) {
+              ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+                  ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
+                  implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
             }
-#endif
 
             /* OMPT state */
-            master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
+            master_th->th.ompt_thread_info.state = omp_state_work_parallel;
           } else {
             exit_runtime_p = &dummy;
           }
@@ -1875,26 +1889,22 @@ int __kmp_fork_call(ident_t *loc, int gtid,
           }
 
 #if OMPT_SUPPORT
-          *exit_runtime_p = NULL;
-          if (ompt_enabled) {
-#if OMPT_TRACE
-            lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
-
-            if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
-              ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
-                  my_parallel_id, my_task_id);
+          if (ompt_enabled.enabled) {
+            *exit_runtime_p = NULL;
+            if (ompt_enabled.ompt_callback_implicit_task) {
+              ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+                  ompt_scope_end, NULL, &(task_info->task_data), 1,
+                  __kmp_tid_from_gtid(gtid));
             }
-#endif
 
+            ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
             __ompt_lw_taskteam_unlink(master_th);
-            // reset clear the task id only after unlinking the task
-            lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
-
-            if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
-              ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
-                  ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
+            if (ompt_enabled.ompt_callback_parallel_end) {
+              ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
+                  &ompt_parallel_data, parent_task_data,
+                  OMPT_INVOKER(call_context), return_address);
             }
-            master_th->th.ompt_thread_info.state = ompt_state_overhead;
+            master_th->th.ompt_thread_info.state = omp_state_overhead;
           }
 #endif
 #if OMP_40_ENABLED
@@ -1902,14 +1912,13 @@ int __kmp_fork_call(ident_t *loc, int gtid,
 #endif /* OMP_40_ENABLED */
       } else if (call_context == fork_context_gnu) {
 #if OMPT_SUPPORT
-        ompt_lw_taskteam_t *lwt =
-            (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t));
-        __ompt_lw_taskteam_init(lwt, master_th, gtid, unwrapped_task,
-                                ompt_parallel_id);
+        ompt_lw_taskteam_t lwt;
+        __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
+                                return_address);
 
-        lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
-        lwt->ompt_task_info.frame.exit_runtime_frame = NULL;
-        __ompt_lw_taskteam_link(lwt, master_th);
+        lwt.ompt_task_info.frame.exit_runtime_frame = NULL;
+        __ompt_lw_taskteam_link(&lwt, master_th, 1);
+// don't use lwt after linking; its content was swapped
 #endif
 
         // we were called from GNU native code
@@ -2004,7 +2013,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
       KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
       team = __kmp_allocate_team(root, nthreads, nthreads,
 #if OMPT_SUPPORT
-                                 ompt_parallel_id,
+                                 ompt_parallel_data,
 #endif
 #if OMP_40_ENABLED
                                  proc_bind,
@@ -2015,7 +2024,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
       KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
       team = __kmp_allocate_team(root, nthreads, nthreads,
 #if OMPT_SUPPORT
-                                 ompt_parallel_id,
+                                 ompt_parallel_data,
 #endif
 #if OMP_40_ENABLED
                                  proc_bind,
@@ -2033,7 +2042,8 @@ int __kmp_fork_call(ident_t *loc, int gtid,
     KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
     KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
 #if OMPT_SUPPORT
-    KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task);
+    KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
+                          return_address);
 #endif
     KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
 // TODO: parent_team->t.t_level == INT_MAX ???
@@ -2167,7 +2177,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
                          &master_th->th.th_current_task->td_icvs, loc);
 
 #if OMPT_SUPPORT
-    master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
+    master_th->th.ompt_thread_info.state = omp_state_work_parallel;
 #endif
 
     __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
@@ -2251,8 +2261,8 @@ int __kmp_fork_call(ident_t *loc, int gtid,
   KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
 
 #if OMPT_SUPPORT
-  if (ompt_enabled) {
-    master_th->th.ompt_thread_info.state = ompt_state_overhead;
+  if (ompt_enabled.enabled) {
+    master_th->th.ompt_thread_info.state = omp_state_overhead;
   }
 #endif
 
@@ -2264,17 +2274,18 @@ static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                             kmp_team_t *team) {
   // restore state outside the region
   thread->th.ompt_thread_info.state =
-      ((team->t.t_serialized) ? ompt_state_work_serial
-                              : ompt_state_work_parallel);
+      ((team->t.t_serialized) ? omp_state_work_serial
+                              : omp_state_work_parallel);
 }
 
-static inline void __kmp_join_ompt(kmp_info_t *thread, kmp_team_t *team,
-                                   ompt_parallel_id_t parallel_id,
-                                   fork_context_e fork_context) {
-  ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
-  if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
-    ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
-        parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));
+static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
+                                   kmp_team_t *team, ompt_data_t *parallel_data,
+                                   fork_context_e fork_context, void *codeptr) {
+  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+  if (ompt_enabled.ompt_callback_parallel_end) {
+    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
+        parallel_data, &(task_info->task_data), OMPT_INVOKER(fork_context),
+        codeptr);
   }
 
   task_info->frame.reenter_runtime_frame = NULL;
@@ -2311,8 +2322,8 @@ void __kmp_join_call(ident_t *loc, int gtid
   master_th->th.th_ident = loc;
 
 #if OMPT_SUPPORT
-  if (ompt_enabled) {
-    master_th->th.ompt_thread_info.state = ompt_state_overhead;
+  if (ompt_enabled.enabled) {
+    master_th->th.ompt_thread_info.state = omp_state_overhead;
   }
 #endif
 
@@ -2349,7 +2360,7 @@ void __kmp_join_call(ident_t *loc, int gtid
     __kmpc_end_serialized_parallel(loc, gtid);
 
 #if OMPT_SUPPORT
-    if (ompt_enabled) {
+    if (ompt_enabled.enabled) {
       __kmp_join_restore_state(master_th, parent_team);
     }
 #endif
@@ -2377,7 +2388,8 @@ void __kmp_join_call(ident_t *loc, int gtid
   KMP_MB();
 
 #if OMPT_SUPPORT
-  ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;
+  ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
+  void *codeptr = team->t.ompt_team_info.master_return_address;
 #endif
 
 #if USE_ITT_BUILD
@@ -2449,8 +2461,9 @@ void __kmp_join_call(ident_t *loc, int gtid
     }
 
 #if OMPT_SUPPORT
-    if (ompt_enabled) {
-      __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
+    if (ompt_enabled.enabled) {
+      __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
+                      codeptr);
     }
 #endif
 
@@ -2479,15 +2492,18 @@ void __kmp_join_call(ident_t *loc, int gtid
   }
   KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
 
-#if OMPT_SUPPORT && OMPT_TRACE
-  if (ompt_enabled) {
-    ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
-    if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
-      ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
-          parallel_id, task_info->task_id);
+#if OMPT_SUPPORT
+  if (ompt_enabled.enabled) {
+    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+    if (ompt_enabled.ompt_callback_implicit_task) {
+      int ompt_team_size = team->t.t_nproc;
+      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+          ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
+          __kmp_tid_from_gtid(gtid));
     }
+
     task_info->frame.exit_runtime_frame = NULL;
-    task_info->task_id = 0;
+    task_info->task_data = ompt_data_none;
   }
 #endif
 
@@ -2558,8 +2574,9 @@ void __kmp_join_call(ident_t *loc, int gtid
   __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
 
 #if OMPT_SUPPORT
-  if (ompt_enabled) {
-    __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
+  if (ompt_enabled.enabled) {
+    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
+                    codeptr);
   }
 #endif
 
@@ -3154,7 +3171,7 @@ static void __kmp_initialize_root(kmp_root_t *root) {
                           1, // new_nproc
                           1, // max_nproc
 #if OMPT_SUPPORT
-                          0, // root parallel id
+                          ompt_data_none, // root parallel id
 #endif
 #if OMP_40_ENABLED
                           __kmp_nested_proc_bind.bind_types[0],
@@ -3195,7 +3212,7 @@ static void __kmp_initialize_root(kmp_root_t *root) {
                           1, // new_nproc
                           __kmp_dflt_team_nth_ub * 2, // max_nproc
 #if OMPT_SUPPORT
-                          0, // root parallel id
+                          ompt_data_none, // root parallel id
 #endif
 #if OMP_40_ENABLED
                           __kmp_nested_proc_bind.bind_types[0],
@@ -3734,6 +3751,9 @@ int __kmp_register_root(int initial_thread) {
       __kmp_print_thread_storage_map(root_thread, gtid);
     }
     root_thread->th.th_info.ds.ds_gtid = gtid;
+#if OMPT_SUPPORT
+    root_thread->th.ompt_thread_info.thread_data.ptr = NULL;
+#endif
     root_thread->th.th_root = root;
     if (__kmp_env_consistency_check) {
       root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
@@ -3756,7 +3776,7 @@ int __kmp_register_root(int initial_thread) {
     root_thread->th.th_serial_team =
         __kmp_allocate_team(root, 1, 1,
 #if OMPT_SUPPORT
-                            0, // root parallel id
+                            ompt_data_none, // root parallel id
 #endif
 #if OMP_40_ENABLED
                             proc_bind_default,
@@ -3826,6 +3846,29 @@ int __kmp_register_root(int initial_thread) {
 
   __kmp_root_counter++;
 
+#if OMPT_SUPPORT
+  if (!initial_thread && ompt_enabled.enabled) {
+
+    ompt_thread_t *root_thread = ompt_get_thread();
+
+    ompt_set_thread_state(root_thread, omp_state_overhead);
+
+    if (ompt_enabled.ompt_callback_thread_begin) {
+      ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
+          ompt_thread_initial, __ompt_get_thread_data_internal());
+    }
+    ompt_data_t *task_data;
+    __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
+    if (ompt_enabled.ompt_callback_task_create) {
+      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
+          NULL, NULL, task_data, ompt_task_initial, 0, NULL);
+      // initial task has nothing to return to
+    }
+
+    ompt_set_thread_state(root_thread, omp_state_work_serial);
+  }
+#endif
+
   KMP_MB();
   __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
 
@@ -3909,9 +3952,9 @@ static int __kmp_reset_root(int gtid, kmp_root_t *root) {
 #endif /* KMP_OS_WINDOWS */
 
 #if OMPT_SUPPORT
-  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
-    int gtid = __kmp_get_gtid();
-    __ompt_thread_end(ompt_thread_initial, gtid);
+  if (ompt_enabled.ompt_callback_thread_end) {
+    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
+        &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
   }
 #endif
 
@@ -3961,7 +4004,7 @@ void __kmp_unregister_root_current_thread(int gtid) {
   if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
 #if OMPT_SUPPORT
     // the runtime is shutting down so we won't report any events
-    thread->th.ompt_thread_info.state = ompt_state_undefined;
+    thread->th.ompt_thread_info.state = omp_state_undefined;
 #endif
     __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
   }
@@ -4282,7 +4325,7 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
     new_thr->th.th_serial_team = serial_team =
         (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
 #if OMPT_SUPPORT
-                                          0, // root parallel id
+                                          ompt_data_none, // root parallel id
 #endif
 #if OMP_40_ENABLED
                                           proc_bind_default,
@@ -4813,7 +4856,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
 kmp_team_t *
 __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
 #if OMPT_SUPPORT
-                    ompt_parallel_id_t ompt_parallel_id,
+                    ompt_data_t ompt_parallel_data,
 #endif
 #if OMP_40_ENABLED
                     kmp_proc_bind_t new_proc_bind,
@@ -5180,7 +5223,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
 #endif
 
 #if OMPT_SUPPORT
-    __ompt_team_assign_id(team, ompt_parallel_id);
+    __ompt_team_assign_id(team, ompt_parallel_data);
 #endif
 
     KMP_MB();
@@ -5232,7 +5275,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
                     team->t.t_id));
 
 #if OMPT_SUPPORT
-      __ompt_team_assign_id(team, ompt_parallel_id);
+      __ompt_team_assign_id(team, ompt_parallel_data);
 #endif
 
       KMP_MB();
@@ -5296,7 +5339,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
 #endif
 
 #if OMPT_SUPPORT
-  __ompt_team_assign_id(team, ompt_parallel_id);
+  __ompt_team_assign_id(team, ompt_parallel_data);
   team->t.ompt_serialized_team_info = NULL;
 #endif
 
@@ -5563,16 +5606,26 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
   }
 
 #if OMPT_SUPPORT
-  if (ompt_enabled) {
-    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
+  ompt_data_t *thread_data;
+  if (ompt_enabled.enabled) {
+    thread_data = &(this_thr->th.ompt_thread_info.thread_data);
+    thread_data->ptr = NULL;
+
+    this_thr->th.ompt_thread_info.state = omp_state_overhead;
     this_thr->th.ompt_thread_info.wait_id = 0;
-    this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
-    if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
-      __ompt_thread_begin(ompt_thread_worker, gtid);
+    this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
+    if (ompt_enabled.ompt_callback_thread_begin) {
+      ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
+          ompt_thread_worker, thread_data);
     }
   }
 #endif
 
+#if OMPT_SUPPORT
+  if (ompt_enabled.enabled) {
+    this_thr->th.ompt_thread_info.state = omp_state_idle;
+  }
+#endif
   /* This is the place where threads wait for work */
   while (!TCR_4(__kmp_global.g.g_done)) {
     KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
@@ -5581,18 +5634,12 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
     /* wait for work to do */
     KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));
 
-#if OMPT_SUPPORT
-    if (ompt_enabled) {
-      this_thr->th.ompt_thread_info.state = ompt_state_idle;
-    }
-#endif
-
     /* No tid yet since not part of a team */
     __kmp_fork_barrier(gtid, KMP_GTID_DNE);
 
 #if OMPT_SUPPORT
-    if (ompt_enabled) {
-      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
+    if (ompt_enabled.enabled) {
+      this_thr->th.ompt_thread_info.state = omp_state_overhead;
     }
 #endif
 
@@ -5600,14 +5647,6 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
 
     /* have we been allocated? */
     if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
-#if OMPT_SUPPORT
-      ompt_task_info_t *task_info;
-      ompt_parallel_id_t my_parallel_id;
-      if (ompt_enabled) {
-        task_info = __ompt_get_taskinfo(0);
-        my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id;
-      }
-#endif
       /* we were just woken up, so run our new task */
       if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
         int rc;
@@ -5619,11 +5658,8 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
         updateHWFPControl(*pteam);
 
 #if OMPT_SUPPORT
-        if (ompt_enabled) {
-          this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
-          // Initialize OMPT task id for implicit task.
-          int tid = __kmp_tid_from_gtid(gtid);
-          task_info->task_id = __ompt_task_id_new(tid);
+        if (ompt_enabled.enabled) {
+          this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
         }
 #endif
 
@@ -5634,40 +5670,29 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
         }
         KMP_ASSERT(rc);
 
-#if OMPT_SUPPORT
-        if (ompt_enabled) {
-          /* no frame set while outside task */
-          task_info->frame.exit_runtime_frame = NULL;
-
-          this_thr->th.ompt_thread_info.state = ompt_state_overhead;
-        }
-#endif
         KMP_MB();
         KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
                       gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                       (*pteam)->t.t_pkfn));
       }
-      /* join barrier after parallel region */
-      __kmp_join_barrier(gtid);
-#if OMPT_SUPPORT && OMPT_TRACE
-      if (ompt_enabled) {
-        if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
-          // don't access *pteam here: it may have already been freed
-          // by the master thread behind the barrier (possible race)
-          ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
-              my_parallel_id, task_info->task_id);
-        }
-        task_info->frame.exit_runtime_frame = NULL;
-        task_info->task_id = 0;
+#if OMPT_SUPPORT
+      if (ompt_enabled.enabled) {
+        /* no frame set while outside task */
+        __ompt_get_task_info_object(0)->frame.exit_runtime_frame = NULL;
+
+        this_thr->th.ompt_thread_info.state = omp_state_overhead;
+        this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr);
       }
 #endif
+      /* join barrier after parallel region */
+      __kmp_join_barrier(gtid);
     }
   }
   TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
 
 #if OMPT_SUPPORT
-  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
-    __ompt_thread_end(ompt_thread_worker, gtid);
+  if (ompt_enabled.ompt_callback_thread_end) {
+    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
   }
 #endif
 
@@ -6925,26 +6950,27 @@ int __kmp_invoke_task_func(int gtid) {
 #if OMPT_SUPPORT
   void *dummy;
   void **exit_runtime_p;
-  ompt_task_id_t my_task_id;
-  ompt_parallel_id_t my_parallel_id;
+  ompt_data_t *my_task_data;
+  ompt_data_t *my_parallel_data;
+  int ompt_team_size;
 
-  if (ompt_enabled) {
+  if (ompt_enabled.enabled) {
     exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid]
                            .ompt_task_info.frame.exit_runtime_frame);
   } else {
     exit_runtime_p = &dummy;
   }
 
-#if OMPT_TRACE
-  my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
-  my_parallel_id = team->t.ompt_team_info.parallel_id;
-  if (ompt_enabled &&
-      ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
-    ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(my_parallel_id,
-                                                                 my_task_id);
+  my_task_data =
+      &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
+  my_parallel_data = &(team->t.ompt_team_info.parallel_data);
+  if (ompt_enabled.ompt_callback_implicit_task) {
+    ompt_team_size = team->t.t_nproc;
+    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+        ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
+        __kmp_tid_from_gtid(gtid));
   }
 #endif
-#endif
 
   {
     KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
@@ -6991,9 +7017,6 @@ void __kmp_teams_master(int gtid) {
   SSC_MARK_FORKING();
 #endif
   __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
-#if OMPT_SUPPORT
-                  (void *)thr->th.th_teams_microtask, // "unwrapped" task
-#endif
                   (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
                   VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
 #if INCLUDE_SSC_MARKS
@@ -7170,6 +7193,36 @@ void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
 #endif /* KMP_DEBUG */
 
   __kmp_join_barrier(gtid); /* wait for everyone */
+#if OMPT_SUPPORT
+  int ds_tid = this_thr->th.th_info.ds.ds_tid;
+  if (this_thr->th.ompt_thread_info.state == omp_state_wait_barrier_implicit) {
+    ompt_data_t *tId = OMPT_CUR_TASK_DATA(this_thr);
+    ompt_data_t *pId = OMPT_CUR_TEAM_DATA(this_thr);
+    this_thr->th.ompt_thread_info.state = omp_state_overhead;
+#if OMPT_OPTIONAL
+    void *codeptr = NULL;
+    if (KMP_MASTER_TID(ds_tid) &&
+        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
+         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
+      codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
+
+    if (ompt_enabled.ompt_callback_sync_region_wait) {
+      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
+          ompt_sync_region_barrier, ompt_scope_end, pId, tId, codeptr);
+    }
+    if (ompt_enabled.ompt_callback_sync_region) {
+      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
+          ompt_sync_region_barrier, ompt_scope_end, pId, tId, codeptr);
+    }
+#endif
+    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
+      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+          ompt_scope_end, NULL, tId, 0, ds_tid);
+    }
+    // NOTE(review): was "return to idle state", but this sets overhead
+    this_thr->th.ompt_thread_info.state = omp_state_overhead;
+  }
+#endif
 
   KMP_MB(); /* Flush all pending memory write invalidates.  */
   KMP_ASSERT(this_thr->th.th_team == team);
index 0fa1a69..4a878e3 100644 (file)
@@ -44,7 +44,12 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                   T *plower, T *pupper,
                                   typename traits_t<T>::signed_t *pstride,
                                   typename traits_t<T>::signed_t incr,
-                                  typename traits_t<T>::signed_t chunk) {
+                                  typename traits_t<T>::signed_t chunk
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+                                  ,
+                                  void *codeptr
+#endif
+                                  ) {
   KMP_COUNT_BLOCK(OMP_FOR_static);
   KMP_TIME_PARTITIONED_BLOCK(FOR_static_scheduling);
 
@@ -58,14 +63,29 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
   kmp_team_t *team;
   kmp_info_t *th = __kmp_threads[gtid];
 
-#if OMPT_SUPPORT && OMPT_TRACE
+#if OMPT_SUPPORT && OMPT_OPTIONAL
   ompt_team_info_t *team_info = NULL;
   ompt_task_info_t *task_info = NULL;
+  ompt_work_type_t ompt_work_type = ompt_work_loop; // default if loc is NULL
 
-  if (ompt_enabled) {
+  if (ompt_enabled.enabled) {
     // Only fully initialize variables needed by OMPT if OMPT is enabled.
     team_info = __ompt_get_teaminfo(0, NULL);
-    task_info = __ompt_get_taskinfo(0);
+    task_info = __ompt_get_task_info_object(0);
+    // Determine workshare type
+    if (loc != NULL) {
+      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
+        ompt_work_type = ompt_work_loop;
+      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
+        ompt_work_type = ompt_work_sections;
+      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
+        ompt_work_type = ompt_work_distribute;
+      } else {
+        KMP_ASSERT2(0,
+                    "__kmpc_for_static_init: can't determine workshare type");
+      }
+      KMP_DEBUG_ASSERT(ompt_work_type);
+    }
   }
 #endif
 
@@ -119,10 +139,11 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
 #endif
     KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
 
-#if OMPT_SUPPORT && OMPT_TRACE
-    if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
-      ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
-          team_info->parallel_id, task_info->task_id, team_info->microtask);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    if (ompt_enabled.ompt_callback_work) {
+      ompt_callbacks.ompt_callback(ompt_callback_work)(
+          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
+          &(task_info->task_data), 0, codeptr);
     }
 #endif
     KMP_COUNT_VALUE(FOR_static_iterations, 0);
@@ -170,10 +191,11 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
 #endif
     KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
 
-#if OMPT_SUPPORT && OMPT_TRACE
-    if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
-      ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
-          team_info->parallel_id, task_info->task_id, team_info->microtask);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    if (ompt_enabled.ompt_callback_work) {
+      ompt_callbacks.ompt_callback(ompt_callback_work)(
+          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
+          &(task_info->task_data), *pstride, codeptr);
     }
 #endif
     return;
@@ -198,10 +220,11 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
 #endif
     KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
 
-#if OMPT_SUPPORT && OMPT_TRACE
-    if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
-      ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
-          team_info->parallel_id, task_info->task_id, team_info->microtask);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    if (ompt_enabled.ompt_callback_work) {
+      ompt_callbacks.ompt_callback(ompt_callback_work)(
+          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
+          &(task_info->task_data), *pstride, codeptr);
     }
 #endif
     return;
@@ -354,10 +377,11 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
 #endif
   KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
 
-#if OMPT_SUPPORT && OMPT_TRACE
-  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
-    ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
-        team_info->parallel_id, task_info->task_id, team_info->microtask);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.ompt_callback_work) {
+    ompt_callbacks.ompt_callback(ompt_callback_work)(
+        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
+        &(task_info->task_data), trip_count, codeptr);
   }
 #endif
 
@@ -745,7 +769,12 @@ void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                               kmp_int32 *pupper, kmp_int32 *pstride,
                               kmp_int32 incr, kmp_int32 chunk) {
   __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
-                                   pupper, pstride, incr, chunk);
+                                   pupper, pstride, incr, chunk
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+                                   ,
+                                   OMPT_GET_RETURN_ADDRESS(0)
+#endif
+                                       );
 }
 
 /*!
@@ -757,7 +786,12 @@ void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                kmp_int32 *pstride, kmp_int32 incr,
                                kmp_int32 chunk) {
   __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
-                                    pupper, pstride, incr, chunk);
+                                    pupper, pstride, incr, chunk
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+                                    ,
+                                    OMPT_GET_RETURN_ADDRESS(0)
+#endif
+                                        );
 }
 
 /*!
@@ -768,7 +802,12 @@ void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                               kmp_int64 *pupper, kmp_int64 *pstride,
                               kmp_int64 incr, kmp_int64 chunk) {
   __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
-                                   pupper, pstride, incr, chunk);
+                                   pupper, pstride, incr, chunk
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+                                   ,
+                                   OMPT_GET_RETURN_ADDRESS(0)
+#endif
+                                       );
 }
 
 /*!
@@ -780,7 +819,12 @@ void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                kmp_int64 *pstride, kmp_int64 incr,
                                kmp_int64 chunk) {
   __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
-                                    pupper, pstride, incr, chunk);
+                                    pupper, pstride, incr, chunk
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+                                    ,
+                                    OMPT_GET_RETURN_ADDRESS(0)
+#endif
+                                        );
 }
 /*!
 @}
index b4328e7..ee30763 100644 (file)
@@ -334,13 +334,11 @@ static void __kmp_stg_parse_size(char const *name, char const *value,
   }
 } // __kmp_stg_parse_size
 
-#if KMP_AFFINITY_SUPPORTED
 static void __kmp_stg_parse_str(char const *name, char const *value,
                                 char const **out) {
   __kmp_str_free(out);
   *out = __kmp_str_format("%s", value);
 } // __kmp_stg_parse_str
-#endif
 
 static void __kmp_stg_parse_int(
     char const
@@ -4354,7 +4352,29 @@ static void __kmp_stg_print_omp_cancellation(kmp_str_buf_t *buffer,
 
 #endif
 
-// -----------------------------------------------------------------------------
+#if OMP_50_ENABLED && OMPT_SUPPORT
+
+static void __kmp_stg_parse_omp_tool_libraries(char const *name,
+                                               char const *value, void *data) {
+  __kmp_stg_parse_str(name, value, &__kmp_tool_libraries);
+} // __kmp_stg_parse_omp_tool_libraries
+
+static void __kmp_stg_print_omp_tool_libraries(kmp_str_buf_t *buffer,
+                                               char const *name, void *data) {
+  if (__kmp_tool_libraries)
+    __kmp_stg_print_str(buffer, name, __kmp_tool_libraries);
+  else {
+    if (__kmp_env_format) {
+      KMP_STR_BUF_PRINT_NAME;
+    } else {
+      __kmp_str_buf_print(buffer, "   %s", name);
+    }
+    __kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined));
+  }
+} // __kmp_stg_print_omp_tool_libraries
+
+#endif
+
 // Table.
 
 static kmp_setting_t __kmp_stg_table[] = {
@@ -4598,6 +4618,12 @@ static kmp_setting_t __kmp_stg_table[] = {
     {"OMP_CANCELLATION", __kmp_stg_parse_omp_cancellation,
      __kmp_stg_print_omp_cancellation, NULL, 0, 0},
 #endif
+
+#if OMP_50_ENABLED && OMPT_SUPPORT
+    {"OMP_TOOL_LIBRARIES", __kmp_stg_parse_omp_tool_libraries,
+     __kmp_stg_print_omp_tool_libraries, NULL, 0, 0},
+#endif
+
     {"", NULL, NULL, NULL, 0, 0}}; // settings
 
 static int const __kmp_stg_count =
index 91a18d0..08ebcfe 100644 (file)
@@ -16,6 +16,9 @@
 #include "kmp.h"
 #include "kmp_io.h"
 #include "kmp_wait_release.h"
+#if OMPT_SUPPORT
+#include "ompt-specific.h"
+#endif
 
 #if OMP_40_ENABLED
 
@@ -217,18 +220,19 @@ static inline void __kmp_track_dependence(kmp_depnode_t *source,
                task_source->td_ident->psource, sink->dn.id,
                task_sink->td_ident->psource);
 #endif
-#if OMPT_SUPPORT && OMPT_TRACE
-  // OMPT tracks dependences between task (a=source, b=sink) in which
-  // task a blocks the execution of b through the ompt_new_dependence_callback
-  if (ompt_enabled &&
-      ompt_callbacks.ompt_callback(ompt_event_task_dependence_pair)) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  /* OMPT tracks dependences between task (a=source, b=sink) in which
+     task a blocks the execution of b through the ompt_new_dependence_callback
+     */
+  if (ompt_enabled.ompt_callback_task_dependence) {
     kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
     kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);
 
-    ompt_callbacks.ompt_callback(ompt_event_task_dependence_pair)(
-        task_source->ompt_task_info.task_id, task_sink->ompt_task_info.task_id);
+    ompt_callbacks.ompt_callback(ompt_callback_task_dependence)(
+        &(task_source->ompt_task_info.task_data),
+        &(task_sink->ompt_task_info.task_data));
   }
-#endif /* OMPT_SUPPORT && OMPT_TRACE */
+#endif /* OMPT_SUPPORT && OMPT_OPTIONAL */
 }
 
 template <bool filter>
@@ -470,10 +474,29 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
   kmp_info_t *thread = __kmp_threads[gtid];
   kmp_taskdata_t *current_task = thread->th.th_current_task;
 
-#if OMPT_SUPPORT && OMPT_TRACE
+#if OMPT_SUPPORT
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+
+  if (ompt_enabled.enabled) {
+    if (ompt_enabled.ompt_callback_task_create) {
+      kmp_taskdata_t *parent = new_taskdata->td_parent;
+      ompt_data_t task_data = ompt_data_none;
+      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
+          parent ? &(parent->ompt_task_info.task_data) : &task_data,
+          parent ? &(parent->ompt_task_info.frame) : NULL,
+          &(new_taskdata->ompt_task_info.task_data),
+          ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 1,
+          OMPT_LOAD_RETURN_ADDRESS(gtid));
+    }
+
+    new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
+        OMPT_GET_FRAME_ADDRESS(0);
+  }
+
+#if OMPT_OPTIONAL
   /* OMPT grab all dependences if requested by the tool */
-  if (ompt_enabled && ndeps + ndeps_noalias > 0 &&
-      ompt_callbacks.ompt_callback(ompt_event_task_dependences)) {
+  if (ndeps + ndeps_noalias > 0 &&
+      ompt_enabled.ompt_callback_task_dependences) {
     kmp_int32 i;
 
     new_taskdata->ompt_task_info.ndeps = ndeps + ndeps_noalias;
@@ -509,8 +532,17 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
         new_taskdata->ompt_task_info.deps[ndeps + i].dependence_flags =
             ompt_task_dependence_type_in;
     }
+    ompt_callbacks.ompt_callback(ompt_callback_task_dependences)(
+        &(new_taskdata->ompt_task_info.task_data),
+        new_taskdata->ompt_task_info.deps, new_taskdata->ompt_task_info.ndeps);
+    /* We can now free the allocated memory for the dependencies */
+    /* For OMPD we might want to delay the free until task_end */
+    KMP_OMPT_DEPS_FREE(thread, new_taskdata->ompt_task_info.deps);
+    new_taskdata->ompt_task_info.deps = NULL;
+    new_taskdata->ompt_task_info.ndeps = 0;
   }
-#endif /* OMPT_SUPPORT && OMPT_TRACE */
+#endif /* OMPT_OPTIONAL */
+#endif /* OMPT_SUPPORT */
 
   bool serial = current_task->td_flags.team_serial ||
                 current_task->td_flags.tasking_ser ||
@@ -557,7 +589,7 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
                 "loc=%p task=%p, transferring to __kmpc_omp_task\n",
                 gtid, loc_ref, new_taskdata));
 
-  return __kmpc_omp_task(loc_ref, gtid, new_task);
+  return __kmp_omp_task(gtid, new_task, true);
 }
 
 /*!
index 373339e..7d148ff 100644 (file)
@@ -446,40 +446,78 @@ static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task,
 
   KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata));
 
+  return;
+}
+
 #if OMPT_SUPPORT
-  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
-    kmp_taskdata_t *parent = taskdata->td_parent;
-    ompt_callbacks.ompt_callback(ompt_event_task_begin)(
-        parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
-        parent ? &(parent->ompt_task_info.frame) : NULL,
-        taskdata->ompt_task_info.task_id, taskdata->ompt_task_info.function);
-  }
-#endif
-#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE
-  /* OMPT emit all dependences if requested by the tool */
-  if (ompt_enabled && taskdata->ompt_task_info.ndeps > 0 &&
-      ompt_callbacks.ompt_callback(ompt_event_task_dependences)) {
-    ompt_callbacks.ompt_callback(ompt_event_task_dependences)(
-        taskdata->ompt_task_info.task_id, taskdata->ompt_task_info.deps,
-        taskdata->ompt_task_info.ndeps);
-    /* We can now free the allocated memory for the dependencies */
-    KMP_OMPT_DEPS_FREE(thread, taskdata->ompt_task_info.deps);
-    taskdata->ompt_task_info.deps = NULL;
-    taskdata->ompt_task_info.ndeps = 0;
-  }
-#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */
+//------------------------------------------------------------------------------
+// __ompt_task_init:
+//   Initialize OMPT fields maintained by a task. This will only be called after
+//   ompt_start_tool, so we already know whether ompt is enabled or not.
+
+static inline void __ompt_task_init(kmp_taskdata_t *task, int tid) {
+  // The calls to __ompt_task_init already have the ompt_enabled condition.
+  task->ompt_task_info.task_data.value = 0;
+  task->ompt_task_info.frame.exit_runtime_frame = NULL;
+  task->ompt_task_info.frame.reenter_runtime_frame = NULL;
+#if OMP_40_ENABLED
+  task->ompt_task_info.ndeps = 0;
+  task->ompt_task_info.deps = NULL;
+#endif /* OMP_40_ENABLED */
+}
 
-  return;
+// __ompt_task_start:
+//   Trigger the task-schedule event that switches from current_task to task
+static inline void __ompt_task_start(kmp_task_t *task,
+                                     kmp_taskdata_t *current_task,
+                                     kmp_int32 gtid) {
+  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+  ompt_task_status_t status = ompt_task_others;
+  if (__kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded) {
+    status = ompt_task_yield;
+    __kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded = 0;
+  }
+  /* let OMPT know that we're about to run this task */
+  if (ompt_enabled.ompt_callback_task_schedule) {
+    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
+        &(current_task->ompt_task_info.task_data), status,
+        &(taskdata->ompt_task_info.task_data));
+  }
+  taskdata->ompt_task_info.scheduling_parent = current_task;
 }
 
-// __kmpc_omp_task_begin_if0: report that a given serialized task has started
-// execution
-//
-// loc_ref: source location information; points to beginning of task block.
-// gtid: global thread number.
-// task: task thunk for the started task.
-void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
-                               kmp_task_t *task) {
+// __ompt_task_finish:
+//   Trigger the task-schedule event that switches away from this task
+static inline void __ompt_task_finish(kmp_task_t *task,
+                                      kmp_taskdata_t *resumed_task) {
+  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+  ompt_task_status_t status = ompt_task_complete;
+  if (taskdata->td_flags.tiedness == TASK_UNTIED &&
+      KMP_TEST_THEN_ADD32(&(taskdata->td_untied_count), 0) > 1)
+    status = ompt_task_others;
+  if (__kmp_omp_cancellation && taskdata->td_taskgroup &&
+      taskdata->td_taskgroup->cancel_request == cancel_taskgroup) {
+    status = ompt_task_cancel;
+  }
+
+  /* let OMPT know which task we are switching back to */
+  if (ompt_enabled.ompt_callback_task_schedule) {
+    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
+        &(taskdata->ompt_task_info.task_data), status,
+        &((resumed_task ? resumed_task
+                        : (taskdata->ompt_task_info.scheduling_parent
+                               ? taskdata->ompt_task_info.scheduling_parent
+                               : taskdata->td_parent))
+              ->ompt_task_info.task_data));
+  }
+}
+#endif
+
+template <bool ompt>
+static void __kmpc_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid,
+                                               kmp_task_t *task,
+                                               void *frame_address,
+                                               void *return_address) {
   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
   kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
 
@@ -500,10 +538,57 @@ void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
       1; // Execute this task immediately, not deferred.
   __kmp_task_start(gtid, task, current_task);
 
+#if OMPT_SUPPORT
+  if (ompt) {
+    if (current_task->ompt_task_info.frame.reenter_runtime_frame == NULL) {
+      current_task->ompt_task_info.frame.reenter_runtime_frame =
+          taskdata->ompt_task_info.frame.exit_runtime_frame = frame_address;
+    }
+    if (ompt_enabled.ompt_callback_task_create) {
+      ompt_task_info_t *parent_info = &(current_task->ompt_task_info);
+      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
+          &(parent_info->task_data), &(parent_info->frame),
+          &(taskdata->ompt_task_info.task_data),
+          ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(taskdata), 0,
+          return_address);
+    }
+    __ompt_task_start(task, current_task, gtid);
+  }
+#endif // OMPT_SUPPORT
+
   KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid,
                 loc_ref, taskdata));
+}
 
-  return;
+#if OMPT_SUPPORT
+OMPT_NOINLINE
+static void __kmpc_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
+                                           kmp_task_t *task,
+                                           void *frame_address,
+                                           void *return_address) {
+  __kmpc_omp_task_begin_if0_template<true>(loc_ref, gtid, task, frame_address,
+                                           return_address);
+}
+#endif // OMPT_SUPPORT
+
+// __kmpc_omp_task_begin_if0: report that a given serialized task has started
+// execution
+//
+// loc_ref: source location information; points to beginning of task block.
+// gtid: global thread number.
+// task: task thunk for the started task.
+void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
+                               kmp_task_t *task) {
+#if OMPT_SUPPORT
+  if (UNLIKELY(ompt_enabled.enabled)) {
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+    __kmpc_omp_task_begin_if0_ompt(loc_ref, gtid, task,
+                                   OMPT_GET_FRAME_ADDRESS(1),
+                                   OMPT_LOAD_RETURN_ADDRESS(gtid));
+    return;
+  }
+#endif
+  __kmpc_omp_task_begin_if0_template<false>(loc_ref, gtid, task, NULL, NULL);
 }
 
 #ifdef TASK_UNUSED
@@ -623,14 +708,6 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
       thread->th.th_task_team; // might be NULL for serial teams...
   kmp_int32 children = 0;
 
-#if OMPT_SUPPORT
-  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_end)) {
-    kmp_taskdata_t *parent = taskdata->td_parent;
-    ompt_callbacks.ompt_callback(ompt_event_task_end)(
-        taskdata->ompt_task_info.task_id);
-  }
-#endif
-
   KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
                 "task %p\n",
                 gtid, taskdata, resumed_task));
@@ -760,13 +837,10 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
   return;
 }
 
-// __kmpc_omp_task_complete_if0: report that a task has completed execution
-//
-// loc_ref: source location information; points to end of task block.
-// gtid: global thread number.
-// task: task thunk for the completed task.
-void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
-                                  kmp_task_t *task) {
+template <bool ompt>
+static void __kmpc_omp_task_complete_if0_template(ident_t *loc_ref,
+                                                  kmp_int32 gtid,
+                                                  kmp_task_t *task) {
   KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
                 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
   // this routine will provide task to resume
@@ -774,9 +848,43 @@ void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
 
   KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
                 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
+
+#if OMPT_SUPPORT
+  if (ompt) {
+    __ompt_task_finish(task, NULL);
+    ompt_frame_t *ompt_frame;
+    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
+    ompt_frame->reenter_runtime_frame = NULL;
+  }
+#endif
+
   return;
 }
 
+#if OMPT_SUPPORT
+OMPT_NOINLINE
+void __kmpc_omp_task_complete_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
+                                       kmp_task_t *task) {
+  __kmpc_omp_task_complete_if0_template<true>(loc_ref, gtid, task);
+}
+#endif // OMPT_SUPPORT
+
+// __kmpc_omp_task_complete_if0: report that a task has completed execution
+//
+// loc_ref: source location information; points to end of task block.
+// gtid: global thread number.
+// task: task thunk for the completed task.
+void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
+                                  kmp_task_t *task) {
+#if OMPT_SUPPORT
+  if (UNLIKELY(ompt_enabled.enabled)) {
+    __kmpc_omp_task_complete_if0_ompt(loc_ref, gtid, task);
+    return;
+  }
+#endif
+  __kmpc_omp_task_complete_if0_template<false>(loc_ref, gtid, task);
+}
+
 #ifdef TASK_UNUSED
 // __kmpc_omp_task_complete: report that a task has completed execution
 // NEVER GENERATED BY COMPILER, DEPRECATED!!!
@@ -793,25 +901,6 @@ void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
 }
 #endif // TASK_UNUSED
 
-#if OMPT_SUPPORT
-// __kmp_task_init_ompt: Initialize OMPT fields maintained by a task. This will
-//  only be called after ompt_tool, so we already know whether ompt is enabled
-// or not.
-static inline void __kmp_task_init_ompt(kmp_taskdata_t *task, int tid,
-                                        void *function) {
-  if (ompt_enabled) {
-    task->ompt_task_info.task_id = __ompt_task_id_new(tid);
-    task->ompt_task_info.function = function;
-    task->ompt_task_info.frame.exit_runtime_frame = NULL;
-    task->ompt_task_info.frame.reenter_runtime_frame = NULL;
-#if OMP_40_ENABLED
-    task->ompt_task_info.ndeps = 0;
-    task->ompt_task_info.deps = NULL;
-#endif /* OMP_40_ENABLED */
-  }
-}
-#endif
-
 // __kmp_init_implicit_task: Initialize the appropriate fields in the implicit
 // task for a given thread
 //
@@ -876,7 +965,8 @@ void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
   }
 
 #if OMPT_SUPPORT
-  __kmp_task_init_ompt(task, tid, NULL);
+  if (UNLIKELY(ompt_enabled.enabled))
+    __ompt_task_init(task, tid);
 #endif
 
   KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid,
@@ -1121,7 +1211,8 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
   ANNOTATE_HAPPENS_BEFORE(task);
 
 #if OMPT_SUPPORT
-  __kmp_task_init_ompt(taskdata, gtid, (void *)task_entry);
+  if (UNLIKELY(ompt_enabled.enabled))
+    __ompt_task_init(taskdata, gtid);
 #endif
 
   return task;
@@ -1207,7 +1298,7 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
   if (taskdata->td_flags.proxy != TASK_PROXY) {
 #endif
     ANNOTATE_HAPPENS_AFTER(task);
-    __kmp_task_start(gtid, task, current_task);
+    __kmp_task_start(gtid, task, current_task); // OMPT only if not discarded
 #if OMP_45_ENABLED
   }
 #endif
@@ -1215,14 +1306,16 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
 #if OMPT_SUPPORT
   ompt_thread_info_t oldInfo;
   kmp_info_t *thread;
-  if (ompt_enabled) {
+  if (UNLIKELY(ompt_enabled.enabled)) {
     // Store the threads states and restore them after the task
     thread = __kmp_threads[gtid];
     oldInfo = thread->th.ompt_thread_info;
     thread->th.ompt_thread_info.wait_id = 0;
-    thread->th.ompt_thread_info.state = ompt_state_work_parallel;
+    thread->th.ompt_thread_info.state = (thread->th.th_team_serialized)
+                                            ? omp_state_work_serial
+                                            : omp_state_work_parallel;
     taskdata->ompt_task_info.frame.exit_runtime_frame =
-        __builtin_frame_address(0);
+        OMPT_GET_FRAME_ADDRESS(0);
   }
 #endif
 
@@ -1236,6 +1329,18 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
     kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
     if ((taskgroup && taskgroup->cancel_request) ||
         (this_team->t.t_cancel_request == cancel_parallel)) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+      ompt_data_t *task_data;
+      if (UNLIKELY(ompt_enabled.ompt_callback_cancel)) {
+        __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
+        ompt_callbacks.ompt_callback(ompt_callback_cancel)(
+            task_data,
+            ((taskgroup && taskgroup->cancel_request) ? ompt_cancel_taskgroup
+                                                      : ompt_cancel_parallel) |
+                ompt_cancel_discarded_task,
+            NULL);
+      }
+#endif
       KMP_COUNT_BLOCK(TASK_cancelled);
       // this task belongs to a task group and we need to cancel it
       discard = 1 /* true */;
@@ -1270,13 +1375,10 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
 #endif // KMP_STATS_ENABLED
 #endif // OMP_40_ENABLED
 
-#if OMPT_SUPPORT && OMPT_TRACE
-    /* let OMPT know that we're about to run this task */
-    if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_switch)) {
-      ompt_callbacks.ompt_callback(ompt_event_task_switch)(
-          current_task->ompt_task_info.task_id,
-          taskdata->ompt_task_info.task_id);
-    }
+// OMPT task begin
+#if OMPT_SUPPORT
+    if (UNLIKELY(ompt_enabled.enabled))
+      __ompt_task_start(task, current_task, gtid);
 #endif
 
 #ifdef KMP_GOMP_COMPAT
@@ -1289,21 +1391,16 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
     }
     KMP_POP_PARTITIONED_TIMER();
 
-#if OMPT_SUPPORT && OMPT_TRACE
-    /* let OMPT know that we're returning to the callee task */
-    if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_switch)) {
-      ompt_callbacks.ompt_callback(ompt_event_task_switch)(
-          taskdata->ompt_task_info.task_id,
-          current_task->ompt_task_info.task_id);
-    }
+#if OMPT_SUPPORT
+    if (UNLIKELY(ompt_enabled.enabled))
+      __ompt_task_finish(task, current_task);
 #endif
-
 #if OMP_40_ENABLED
   }
 #endif // OMP_40_ENABLED
 
 #if OMPT_SUPPORT
-  if (ompt_enabled) {
+  if (UNLIKELY(ompt_enabled.enabled)) {
     thread->th.ompt_thread_info = oldInfo;
     taskdata->ompt_task_info.frame.exit_runtime_frame = NULL;
   }
@@ -1314,7 +1411,7 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
   if (taskdata->td_flags.proxy != TASK_PROXY) {
 #endif
     ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent);
-    __kmp_task_finish(gtid, task, current_task);
+    __kmp_task_finish(gtid, task, current_task); // OMPT only if not discarded
 #if OMP_45_ENABLED
   }
 #endif
@@ -1352,6 +1449,21 @@ kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
   KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid,
                 loc_ref, new_taskdata));
 
+#if OMPT_SUPPORT
+  kmp_taskdata_t *parent = NULL;
+  if (UNLIKELY(ompt_enabled.enabled)) {
+    parent = new_taskdata->td_parent;
+    if (ompt_enabled.ompt_callback_task_create) {
+      ompt_data_t task_data = ompt_data_none;
+      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
+          parent ? &(parent->ompt_task_info.task_data) : &task_data,
+          parent ? &(parent->ompt_task_info.frame) : NULL,
+          &(new_taskdata->ompt_task_info.task_data), ompt_task_explicit, 0,
+          OMPT_GET_RETURN_ADDRESS(0));
+    }
+  }
+#endif
+
   /* Should we execute the new task or queue it? For now, let's just always try
      to queue it.  If the queue fills up, then we'll execute it.  */
 
@@ -1369,6 +1481,11 @@ kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
        gtid, loc_ref, new_taskdata));
 
   ANNOTATE_HAPPENS_BEFORE(new_task);
+#if OMPT_SUPPORT
+  if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
+    parent->ompt_task_info.frame.reenter_runtime_frame = NULL;
+  }
+#endif
   return TASK_CURRENT_NOT_QUEUED;
 }
 
@@ -1387,13 +1504,6 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
                          bool serialize_immediate) {
   kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
 
-#if OMPT_SUPPORT
-  if (ompt_enabled) {
-    new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
-        __builtin_frame_address(1);
-  }
-#endif
-
 /* Should we execute the new task or queue it? For now, let's just always try to
    queue it.  If the queue fills up, then we'll execute it.  */
 #if OMP_45_ENABLED
@@ -1409,12 +1519,6 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
     __kmp_invoke_task(gtid, new_task, current_task);
   }
 
-#if OMPT_SUPPORT
-  if (ompt_enabled) {
-    new_taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
-  }
-#endif
-
   ANNOTATE_HAPPENS_BEFORE(new_task);
   return TASK_CURRENT_NOT_QUEUED;
 }
@@ -1436,23 +1540,49 @@ kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
   kmp_int32 res;
   KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
 
-#if KMP_DEBUG
+#if KMP_DEBUG || OMPT_SUPPORT
   kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
 #endif
   KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
                 new_taskdata));
 
+#if OMPT_SUPPORT
+  kmp_taskdata_t *parent = NULL;
+  if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) {
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+    parent = new_taskdata->td_parent;
+    if (!parent->ompt_task_info.frame.reenter_runtime_frame)
+      parent->ompt_task_info.frame.reenter_runtime_frame =
+          OMPT_GET_FRAME_ADDRESS(1);
+    if (ompt_enabled.ompt_callback_task_create) {
+      ompt_data_t task_data = ompt_data_none;
+      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
+          parent ? &(parent->ompt_task_info.task_data) : &task_data,
+          parent ? &(parent->ompt_task_info.frame) : NULL,
+          &(new_taskdata->ompt_task_info.task_data),
+          ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
+          OMPT_LOAD_RETURN_ADDRESS(gtid));
+    }
+  }
+#endif
+
   res = __kmp_omp_task(gtid, new_task, true);
 
   KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
                 "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
                 gtid, loc_ref, new_taskdata));
+#if OMPT_SUPPORT
+  if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
+    parent->ompt_task_info.frame.reenter_runtime_frame = NULL;
+  }
+#endif
   return res;
 }
 
-// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are
-// complete
-kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
+template <bool ompt>
+static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
+                                              void *frame_address,
+                                              void *return_address) {
   kmp_taskdata_t *taskdata;
   kmp_info_t *thread;
   int thread_finished = FALSE;
@@ -1463,23 +1593,30 @@ kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
   if (__kmp_tasking_mode != tskm_immediate_exec) {
     thread = __kmp_threads[gtid];
     taskdata = thread->th.th_current_task;
-#if OMPT_SUPPORT && OMPT_TRACE
-    ompt_task_id_t my_task_id;
-    ompt_parallel_id_t my_parallel_id;
-
-    if (ompt_enabled) {
-      kmp_team_t *team = thread->th.th_team;
-      my_task_id = taskdata->ompt_task_info.task_id;
-      my_parallel_id = team->t.ompt_team_info.parallel_id;
-
-      taskdata->ompt_task_info.frame.reenter_runtime_frame =
-          __builtin_frame_address(1);
-      if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) {
-        ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)(my_parallel_id,
-                                                                my_task_id);
+
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    ompt_data_t *my_task_data;
+    ompt_data_t *my_parallel_data;
+
+    if (ompt) {
+      my_task_data = &(taskdata->ompt_task_info.task_data);
+      my_parallel_data = OMPT_CUR_TEAM_DATA(thread);
+
+      taskdata->ompt_task_info.frame.reenter_runtime_frame = frame_address;
+
+      if (ompt_enabled.ompt_callback_sync_region) {
+        ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
+            ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
+            my_task_data, return_address);
+      }
+
+      if (ompt_enabled.ompt_callback_sync_region_wait) {
+        ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
+            ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
+            my_task_data, return_address);
       }
     }
-#endif
+#endif // OMPT_SUPPORT && OMPT_OPTIONAL
 
 // Debugger: The taskwait is active. Store location and thread encountered the
 // taskwait.
@@ -1522,15 +1659,22 @@ kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
     // negated.
     taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
 
-#if OMPT_SUPPORT && OMPT_TRACE
-    if (ompt_enabled) {
-      if (ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) {
-        ompt_callbacks.ompt_callback(ompt_event_taskwait_end)(my_parallel_id,
-                                                              my_task_id);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    if (ompt) {
+      if (ompt_enabled.ompt_callback_sync_region_wait) {
+        ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
+            ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
+            my_task_data, return_address);
+      }
+      if (ompt_enabled.ompt_callback_sync_region) {
+        ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
+            ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
+            my_task_data, return_address);
       }
       taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
     }
-#endif
+#endif // OMPT_SUPPORT && OMPT_OPTIONAL
+
     ANNOTATE_HAPPENS_AFTER(taskdata);
   }
 
@@ -1541,6 +1685,29 @@ kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
   return TASK_CURRENT_NOT_QUEUED;
 }
 
+#if OMPT_SUPPORT
+OMPT_NOINLINE // OMPT-enabled instantiation of the taskwait template
+static kmp_int32 __kmpc_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32 gtid,
+                                          void *frame_address,
+                                          void *return_address) {
+  return __kmpc_omp_taskwait_template<true>(loc_ref, gtid, frame_address,
+                                            return_address);
+}
+#endif // OMPT_SUPPORT
+
+// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are
+// complete. Takes the OMPT-instrumented path only if ompt_enabled.enabled.
+kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (UNLIKELY(ompt_enabled.enabled)) {
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+    return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(1),
+                                    OMPT_LOAD_RETURN_ADDRESS(gtid));
+  }
+#endif // OMPT_SUPPORT && OMPT_OPTIONAL
+  return __kmpc_omp_taskwait_template<false>(loc_ref, gtid, NULL, NULL);
+}
+
 // __kmpc_omp_taskyield: switch to a different task
 kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) {
   kmp_taskdata_t *taskdata;
@@ -1575,10 +1742,18 @@ kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) {
       kmp_task_team_t *task_team = thread->th.th_task_team;
       if (task_team != NULL) {
         if (KMP_TASKING_ENABLED(task_team)) {
+#if OMPT_SUPPORT
+          if (UNLIKELY(ompt_enabled.enabled))
+            thread->th.ompt_thread_info.ompt_task_yielded = 1;
+#endif
           __kmp_execute_tasks_32(
               thread, gtid, NULL, FALSE,
               &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
               __kmp_task_stealing_constraint);
+#if OMPT_SUPPORT
+          if (UNLIKELY(ompt_enabled.enabled))
+            thread->th.ompt_thread_info.ompt_task_yielded = 0;
+#endif
         }
       }
     }
@@ -1809,6 +1984,22 @@ void __kmpc_taskgroup(ident_t *loc, int gtid) {
   tg_new->reduce_num_data = 0;
 #endif
   taskdata->td_taskgroup = tg_new;
+
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
+    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
+    if (!codeptr)
+      codeptr = OMPT_GET_RETURN_ADDRESS(0);
+    kmp_team_t *team = thread->th.th_team;
+    ompt_data_t my_task_data = taskdata->ompt_task_info.task_data;
+    // FIXME: I think this is wrong for lwt!
+    ompt_data_t my_parallel_data = team->t.ompt_team_info.parallel_data;
+
+    ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
+        ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
+        &(my_task_data), codeptr);
+  }
+#endif
 }
 
 // __kmpc_end_taskgroup: Wait until all tasks generated by the current task
@@ -1819,6 +2010,22 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
   kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
   int thread_finished = FALSE;
 
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  kmp_team_t *team;
+  ompt_data_t my_task_data;
+  ompt_data_t my_parallel_data;
+  void *codeptr;
+  if (UNLIKELY(ompt_enabled.enabled)) {
+    team = thread->th.th_team;
+    my_task_data = taskdata->ompt_task_info.task_data;
+    // FIXME: I think this is wrong for lwt!
+    my_parallel_data = team->t.ompt_team_info.parallel_data;
+    codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
+    if (!codeptr)
+      codeptr = OMPT_GET_RETURN_ADDRESS(0);
+  }
+#endif
+
   KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc));
   KMP_DEBUG_ASSERT(taskgroup != NULL);
   KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
@@ -1832,6 +2039,14 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
       __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
 #endif /* USE_ITT_BUILD */
 
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
+      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
+          ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
+          &(my_task_data), codeptr);
+    }
+#endif
+
 #if OMP_45_ENABLED
     if (!taskdata->td_flags.team_serial ||
         (thread->th.th_task_team != NULL &&
@@ -1848,6 +2063,14 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
       }
     }
 
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
+      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
+          ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
+          &(my_task_data), codeptr);
+    }
+#endif
+
 #if USE_ITT_BUILD
     if (itt_sync_obj != NULL)
       __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
@@ -1867,6 +2090,14 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
   KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n",
                 gtid, taskdata));
   ANNOTATE_HAPPENS_AFTER(taskdata);
+
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
+    ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
+        ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
+        &(my_task_data), codeptr);
+  }
+#endif
 }
 #endif
 
@@ -3255,8 +3486,8 @@ kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) {
            ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
             thread, taskdata, taskdata->td_parent));
 #if OMPT_SUPPORT
-  __kmp_task_init_ompt(taskdata, thread->th.th_info.ds.ds_gtid,
-                       (void *)task->routine);
+  if (UNLIKELY(ompt_enabled.enabled))
+    __ompt_task_init(taskdata, thread->th.th_info.ds.ds_gtid);
 #endif
   return task;
 }
@@ -3539,8 +3770,22 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                 "grain %llu(%d), dup %p\n",
                 gtid, taskdata, *lb, *ub, st, grainsize, sched, task_dup));
 
-  if (nogroup == 0)
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
+  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+  if (ompt_enabled.ompt_callback_work) {
+    ompt_callbacks.ompt_callback(ompt_callback_work)(
+        ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data),
+        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
+  }
+#endif
+
+  if (nogroup == 0) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
     __kmpc_taskgroup(loc, gtid);
+  }
 
   // =========================================================================
   // calculate loop parameters
@@ -3614,6 +3859,9 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
   if (if_val == 0) { // if(0) specified, mark task as serial
     taskdata->td_flags.task_serial = 1;
     taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
     // always start serial tasks linearly
     __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                           grainsize, extras, tc, task_dup);
@@ -3621,18 +3869,35 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
     KA_TRACE(20, ("__kmpc_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
                   "(%lld), grain %llu, extras %llu\n",
                   gtid, tc, num_tasks, num_tasks_min, grainsize, extras));
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
     __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, tc, num_tasks_min, task_dup);
   } else {
     KA_TRACE(20, ("__kmpc_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
                   "(%lld), grain %llu, extras %llu\n",
                   gtid, tc, num_tasks, num_tasks_min, grainsize, extras));
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
     __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                           grainsize, extras, tc, task_dup);
   }
 
-  if (nogroup == 0)
+  if (nogroup == 0) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
     __kmpc_end_taskgroup(loc, gtid);
+  }
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.ompt_callback_work) {
+    ompt_callbacks.ompt_callback(ompt_callback_work)(
+        ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data),
+        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
+  }
+#endif
   KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid));
 }
 
index 2ecacef..1f519d5 100644 (file)
@@ -17,6 +17,9 @@
 #include "kmp.h"
 #include "kmp_itt.h"
 #include "kmp_stats.h"
+#if OMPT_SUPPORT
+#include "ompt-specific.h"
+#endif
 
 /*!
 @defgroup WAIT_RELEASE Wait/Release operations
@@ -85,6 +88,44 @@ public:
   */
 };
 
+#if OMPT_SUPPORT
+static inline void __ompt_implicit_task_end(kmp_info_t *this_thr,
+                                            omp_state_t omp_state,
+                                            ompt_data_t *tId,
+                                            ompt_data_t *pId) { // pId unused
+  int ds_tid = this_thr->th.th_info.ds.ds_tid;
+  if (omp_state == omp_state_wait_barrier_implicit) { // otherwise a no-op
+    this_thr->th.ompt_thread_info.state = omp_state_overhead;
+#if OMPT_OPTIONAL
+    void *codeptr = NULL; // barrier-end events here report no code pointer
+    if (ompt_enabled.ompt_callback_sync_region_wait) {
+      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
+          ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
+    }
+    if (ompt_enabled.ompt_callback_sync_region) {
+      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
+          ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
+    }
+#endif // OMPT_OPTIONAL
+    if (!KMP_MASTER_TID(ds_tid)) {
+      if (ompt_enabled.ompt_callback_implicit_task) {
+        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+            ompt_scope_end, NULL, tId, 0, ds_tid);
+      }
+#if OMPT_OPTIONAL
+      if (ompt_enabled.ompt_callback_idle) {
+        ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_begin);
+      }
+#endif // OMPT_OPTIONAL
+      // implicit task is over for this non-master thread: return to idle state
+      this_thr->th.ompt_thread_info.state = omp_state_idle;
+    } else {
+      this_thr->th.ompt_thread_info.state = omp_state_overhead;
+    }
+  }
+}
+#endif // OMPT_SUPPORT
+
 /* Spin wait loop that first does pause, then yield, then sleep. A thread that
    calls __kmp_wait_*  must make certain that another thread calls __kmp_release
    to wake it back up to prevent deadlocks!  */
@@ -116,30 +157,88 @@ __kmp_wait_template(kmp_info_t *this_thr, C *flag,
   stats_state_e thread_state = KMP_GET_THREAD_STATE();
 #endif
 
-#if OMPT_SUPPORT && OMPT_BLAME
-  ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
-  if (ompt_enabled && ompt_state != ompt_state_undefined) {
-    if (ompt_state == ompt_state_idle) {
-      if (ompt_callbacks.ompt_callback(ompt_event_idle_begin)) {
-        ompt_callbacks.ompt_callback(ompt_event_idle_begin)(th_gtid + 1);
-      }
-    } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)) {
-      KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier ||
-                       ompt_state == ompt_state_wait_barrier_implicit ||
-                       ompt_state == ompt_state_wait_barrier_explicit);
-
+/* OMPT Behavior:
+THIS function is called from
+  __kmp_barrier (2 times)  (implicit or explicit barrier in parallel regions)
+            these have join / fork behavior
+
+       In these cases, we don't change the state or trigger events in
+       THIS function.
+       Events are triggered in the calling code (__kmp_barrier):
+
+                state := omp_state_overhead
+            barrier-begin
+            barrier-wait-begin
+                state := omp_state_wait_barrier
+          call join-barrier-implementation (finally arrive here)
+          {}
+          call fork-barrier-implementation (finally arrive here)
+          {}
+                state := omp_state_overhead
+            barrier-wait-end
+            barrier-end
+                state := omp_state_work_parallel
+
+
+  __kmp_fork_barrier  (after thread creation, before executing implicit task)
+          call fork-barrier-implementation (finally arrive here)
+          {} // workers arrive here with state = omp_state_idle
+
+
+  __kmp_join_barrier  (implicit barrier at end of parallel region)
+                state := omp_state_wait_barrier_implicit
+            barrier-begin
+            barrier-wait-begin
+          call join-barrier-implementation
+            (finally arrive here final_spin=FALSE)
+          {
+          }
+  __kmp_fork_barrier  (implicit barrier at end of parallel region)
+          call fork-barrier-implementation (finally arrive here final_spin=TRUE)
+
+       Worker after task-team is finished:
+            barrier-wait-end
+            barrier-end
+            implicit-task-end
+            idle-begin
+                state := omp_state_idle
+
+       Before leaving, if state = omp_state_idle
+            idle-end
+                state := omp_state_overhead
+*/
+#if OMPT_SUPPORT
+  omp_state_t ompt_entry_state;
+  ompt_data_t *pId = NULL;
+  ompt_data_t *tId;
+  if (ompt_enabled.enabled) {
+    ompt_entry_state = this_thr->th.ompt_thread_info.state;
+    if (!final_spin || ompt_entry_state != omp_state_wait_barrier_implicit ||
+        KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
       ompt_lw_taskteam_t *team =
           this_thr->th.th_team->t.ompt_serialized_team_info;
-      ompt_parallel_id_t pId;
-      ompt_task_id_t tId;
       if (team) {
-        pId = team->ompt_team_info.parallel_id;
-        tId = team->ompt_task_info.task_id;
+        pId = &(team->ompt_team_info.parallel_data);
+        tId = &(team->ompt_task_info.task_data);
       } else {
-        pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
-        tId = this_thr->th.th_current_task->ompt_task_info.task_id;
+        pId = OMPT_CUR_TEAM_DATA(this_thr);
+        tId = OMPT_CUR_TASK_DATA(this_thr);
+      }
+    } else {
+      pId = NULL;
+      tId = &(this_thr->th.ompt_thread_info.task_data);
+    }
+#if OMPT_OPTIONAL
+    if (ompt_entry_state == omp_state_idle) {
+      if (ompt_enabled.ompt_callback_idle) {
+        ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_begin);
       }
-      ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)(pId, tId);
+    } else
+#endif
+        if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
+                           this_thr->th.th_task_team == NULL)) {
+      // implicit task is done. Either no taskqueue, or task-team finished
+      __ompt_implicit_task_end(this_thr, ompt_entry_state, tId, pId);
     }
   }
 #endif
@@ -206,6 +305,11 @@ __kmp_wait_template(kmp_info_t *this_thr, C *flag,
             this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
         } else {
           KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
+#if OMPT_SUPPORT
+          // task-team is done now, other cases should be caught above
+          if (final_spin && ompt_enabled.enabled)
+            __ompt_implicit_task_end(this_thr, ompt_entry_state, tId, pId);
+#endif
           this_thr->th.th_task_team = NULL;
           this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
         }
@@ -293,29 +397,22 @@ __kmp_wait_template(kmp_info_t *this_thr, C *flag,
     // TODO: If thread is done with work and times out, disband/free
   }
 
-#if OMPT_SUPPORT && OMPT_BLAME
-  if (ompt_enabled && ompt_state != ompt_state_undefined) {
-    if (ompt_state == ompt_state_idle) {
-      if (ompt_callbacks.ompt_callback(ompt_event_idle_end)) {
-        ompt_callbacks.ompt_callback(ompt_event_idle_end)(th_gtid + 1);
-      }
-    } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)) {
-      KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier ||
-                       ompt_state == ompt_state_wait_barrier_implicit ||
-                       ompt_state == ompt_state_wait_barrier_explicit);
-
-      ompt_lw_taskteam_t *team =
-          this_thr->th.th_team->t.ompt_serialized_team_info;
-      ompt_parallel_id_t pId;
-      ompt_task_id_t tId;
-      if (team) {
-        pId = team->ompt_team_info.parallel_id;
-        tId = team->ompt_task_info.task_id;
-      } else {
-        pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
-        tId = this_thr->th.th_current_task->ompt_task_info.task_id;
+#if OMPT_SUPPORT
+  omp_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
+  if (ompt_enabled.enabled && ompt_exit_state != omp_state_undefined) {
+#if OMPT_OPTIONAL
+    if (final_spin) {
+      __ompt_implicit_task_end(this_thr, ompt_exit_state, tId, pId);
+      ompt_exit_state = this_thr->th.ompt_thread_info.state;
+    }
+#endif
+    if (ompt_exit_state == omp_state_idle) {
+#if OMPT_OPTIONAL
+      if (ompt_enabled.ompt_callback_idle) {
+        ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_end);
       }
-      ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)(pId, tId);
+#endif
+      this_thr->th.ompt_thread_info.state = omp_state_overhead;
     }
   }
 #endif
index baeb67c..5f9a980 100644 (file)
  | the OMPT TR. They are exposed to tools through ompt_set_callback.
  +--------------------------------------------------------------------------*/
 
-#define ompt_event_NEVER ompt_set_result_event_never_occurs
-#define ompt_event_UNIMPLEMENTED ompt_set_result_event_may_occur_no_callback
-#define ompt_event_MAY_CONVENIENT ompt_set_result_event_may_occur_callback_some
-#define ompt_event_MAY_ALWAYS ompt_set_result_event_may_occur_callback_always
+#define ompt_event_UNIMPLEMENTED ompt_set_never
+#define ompt_event_MAY_CONVENIENT ompt_set_sometimes
+#define ompt_event_MAY_ALWAYS ompt_set_always
 
-#if OMPT_TRACE
-#define ompt_event_MAY_ALWAYS_TRACE ompt_event_MAY_ALWAYS
+#if OMPT_OPTIONAL
+#define ompt_event_MAY_ALWAYS_OPTIONAL ompt_event_MAY_ALWAYS
 #else
-#define ompt_event_MAY_ALWAYS_TRACE ompt_event_UNIMPLEMENTED
-#endif
-
-#if OMPT_BLAME
-#define ompt_event_MAY_ALWAYS_BLAME ompt_event_MAY_ALWAYS
-#else
-#define ompt_event_MAY_ALWAYS_BLAME ompt_event_UNIMPLEMENTED
+#define ompt_event_MAY_ALWAYS_OPTIONAL ompt_event_UNIMPLEMENTED
 #endif
 
 /*----------------------------------------------------------------------------
  | Mandatory Events
  +--------------------------------------------------------------------------*/
 
-#define ompt_event_parallel_begin_implemented ompt_event_MAY_ALWAYS
-#define ompt_event_parallel_end_implemented ompt_event_MAY_ALWAYS
+#define ompt_callback_parallel_begin_implemented ompt_event_MAY_ALWAYS
+#define ompt_callback_parallel_end_implemented ompt_event_MAY_ALWAYS
 
-#define ompt_event_task_begin_implemented ompt_event_MAY_ALWAYS
-#define ompt_event_task_end_implemented ompt_event_MAY_ALWAYS
+#define ompt_callback_task_create_implemented ompt_event_MAY_ALWAYS
+#define ompt_callback_task_schedule_implemented ompt_event_MAY_ALWAYS
 
-#define ompt_event_thread_begin_implemented ompt_event_MAY_ALWAYS
-#define ompt_event_thread_end_implemented ompt_event_MAY_ALWAYS
+#define ompt_callback_thread_begin_implemented ompt_event_MAY_ALWAYS
+#define ompt_callback_thread_end_implemented ompt_event_MAY_ALWAYS
 
-#define ompt_event_control_implemented ompt_event_MAY_ALWAYS
+#define ompt_callback_control_tool_implemented ompt_event_MAY_ALWAYS
 
-#define ompt_event_runtime_shutdown_implemented ompt_event_MAY_ALWAYS
+#define ompt_callback_implicit_task_implemented ompt_event_MAY_ALWAYS
 
 /*----------------------------------------------------------------------------
- | Optional Events (blame shifting)
+ | Target Related Events (not yet implemented)
  +--------------------------------------------------------------------------*/
 
-#define ompt_event_idle_begin_implemented ompt_event_MAY_ALWAYS_BLAME
-#define ompt_event_idle_end_implemented ompt_event_MAY_ALWAYS_BLAME
-
-#define ompt_event_wait_barrier_begin_implemented ompt_event_MAY_ALWAYS_BLAME
-#define ompt_event_wait_barrier_end_implemented ompt_event_MAY_ALWAYS_BLAME
+#define ompt_callback_target_implemented ompt_event_UNIMPLEMENTED
+#define ompt_callback_target_data_op_implemented ompt_event_UNIMPLEMENTED
+#define ompt_callback_target_submit_implemented ompt_event_UNIMPLEMENTED
+#define ompt_callback_device_initialize_implemented ompt_event_UNIMPLEMENTED
+#define ompt_callback_device_finalize_implemented ompt_event_UNIMPLEMENTED
 
-#define ompt_event_wait_taskwait_begin_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_wait_taskwait_end_implemented ompt_event_UNIMPLEMENTED
-
-#define ompt_event_wait_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_wait_taskgroup_end_implemented ompt_event_UNIMPLEMENTED
-
-#define ompt_event_release_lock_implemented ompt_event_MAY_ALWAYS_BLAME
-#define ompt_event_release_nest_lock_last_implemented                          \
-  ompt_event_MAY_ALWAYS_BLAME
-#define ompt_event_release_critical_implemented ompt_event_MAY_ALWAYS_BLAME
-#define ompt_event_release_atomic_implemented ompt_event_MAY_ALWAYS_BLAME
-#define ompt_event_release_ordered_implemented ompt_event_MAY_ALWAYS_BLAME
+#define ompt_callback_target_map_implemented ompt_event_UNIMPLEMENTED
 
 /*----------------------------------------------------------------------------
- | Optional Events (synchronous events)
+ | Optional Events (blame shifting)
  +--------------------------------------------------------------------------*/
 
-#define ompt_event_implicit_task_begin_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_implicit_task_end_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_callback_idle_implemented ompt_event_MAY_ALWAYS_OPTIONAL
 
-#define ompt_event_initial_task_begin_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_initial_task_end_implemented ompt_event_UNIMPLEMENTED
+#define ompt_callback_sync_region_wait_implemented                             \
+  ompt_event_MAY_ALWAYS_OPTIONAL
 
-#define ompt_event_task_switch_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_callback_mutex_released_implemented ompt_event_MAY_ALWAYS_OPTIONAL
 
-#define ompt_event_loop_begin_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_loop_end_implemented ompt_event_MAY_ALWAYS_TRACE
-
-#define ompt_event_sections_begin_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_sections_end_implemented ompt_event_UNIMPLEMENTED
-
-#define ompt_event_single_in_block_begin_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_single_in_block_end_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_single_others_begin_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_single_others_end_implemented ompt_event_MAY_ALWAYS_TRACE
-
-#define ompt_event_workshare_begin_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_workshare_end_implemented ompt_event_UNIMPLEMENTED
-
-#define ompt_event_master_begin_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_master_end_implemented ompt_event_MAY_ALWAYS_TRACE
-
-#define ompt_event_barrier_begin_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_barrier_end_implemented ompt_event_MAY_ALWAYS_TRACE
+/*----------------------------------------------------------------------------
+ | Optional Events (synchronous events)
+ +--------------------------------------------------------------------------*/
 
-#define ompt_event_taskwait_begin_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_taskwait_end_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_callback_work_implemented ompt_event_MAY_ALWAYS_OPTIONAL
 
-#define ompt_event_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_taskgroup_end_implemented ompt_event_UNIMPLEMENTED
+#define ompt_callback_master_implemented ompt_event_MAY_ALWAYS_OPTIONAL
 
-#define ompt_event_release_nest_lock_prev_implemented                          \
-  ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_wait_lock_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_wait_nest_lock_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_wait_critical_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_wait_atomic_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_wait_ordered_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_callback_sync_region_implemented ompt_event_MAY_ALWAYS_OPTIONAL
 
-#define ompt_event_acquired_lock_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_acquired_nest_lock_first_implemented                        \
-  ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_acquired_nest_lock_next_implemented                         \
-  ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_acquired_critical_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_acquired_atomic_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_acquired_ordered_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_callback_mutex_acquire_implemented ompt_event_MAY_ALWAYS_OPTIONAL
+#define ompt_callback_mutex_acquired_implemented ompt_event_MAY_ALWAYS_OPTIONAL
+#define ompt_callback_nest_lock_implemented ompt_event_MAY_ALWAYS_OPTIONAL
 
-#define ompt_event_init_lock_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_init_nest_lock_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_callback_lock_init_implemented ompt_event_MAY_ALWAYS_OPTIONAL
+#define ompt_callback_lock_destroy_implemented ompt_event_MAY_ALWAYS_OPTIONAL
 
-#define ompt_event_destroy_lock_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_destroy_nest_lock_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_callback_flush_implemented ompt_event_MAY_ALWAYS_OPTIONAL
 
-#define ompt_event_flush_implemented ompt_event_UNIMPLEMENTED
+#define ompt_callback_cancel_implemented ompt_event_MAY_ALWAYS_OPTIONAL
 
 #if OMP_40_ENABLED
-#define ompt_event_task_dependences_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_task_dependence_pair_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_callback_task_dependences_implemented                             \
+  ompt_event_MAY_ALWAYS_OPTIONAL
+#define ompt_callback_task_dependence_implemented ompt_event_MAY_ALWAYS_OPTIONAL
 #else
-#define ompt_event_task_dependences_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_task_dependence_pair_implemented ompt_event_UNIMPLEMENTED
+#define ompt_callback_task_dependences_implemented ompt_event_UNIMPLEMENTED
+#define ompt_callback_task_dependence_implemented ompt_event_UNIMPLEMENTED
 #endif /* OMP_40_ENABLED */
 
 #endif
index fa66d9c..a6f0647 100644 (file)
@@ -8,6 +8,9 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#if KMP_OS_UNIX
+#include <dlfcn.h>
+#endif
 
 /*****************************************************************************
  * ompt include files
 
+// One row of the state table: a state id together with its printable name.
 typedef struct {
   const char *state_name;
-  ompt_state_t state_id;
-} ompt_state_info_t;
+  omp_state_t state_id;
+} omp_state_info_t;
+
+// One row of the mutex-implementation table: an implementation id together
+// with its printable name.
+typedef struct {
+  const char *name;
+  ompt_mutex_impl_t id;
+} ompt_mutex_impl_info_t;
 
 enum tool_setting_e {
   omp_tool_error,
@@ -46,25 +54,27 @@ enum tool_setting_e {
   omp_tool_enabled
 };
 
-typedef void (*ompt_initialize_t)(ompt_function_lookup_t ompt_fn_lookup,
-                                  const char *version,
-                                  unsigned int ompt_version);
-
 /*****************************************************************************
  * global variables
  ****************************************************************************/
 
-int ompt_enabled = 0;
+ompt_callbacks_active_t ompt_enabled;
+
+// Table with one {name, id} row per state listed by FOREACH_OMP_STATE; the
+// name is the stringified enumerator. The `code` macro argument is unused.
+omp_state_info_t omp_state_info[] = {
+#define omp_state_macro(state, code) {#state, state},
+    FOREACH_OMP_STATE(omp_state_macro)
+#undef omp_state_macro
+};
 
-ompt_state_info_t ompt_state_info[] = {
-#define ompt_state_macro(state, code) {#state, state},
-    FOREACH_OMPT_STATE(ompt_state_macro)
-#undef ompt_state_macro
+// Table with one {name, id} row per entry of FOREACH_OMPT_MUTEX_IMPL; the
+// name is the stringified enumerator. Walked by ompt_enumerate_mutex_impls.
+ompt_mutex_impl_info_t ompt_mutex_impl_info[] = {
+#define ompt_mutex_impl_macro(name, id) {#name, name},
+    FOREACH_OMPT_MUTEX_IMPL(ompt_mutex_impl_macro)
+#undef ompt_mutex_impl_macro
 };
 
-ompt_callbacks_t ompt_callbacks;
+ompt_callbacks_internal_t ompt_callbacks;
 
-static ompt_initialize_t ompt_initialize_fn = NULL;
+static ompt_fns_t *ompt_fns = NULL;
 
 /*****************************************************************************
  * forward declarations
@@ -72,48 +82,71 @@ static ompt_initialize_t ompt_initialize_fn = NULL;
 
 static ompt_interface_fn_t ompt_fn_lookup(const char *s);
 
-OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void);
+OMPT_API_ROUTINE ompt_data_t *ompt_get_thread_data(void);
 
 /*****************************************************************************
  * initialization and finalization (private operations)
  ****************************************************************************/
 
 /* On Unix-like systems that support weak symbols the following implementation
- * of ompt_tool() will be used in case no tool-supplied implementation of
+ * of ompt_start_tool() will be used in case no tool-supplied implementation of
  * this function is present in the address space of a process.
  *
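- * On Windows, the ompt_tool_windows function is used to find the
- * ompt_tool symbol across all modules loaded by a process. If ompt_tool is
- * found, ompt_tool's return value is used to initialize the tool. Otherwise,
- * NULL is returned and OMPT won't be enabled */
+ * On Windows, the ompt_tool_windows function is used to find the
+ * ompt_start_tool symbol across all modules loaded by a process. If
+ * ompt_start_tool is found, its return value is used to initialize the tool.
+ * Otherwise, NULL is returned and OMPT won't be enabled */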
+
+typedef ompt_fns_t *(*ompt_start_tool_t)(unsigned int, const char *);
+
+#if KMP_OS_UNIX
+
+// Fallback ompt_start_tool supplied by the runtime itself.
+// - With weak-symbol support it is declared weak.
+// - Without weak symbols but built as a dynamic library it is a regular
+//   definition, and a build-time warning is emitted.
+// - Otherwise the build is rejected.
+// When built as a dynamic library the function forwards to the next
+// "ompt_start_tool" found via dlsym(RTLD_NEXT, ...) and returns its result;
+// in all other cases it returns NULL, meaning "no tool".
 #if OMPT_HAVE_WEAK_ATTRIBUTE
+_OMP_EXTERN __attribute__((weak))
+#elif defined KMP_DYNAMIC_LIB
 _OMP_EXTERN
-__attribute__((weak)) ompt_initialize_t ompt_tool() {
+#warning Activation of OMPT might fail for tools statically linked into the application.
+#else
+#error Activation of OMPT is not supported on this platform.
+#endif
+ompt_fns_t *
+ompt_start_tool(unsigned int omp_version, const char *runtime_version) {
+#ifdef KMP_DYNAMIC_LIB
+  ompt_fns_t *ret = NULL;
+  // Try next symbol in the address space
+  ompt_start_tool_t next_tool = NULL;
+  next_tool = (ompt_start_tool_t)dlsym(RTLD_NEXT, "ompt_start_tool");
+  // No further definition exists: leave ret as NULL.
+  if (next_tool)
+    ret = (next_tool)(omp_version, runtime_version);
+  return ret;
+#else
 #if OMPT_DEBUG
-  printf("ompt_tool() is called from the RTL\n");
+  printf("ompt_start_tool() is called from the RTL\n");
 #endif
   return NULL;
+#endif
 }
 
 #elif OMPT_HAVE_PSAPI
 
 #include <psapi.h>
 #pragma comment(lib, "psapi.lib")
-#define ompt_tool ompt_tool_windows
+#define ompt_start_tool ompt_tool_windows
 
 // The number of loaded modules to start enumeration with EnumProcessModules()
 #define NUM_MODULES 128
 
-static ompt_initialize_t ompt_tool_windows() {
+static ompt_fns_t *ompt_tool_windows(unsigned int omp_version,
+                                     const char *runtime_version) {
   int i;
   DWORD needed, new_size;
   HMODULE *modules;
   HANDLE process = GetCurrentProcess();
   modules = (HMODULE *)malloc(NUM_MODULES * sizeof(HMODULE));
-  ompt_initialize_t (*ompt_tool_p)() = NULL;
+  ompt_start_tool_t ompt_tool_p = NULL;
 
 #if OMPT_DEBUG
-  printf("ompt_tool_windows(): looking for ompt_tool\n");
+  printf("ompt_tool_windows(): looking for ompt_start_tool\n");
 #endif
   if (!EnumProcessModules(process, modules, NUM_MODULES * sizeof(HMODULE),
                           &needed)) {
@@ -135,21 +168,22 @@ static ompt_initialize_t ompt_tool_windows() {
     }
   }
   for (i = 0; i < new_size; ++i) {
-    (FARPROC &)ompt_tool_p = GetProcAddress(modules[i], "ompt_tool");
+    (FARPROC &)ompt_tool_p = GetProcAddress(modules[i], "ompt_start_tool");
     if (ompt_tool_p) {
 #if OMPT_DEBUG
       TCHAR modName[MAX_PATH];
       if (GetModuleFileName(modules[i], modName, MAX_PATH))
-        printf("ompt_tool_windows(): ompt_tool found in module %s\n", modName);
+        printf("ompt_tool_windows(): ompt_start_tool found in module %s\n",
+               modName);
 #endif
       free(modules);
-      return ompt_tool_p();
+      return (*ompt_tool_p)(omp_version, runtime_version);
     }
 #if OMPT_DEBUG
     else {
       TCHAR modName[MAX_PATH];
       if (GetModuleFileName(modules[i], modName, MAX_PATH))
-        printf("ompt_tool_windows(): ompt_tool not found in module %s\n",
+        printf("ompt_tool_windows(): ompt_start_tool not found in module %s\n",
                modName);
     }
 #endif
@@ -161,6 +195,49 @@ static ompt_initialize_t ompt_tool_windows() {
 #error Either __attribute__((weak)) or psapi.dll are required for OMPT support
 #endif // OMPT_HAVE_WEAK_ATTRIBUTE
 
+// Find a tool and ask it to start.
+// Search order:
+//   1. ompt_start_tool as visible from the runtime itself;
+//   2. each library named in the OMP_TOOL_LIBRARIES environment variable,
+//      split on ':' (';' on Windows), in the order listed.
+// The first non-NULL result is returned, and the library that produced it
+// stays loaded. A library that cannot provide a tool is unloaded again.
+// Returns NULL when no tool accepted.
+static ompt_fns_t *ompt_try_start_tool(unsigned int omp_version,
+                                       const char *runtime_version) {
+  ompt_fns_t *ret = NULL;
+  ompt_start_tool_t start_tool = NULL;
+#if KMP_OS_WINDOWS
+  // Cannot use colon to describe a list of absolute paths on Windows
+  const char *sep = ";";
+#else
+  const char *sep = ":";
+#endif
+
+  // Try in the current address space
+  if ((ret = ompt_start_tool(omp_version, runtime_version)))
+    return ret;
+
+  // Try tool-libraries-var ICV
+  const char *tool_libs = getenv("OMP_TOOL_LIBRARIES");
+  if (tool_libs) {
+    // Tokenize a private copy instead of the environment string itself
+    // (presumably __kmp_str_token modifies its input -- confirm).
+    const char *libs = __kmp_str_format("%s", tool_libs);
+    char *buf;
+    char *fname = __kmp_str_token(CCAST(char *, libs), sep, &buf);
+    while (fname) {
+#if KMP_OS_UNIX
+      void *h = dlopen(fname, RTLD_LAZY);
+      if (h) {
+        start_tool = (ompt_start_tool_t)dlsym(h, "ompt_start_tool");
+#elif KMP_OS_WINDOWS
+      HMODULE h = LoadLibrary(fname);
+      if (h) {
+        start_tool = (ompt_start_tool_t)GetProcAddress(h, "ompt_start_tool");
+#else
+#error Activation of OMPT is not supported on this platform.
+#endif
+        // Keep the library loaded only if its tool accepted.
+        if (start_tool && (ret = (*start_tool)(omp_version, runtime_version)))
+          break;
+        // No entry point, or the tool declined: release the handle so that
+        // rejected libraries do not stay mapped into the process.
+#if KMP_OS_UNIX
+        dlclose(h);
+#elif KMP_OS_WINDOWS
+        FreeLibrary(h);
+#endif
+      }
+      fname = __kmp_str_token(NULL, sep, &buf);
+    }
+    __kmp_str_free(&libs);
+  }
+  return ret;
+}
+
 void ompt_pre_init() {
   //--------------------------------------------------
   // Execute the pre-initialization logic only once.
@@ -194,10 +271,14 @@ void ompt_pre_init() {
 
   case omp_tool_unset:
   case omp_tool_enabled:
-    ompt_initialize_fn = ompt_tool();
-    if (ompt_initialize_fn) {
-      ompt_enabled = 1;
-    }
+
+    //--------------------------------------------------
+    // Look for a tool: first in the current address space, then in
+    // the libraries listed in OMP_TOOL_LIBRARIES
+    //--------------------------------------------------
+    ompt_fns =
+        ompt_try_start_tool(__kmp_openmp_version, ompt_get_runtime_version());
+
+    memset(&ompt_enabled, 0, sizeof(ompt_enabled));
     break;
 
   case omp_tool_error:
@@ -226,31 +307,34 @@ void ompt_post_init() {
   //--------------------------------------------------
   // Initialize the tool if so indicated.
   //--------------------------------------------------
-  if (ompt_enabled) {
-    ompt_initialize_fn(ompt_fn_lookup, ompt_get_runtime_version(),
-                       OMPT_VERSION);
+  if (ompt_fns) {
+    ompt_enabled.enabled = !!ompt_fns->initialize(ompt_fn_lookup, ompt_fns);
 
     ompt_thread_t *root_thread = ompt_get_thread();
 
-    ompt_set_thread_state(root_thread, ompt_state_overhead);
+    ompt_set_thread_state(root_thread, omp_state_overhead);
 
-    if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
-      ompt_callbacks.ompt_callback(ompt_event_thread_begin)(
-          ompt_thread_initial, ompt_get_thread_id());
+    if (ompt_enabled.ompt_callback_thread_begin) {
+      ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
+          ompt_thread_initial, __ompt_get_thread_data_internal());
+    }
+    ompt_data_t *task_data;
+    __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
+    if (ompt_enabled.ompt_callback_task_create) {
+      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
+          NULL, NULL, task_data, ompt_task_initial, 0, NULL);
     }
 
-    ompt_set_thread_state(root_thread, ompt_state_work_serial);
+    ompt_set_thread_state(root_thread, omp_state_work_serial);
   }
 }
 
+// Shut down the tool interface: call the tool's finalize entry point if the
+// `enabled` flag is set, then clear every flag in ompt_enabled.
 void ompt_fini() {
-  if (ompt_enabled) {
-    if (ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)) {
-      ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)();
-    }
+  if (ompt_enabled.enabled) {
+    ompt_fns->finalize(ompt_fns);
   }
 
-  ompt_enabled = 0;
+  memset(&ompt_enabled, 0, sizeof(ompt_enabled));
 }
 
 /*****************************************************************************
@@ -261,15 +345,15 @@ void ompt_fini() {
  * state
  ****************************************************************************/
 
-OMPT_API_ROUTINE int ompt_enumerate_state(int current_state, int *next_state,
-                                          const char **next_state_name) {
-  const static int len = sizeof(ompt_state_info) / sizeof(ompt_state_info_t);
+OMPT_API_ROUTINE int ompt_enumerate_states(int current_state, int *next_state,
+                                           const char **next_state_name) {
+  const static int len = sizeof(omp_state_info) / sizeof(omp_state_info_t);
   int i = 0;
 
   for (i = 0; i < len - 1; i++) {
-    if (ompt_state_info[i].state_id == current_state) {
-      *next_state = ompt_state_info[i + 1].state_id;
-      *next_state_name = ompt_state_info[i + 1].state_name;
+    if (omp_state_info[i].state_id == current_state) {
+      *next_state = omp_state_info[i + 1].state_id;
+      *next_state_name = omp_state_info[i + 1].state_name;
       return 1;
     }
   }
@@ -277,17 +361,35 @@ OMPT_API_ROUTINE int ompt_enumerate_state(int current_state, int *next_state,
   return 0;
 }
 
+// Step through ompt_mutex_impl_info: given current_impl, report the row that
+// follows it in the table.
+// Returns 1 and fills *next_impl / *next_impl_name when a successor exists;
+// returns 0 when current_impl is the last row or is not in the table.
+OMPT_API_ROUTINE int ompt_enumerate_mutex_impls(int current_impl,
+                                                int *next_impl,
+                                                const char **next_impl_name) {
+  const static int rows =
+      sizeof(ompt_mutex_impl_info) / sizeof(ompt_mutex_impl_info_t);
+  int idx;
+  // The final row has no successor, so it is never considered as a match.
+  for (idx = 0; idx + 1 < rows; ++idx) {
+    if (ompt_mutex_impl_info[idx].id == current_impl) {
+      const ompt_mutex_impl_info_t *succ = &ompt_mutex_impl_info[idx + 1];
+      *next_impl = succ->id;
+      *next_impl_name = succ->name;
+      return 1;
+    }
+  }
+  return 0;
+}
+
 /*****************************************************************************
  * callbacks
  ****************************************************************************/
 
-OMPT_API_ROUTINE int ompt_set_callback(ompt_event_t evid, ompt_callback_t cb) {
-  switch (evid) {
+OMPT_API_ROUTINE int ompt_set_callback(ompt_callbacks_t which,
+                                       ompt_callback_t callback) {
+  switch (which) {
 
 #define ompt_event_macro(event_name, callback_type, event_id)                  \
   case event_name:                                                             \
     if (ompt_event_implementation_status(event_name)) {                        \
-      ompt_callbacks.ompt_callback(event_name) = (callback_type)cb;            \
+      ompt_callbacks.ompt_callback(event_name) = (callback_type)callback;      \
+      ompt_enabled.event_name = 1;                                             \
     }                                                                          \
     return ompt_event_implementation_status(event_name);
 
@@ -296,12 +398,13 @@ OMPT_API_ROUTINE int ompt_set_callback(ompt_event_t evid, ompt_callback_t cb) {
 #undef ompt_event_macro
 
   default:
-    return ompt_set_result_registration_error;
+    return ompt_set_error;
   }
 }
 
-OMPT_API_ROUTINE int ompt_get_callback(ompt_event_t evid, ompt_callback_t *cb) {
-  switch (evid) {
+OMPT_API_ROUTINE int ompt_get_callback(ompt_callbacks_t which,
+                                       ompt_callback_t *callback) {
+  switch (which) {
 
 #define ompt_event_macro(event_name, callback_type, event_id)                  \
   case event_name:                                                             \
@@ -309,7 +412,7 @@ OMPT_API_ROUTINE int ompt_get_callback(ompt_event_t evid, ompt_callback_t *cb) {
       ompt_callback_t mycb =                                                   \
           (ompt_callback_t)ompt_callbacks.ompt_callback(event_name);           \
       if (mycb) {                                                              \
-        *cb = mycb;                                                            \
+        *callback = mycb;                                                      \
         return ompt_get_callback_success;                                      \
       }                                                                        \
     }                                                                          \
@@ -328,54 +431,149 @@ OMPT_API_ROUTINE int ompt_get_callback(ompt_event_t evid, ompt_callback_t *cb) {
  * parallel regions
  ****************************************************************************/
 
-OMPT_API_ROUTINE ompt_parallel_id_t ompt_get_parallel_id(int ancestor_level) {
-  return __ompt_get_parallel_id_internal(ancestor_level);
-}
-
-OMPT_API_ROUTINE int ompt_get_parallel_team_size(int ancestor_level) {
-  return __ompt_get_parallel_team_size_internal(ancestor_level);
-}
-
-OMPT_API_ROUTINE void *ompt_get_parallel_function(int ancestor_level) {
-  return __ompt_get_parallel_function_internal(ancestor_level);
+// Tool-facing entry point: forwards all arguments unchanged to
+// __ompt_get_parallel_info_internal and returns its result.
+OMPT_API_ROUTINE int ompt_get_parallel_info(int ancestor_level,
+                                            ompt_data_t **parallel_data,
+                                            int *team_size) {
+  int result = __ompt_get_parallel_info_internal(ancestor_level,
+                                                 parallel_data, team_size);
+  return result;
 }
 
-OMPT_API_ROUTINE ompt_state_t ompt_get_state(ompt_wait_id_t *ompt_wait_id) {
-  ompt_state_t thread_state = __ompt_get_state_internal(ompt_wait_id);
+// Return the state reported by __ompt_get_state_internal, substituting
+// omp_state_work_serial when that reports omp_state_undefined.
+// wait_id is passed through to the internal routine unchanged.
+OMPT_API_ROUTINE omp_state_t ompt_get_state(ompt_wait_id_t *wait_id) {
+  omp_state_t thread_state = __ompt_get_state_internal(wait_id);
 
-  if (thread_state == ompt_state_undefined) {
-    thread_state = ompt_state_work_serial;
+  if (thread_state == omp_state_undefined) {
+    thread_state = omp_state_work_serial;
   }
 
   return thread_state;
 }
 
 /*****************************************************************************
- * threads
+ * tasks
  ****************************************************************************/
 
-OMPT_API_ROUTINE void *ompt_get_idle_frame() {
-  return __ompt_get_idle_frame_internal();
+// Tool-facing entry point: returns whatever __ompt_get_thread_data_internal
+// returns (which may be NULL).
+OMPT_API_ROUTINE ompt_data_t *ompt_get_thread_data(void) {
+  ompt_data_t *data = __ompt_get_thread_data_internal();
+  return data;
+}
+
+// Tool-facing entry point: forwards all arguments unchanged to
+// __ompt_get_task_info_internal and returns its result.
+OMPT_API_ROUTINE int ompt_get_task_info(int ancestor_level, int *type,
+                                        ompt_data_t **task_data,
+                                        ompt_frame_t **task_frame,
+                                        ompt_data_t **parallel_data,
+                                        int *thread_num) {
+  int result = __ompt_get_task_info_internal(
+      ancestor_level, type, task_data, task_frame, parallel_data, thread_num);
+  return result;
 }
 
 /*****************************************************************************
- * tasks
+ * places
  ****************************************************************************/
 
-OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void) {
-  return __ompt_get_thread_id_internal();
+// Returns __kmp_affinity_num_masks, or 0 when affinity support is compiled
+// out or KMP_AFFINITY_CAPABLE() is false at run time.
+OMPT_API_ROUTINE int ompt_get_num_places(void) {
+// copied from kmp_ftn_entry.h (but modified)
+#if KMP_AFFINITY_SUPPORTED
+  if (KMP_AFFINITY_CAPABLE())
+    return __kmp_affinity_num_masks;
+  return 0;
+#else
+  return 0;
+#endif
+}
+
+// Report the processor ids belonging to place `place_num`.
+//   place_num  index into __kmp_affinity_masks; out-of-range yields 0
+//   ids_size   capacity of `ids`, in elements
+//   ids        output buffer; written only when ids_size can hold every id
+// Returns the number of processors in the place (even when `ids` was too
+// small and therefore left untouched), or 0 when affinity is unsupported,
+// not capable, or place_num is invalid.
+OMPT_API_ROUTINE int ompt_get_place_proc_ids(int place_num, int ids_size,
+                                             int *ids) {
+// copied from kmp_ftn_entry.h (but modified)
+#if !KMP_AFFINITY_SUPPORTED
+  return 0;
+#else
+  int i, count;
+  if (!KMP_AFFINITY_CAPABLE())
+    return 0;
+  if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks)
+    return 0;
+  /* TODO: Is this safe for asynchronous call from signal handler during runtime
+   * shutdown? */
+  kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num);
+  // Pass 1: count only. This avoids a stack array sized by the caller's
+  // ids_size, which is undefined for ids_size <= 0 and unbounded otherwise.
+  count = 0;
+  KMP_CPU_SET_ITERATE(i, mask) {
+    if ((!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) ||
+        (!KMP_CPU_ISSET(i, mask))) {
+      continue;
+    }
+    count++;
+  }
+  // Pass 2: copy out, but only when the whole result fits, so the caller
+  // never sees a partially filled buffer.
+  if (ids_size >= count) {
+    int next = 0;
+    KMP_CPU_SET_ITERATE(i, mask) {
+      if ((!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) ||
+          (!KMP_CPU_ISSET(i, mask))) {
+        continue;
+      }
+      ids[next++] = i;
+    }
+  }
+  return count;
+#endif
 }
 
-OMPT_API_ROUTINE ompt_task_id_t ompt_get_task_id(int depth) {
-  return __ompt_get_task_id_internal(depth);
+// Returns the calling thread's th_current_place, or -1 when affinity is
+// unsupported or not capable, the thread cannot be resolved, or the stored
+// place is negative.
+OMPT_API_ROUTINE int ompt_get_place_num(void) {
+// copied from kmp_ftn_entry.h (but modified)
+#if KMP_AFFINITY_SUPPORTED
+  if (KMP_AFFINITY_CAPABLE()) {
+    int gtid = __kmp_entry_gtid();
+    kmp_info_t *self = __kmp_thread_from_gtid(gtid);
+    if (self != NULL && self->th.th_current_place >= 0)
+      return self->th.th_current_place;
+  }
+  return -1;
+#else
+  return -1;
+#endif
 }
 
-OMPT_API_ROUTINE ompt_frame_t *ompt_get_task_frame(int depth) {
-  return __ompt_get_task_frame_internal(depth);
+// Report the place numbers in the calling thread's place partition.
+//   place_nums_size  capacity of `place_nums`, in elements
+//   place_nums       output buffer; written only when it can hold them all
+// Returns the number of places in the partition (even when the buffer was
+// too small and therefore left untouched), or 0 when affinity is
+// unsupported, not capable, the thread cannot be resolved, or either bound
+// of the partition is negative.
+OMPT_API_ROUTINE int ompt_get_partition_place_nums(int place_nums_size,
+                                                   int *place_nums) {
+// copied from kmp_ftn_entry.h (but modified)
+#if !KMP_AFFINITY_SUPPORTED
+  return 0;
+#else
+  int i, gtid, place_num, first_place, last_place, start, end, num_places;
+  kmp_info_t *thread;
+  if (!KMP_AFFINITY_CAPABLE())
+    return 0;
+  gtid = __kmp_entry_gtid();
+  thread = __kmp_thread_from_gtid(gtid);
+  if (thread == NULL)
+    return 0;
+  first_place = thread->th.th_first_place;
+  last_place = thread->th.th_last_place;
+  if (first_place < 0 || last_place < 0)
+    return 0;
+  if (first_place <= last_place) {
+    start = first_place;
+    end = last_place;
+  } else {
+    start = last_place;
+    end = first_place;
+  }
+  // [start, end] is inclusive, so it holds end - start + 1 places. Using
+  // end - start here would both under-report the count and let the loop
+  // below write one element past a buffer of exactly that size.
+  num_places = end - start + 1;
+  if (num_places <= place_nums_size)
+    for (i = 0, place_num = start; place_num <= end; ++place_num, ++i) {
+      place_nums[i] = place_num;
+    }
+  return num_places;
+#endif
 }
 
-OMPT_API_ROUTINE void *ompt_get_task_function(int depth) {
-  return __ompt_get_task_function_internal(depth);
+/*****************************************************************************
+ * places
+ ****************************************************************************/
+
+// Returns sched_getcpu() on Linux builds and -1 everywhere else.
+OMPT_API_ROUTINE int ompt_get_proc_id(void) {
+#if !KMP_OS_LINUX
+  return -1;
+#else
+  return sched_getcpu();
+#endif
 }
 
 /*****************************************************************************
@@ -435,28 +633,59 @@ OMPT_API_PLACEHOLDER void ompt_mutex_wait(void) {
 OMPT_API_ROUTINE int ompt_get_ompt_version() { return OMPT_VERSION; }
 
 /*****************************************************************************
- * application-facing API
+* application-facing API
  ****************************************************************************/
 
 /*----------------------------------------------------------------------------
  | control
  ---------------------------------------------------------------------------*/
 
-_OMP_EXTERN void ompt_control(uint64_t command, uint64_t modifier) {
-  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_control)) {
-    ompt_callbacks.ompt_callback(ompt_event_control)(command, modifier);
+// Forward a control request to the tool's control_tool callback.
+// Returns the callback's result when one is registered, -1 when OMPT is
+// enabled but no control_tool callback is registered, and -2 when OMPT is
+// not enabled at all.
+int __kmp_control_tool(uint64_t command, uint64_t modifier, void *arg) {
+
+  if (ompt_enabled.enabled) {
+    if (ompt_enabled.ompt_callback_control_tool) {
+      return ompt_callbacks.ompt_callback(ompt_callback_control_tool)(
+          command, modifier, arg, OMPT_LOAD_RETURN_ADDRESS(__kmp_entry_gtid()));
+    } else {
+      return -1;
+    }
+  } else {
+    return -2;
   }
 }
 
 /*****************************************************************************
+ * misc
+ ****************************************************************************/
+
+// Tool-facing entry point: returns the value produced by
+// __ompt_get_unique_id_internal.
+OMPT_API_ROUTINE uint64_t ompt_get_unique_id(void) {
+  uint64_t id = __ompt_get_unique_id_internal();
+  return id;
+}
+
+/*****************************************************************************
+ * Target
+ ****************************************************************************/
+
+// Always returns 0 (thread is not in a target region); none of the output
+// parameters is written.
+OMPT_API_ROUTINE int ompt_get_target_info(uint64_t *device_num,
+                                          ompt_id_t *target_id,
+                                          ompt_id_t *host_op_id) {
+  (void)device_num;
+  (void)target_id;
+  (void)host_op_id;
+  return 0;
+}
+
+// Always returns 1: only one device (the current device) is available.
+OMPT_API_ROUTINE int ompt_get_num_devices(void) {
+  const int only_current_device = 1;
+  return only_current_device;
+}
+
+/*****************************************************************************
  * API inquiry for tool
  ****************************************************************************/
 
 static ompt_interface_fn_t ompt_fn_lookup(const char *s) {
 
 #define ompt_interface_fn(fn)                                                  \
+  fn##_t fn##_f = fn;                                                          \
   if (strcmp(s, #fn) == 0)                                                     \
-    return (ompt_interface_fn_t)fn;
+    return (ompt_interface_fn_t)fn##_f;
 
   FOREACH_OMPT_INQUIRY_FN(ompt_interface_fn)
 
index 44929ef..6139e00 100644 (file)
 
 #define ompt_callback(e) e##_callback
 
-typedef struct ompt_callbacks_s {
+// One callback pointer per event listed by FOREACH_OMPT_EVENT. Each member is
+// named <event>_callback through the ompt_callback(e) macro and has the
+// callback type supplied for that event.
+typedef struct ompt_callbacks_internal_s {
 #define ompt_event_macro(event, callback, eventid)                             \
   callback ompt_callback(event);
 
   FOREACH_OMPT_EVENT(ompt_event_macro)
 
 #undef ompt_event_macro
-} ompt_callbacks_t;
+} ompt_callbacks_internal_t;
+
+// One-bit flags: `enabled`, followed by one bit per event listed by
+// FOREACH_OMPT_EVENT, each named after its event. ompt_set_callback sets an
+// event's bit when it stores a callback for that event.
+typedef struct ompt_callbacks_active_s {
+  unsigned int enabled : 1;
+#define ompt_event_macro(event, callback, eventid) unsigned int event : 1;
+
+  FOREACH_OMPT_EVENT(ompt_event_macro)
+
+#undef ompt_event_macro
+} ompt_callbacks_active_t;
+
+// Build the bitwise OR of task-type detail flags from info->td_flags:
+//   task_serial or tasking_ser -> ompt_task_undeferred
+//   tiedness clear             -> ompt_task_untied
+//   final                      -> ompt_task_final
+//   merged_if0                 -> ompt_task_mergeable
+// `info` and the whole expansion are parenthesized so that the macro can be
+// used with any pointer expression and inside any larger expression.
+#define TASK_TYPE_DETAILS_FORMAT(info)                                         \
+  ((((info)->td_flags.task_serial || (info)->td_flags.tasking_ser)             \
+        ? ompt_task_undeferred                                                 \
+        : 0x0) |                                                               \
+   ((!((info)->td_flags.tiedness)) ? ompt_task_untied : 0x0) |                 \
+   ((info)->td_flags.final ? ompt_task_final : 0x0) |                          \
+   ((info)->td_flags.merged_if0 ? ompt_task_mergeable : 0x0))
 
 typedef struct {
   ompt_frame_t frame;
-  void *function;
-  ompt_task_id_t task_id;
+  ompt_data_t task_data;
+  struct kmp_taskdata *scheduling_parent;
 #if OMP_40_ENABLED
   int ndeps;
   ompt_task_dependence_t *deps;
@@ -33,32 +50,31 @@ typedef struct {
 } ompt_task_info_t;
 
+// Team-level OMPT data; embedded in ompt_lw_taskteam_t as ompt_team_info.
 typedef struct {
-  ompt_parallel_id_t parallel_id;
-  void *microtask;
+  ompt_data_t parallel_data;
+  void *master_return_address;
 } ompt_team_info_t;
 
+// Node in a chain of lightweight task teams. Each node carries its own team
+// and task info; `parent` links to the next node, which the task-info lookups
+// in ompt-specific.cpp follow one step per level.
 typedef struct ompt_lw_taskteam_s {
   ompt_team_info_t ompt_team_info;
   ompt_task_info_t ompt_task_info;
+  // NOTE(review): presumably marks a heap-allocated node -- confirm.
+  int heap;
   struct ompt_lw_taskteam_s *parent;
 } ompt_lw_taskteam_t;
 
-typedef struct ompt_parallel_info_s {
-  ompt_task_id_t parent_task_id; /* id of parent task            */
-  ompt_parallel_id_t parallel_id; /* id of parallel region        */
-  ompt_frame_t *parent_task_frame; /* frame data of parent task    */
-  void *parallel_function; /* pointer to outlined function */
-} ompt_parallel_info_t;
-
+// Per-thread OMPT bookkeeping, reached as th.ompt_thread_info on a thread
+// descriptor.
 typedef struct {
-  ompt_state_t state;
+  ompt_data_t thread_data;
+  ompt_data_t task_data; /* stored here from implicit barrier-begin until
+                            implicit-task-end */
+  void *return_address; /* stored here on entry of runtime */
+  omp_state_t state;
   ompt_wait_id_t wait_id;
+  int ompt_task_yielded;
   void *idle_frame;
 } ompt_thread_info_t;
 
-extern ompt_callbacks_t ompt_callbacks;
+extern ompt_callbacks_internal_t ompt_callbacks;
 
-#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE
+#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_OPTIONAL
 #if USE_FAST_MEMORY
 #define KMP_OMPT_DEPS_ALLOC __kmp_fast_allocate
 #define KMP_OMPT_DEPS_FREE __kmp_fast_free
@@ -66,7 +82,7 @@ extern ompt_callbacks_t ompt_callbacks;
 #define KMP_OMPT_DEPS_ALLOC __kmp_thread_malloc
 #define KMP_OMPT_DEPS_FREE __kmp_thread_free
 #endif
-#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */
+#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_OPTIONAL */
 
 #ifdef __cplusplus
 extern "C" {
@@ -76,7 +92,20 @@ void ompt_pre_init(void);
 void ompt_post_init(void);
 void ompt_fini(void);
 
-extern int ompt_enabled;
+#define OMPT_GET_RETURN_ADDRESS(level) __builtin_return_address(level)
+#define OMPT_GET_FRAME_ADDRESS(level) __builtin_frame_address(level)
+
+int __kmp_control_tool(uint64_t command, uint64_t modifier, void *arg);
+
+extern ompt_callbacks_active_t ompt_enabled;
+
+// UNLIKELY(x): on Windows it expands to plain (x); elsewhere it wraps the
+// normalized condition in __builtin_expect with an expected value of 0.
+// OMPT_NOINLINE: the noinline function attribute in each toolchain's spelling.
+#if KMP_OS_WINDOWS
+#define UNLIKELY(x) (x)
+#define OMPT_NOINLINE __declspec(noinline)
+#else
+#define UNLIKELY(x) __builtin_expect(!!(x), 0)
+#define OMPT_NOINLINE __attribute__((noinline))
+#endif
 
 #ifdef __cplusplus
 };
index 9e0d1ee..bdf6bd1 100644 (file)
@@ -3,42 +3,33 @@
 //******************************************************************************
 
 #include "kmp.h"
-#include "ompt-internal.h"
 #include "ompt-specific.h"
 
+#if KMP_OS_UNIX
+#include <dlfcn.h>
+#endif
+
+// THREAD_LOCAL: thread-local storage specifier, spelled __declspec(thread) on
+// Windows and __thread elsewhere.
+#if KMP_OS_WINDOWS
+#define THREAD_LOCAL __declspec(thread)
+#else
+#define THREAD_LOCAL __thread
+#endif
+
 //******************************************************************************
 // macros
 //******************************************************************************
 
-#define GTID_TO_OMPT_THREAD_ID(id) ((ompt_thread_id_t)(id >= 0) ? id + 1 : 0)
-
-#define LWT_FROM_TEAM(team) (team)->t.ompt_serialized_team_info;
+#define LWT_FROM_TEAM(team) (team)->t.ompt_serialized_team_info
 
 #define OMPT_THREAD_ID_BITS 16
 
-// 2013 08 24 - John Mellor-Crummey
-//   ideally, a thread should assign its own ids based on thread private data.
-//   however, the way the intel runtime reinitializes thread data structures
-//   when it creates teams makes it difficult to maintain persistent thread
-//   data. using a shared variable instead is simple. I leave it to intel to
-//   sort out how to implement a higher performance version in their runtime.
-
-// when using fetch_and_add to generate the IDs, there isn't any reason to waste
-// bits for thread id.
-#if 0
-#define NEXT_ID(id_ptr, tid)                                                   \
-  ((KMP_TEST_THEN_INC64(id_ptr) << OMPT_THREAD_ID_BITS) | (tid))
-#else
-#define NEXT_ID(id_ptr, tid) (KMP_TEST_THEN_INC64((volatile kmp_int64 *)id_ptr))
-#endif
-
 //******************************************************************************
 // private operations
 //******************************************************************************
 
 //----------------------------------------------------------
 // traverse the team and task hierarchy
-// note: __ompt_get_teaminfo and __ompt_get_taskinfo
+// note: __ompt_get_teaminfo and __ompt_get_task_info_object
 //       traverse the hierarchy similarly and need to be
 //       kept consistent
 //----------------------------------------------------------
@@ -51,7 +42,7 @@ ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size) {
     if (team == NULL)
       return NULL;
 
-    ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(team);
+    ompt_lw_taskteam_t *next_lwt = LWT_FROM_TEAM(team), *lwt = NULL;
 
     while (depth > 0) {
       // next lightweight team (if any)
@@ -61,9 +52,14 @@ ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size) {
       // next heavyweight team (if any) after
       // lightweight teams are exhausted
       if (!lwt && team) {
-        team = team->t.t_parent;
-        if (team) {
-          lwt = LWT_FROM_TEAM(team);
+        if (next_lwt) {
+          lwt = next_lwt;
+          next_lwt = NULL;
+        } else {
+          team = team->t.t_parent;
+          if (team) {
+            next_lwt = LWT_FROM_TEAM(team);
+          }
         }
       }
 
@@ -90,13 +86,14 @@ ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size) {
   return NULL;
 }
 
-ompt_task_info_t *__ompt_get_taskinfo(int depth) {
+ompt_task_info_t *__ompt_get_task_info_object(int depth) {
   ompt_task_info_t *info = NULL;
   kmp_info_t *thr = ompt_get_thread();
 
   if (thr) {
     kmp_taskdata_t *taskdata = thr->th.th_current_task;
-    ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(taskdata->td_team);
+    ompt_lw_taskteam_t *lwt = NULL,
+                       *next_lwt = LWT_FROM_TEAM(taskdata->td_team);
 
     while (depth > 0) {
       // next lightweight team (if any)
@@ -106,9 +103,59 @@ ompt_task_info_t *__ompt_get_taskinfo(int depth) {
       // next heavyweight team (if any) after
       // lightweight teams are exhausted
       if (!lwt && taskdata) {
-        taskdata = taskdata->td_parent;
-        if (taskdata) {
-          lwt = LWT_FROM_TEAM(taskdata->td_team);
+        if (next_lwt) {
+          lwt = next_lwt;
+          next_lwt = NULL;
+        } else {
+          taskdata = taskdata->td_parent;
+          if (taskdata) {
+            next_lwt = LWT_FROM_TEAM(taskdata->td_team);
+          }
+        }
+      }
+      depth--;
+    }
+
+    if (lwt) {
+      info = &lwt->ompt_task_info;
+    } else if (taskdata) {
+      info = &taskdata->ompt_task_info;
+    }
+  }
+
+  return info;
+}
+
+ompt_task_info_t *__ompt_get_scheduling_taskinfo(int depth) {
+  ompt_task_info_t *info = NULL;
+  kmp_info_t *thr = ompt_get_thread();
+
+  if (thr) {
+    kmp_taskdata_t *taskdata = thr->th.th_current_task;
+
+    ompt_lw_taskteam_t *lwt = NULL,
+                       *next_lwt = LWT_FROM_TEAM(taskdata->td_team);
+
+    while (depth > 0) {
+      // next lightweight team (if any)
+      if (lwt)
+        lwt = lwt->parent;
+
+      // next heavyweight team (if any) after
+      // lightweight teams are exhausted
+      if (!lwt && taskdata) {
+        // first try scheduling parent (for explicit task scheduling)
+        if (taskdata->ompt_task_info.scheduling_parent) {
+          taskdata = taskdata->ompt_task_info.scheduling_parent;
+        } else if (next_lwt) {
+          lwt = next_lwt;
+          next_lwt = NULL;
+        } else {
+          // then go for implicit tasks
+          taskdata = taskdata->td_parent;
+          if (taskdata) {
+            next_lwt = LWT_FROM_TEAM(taskdata->td_team);
+          }
         }
       }
       depth--;
@@ -132,29 +179,14 @@ ompt_task_info_t *__ompt_get_taskinfo(int depth) {
 // thread support
 //----------------------------------------------------------
 
-ompt_parallel_id_t __ompt_thread_id_new() {
-  static uint64_t ompt_thread_id = 1;
-  return NEXT_ID(&ompt_thread_id, 0);
-}
-
-void __ompt_thread_begin(ompt_thread_type_t thread_type, int gtid) {
-  ompt_callbacks.ompt_callback(ompt_event_thread_begin)(
-      thread_type, GTID_TO_OMPT_THREAD_ID(gtid));
-}
-
-void __ompt_thread_end(ompt_thread_type_t thread_type, int gtid) {
-  ompt_callbacks.ompt_callback(ompt_event_thread_end)(
-      thread_type, GTID_TO_OMPT_THREAD_ID(gtid));
-}
-
-ompt_thread_id_t __ompt_get_thread_id_internal() {
-  // FIXME: until we have a better way of assigning ids, use __kmp_get_gtid
-  // since the return value might be negative, we need to test that before
-  // assigning it to an ompt_thread_id_t, which is unsigned.
-  int id = __kmp_get_gtid();
-  assert(id >= 0);
-
-  return GTID_TO_OMPT_THREAD_ID(id);
+// Returns the address of the calling thread's OMPT thread_data, or NULL when
+// the caller has a negative global thread id or no thread descriptor exists.
+ompt_data_t *__ompt_get_thread_data_internal() {
+  if (__kmp_get_gtid() < 0)
+    return NULL;
+
+  kmp_info_t *self = ompt_get_thread();
+  return self ? &(self->th.ompt_thread_info.thread_data) : NULL;
 }
 
 //----------------------------------------------------------
@@ -162,13 +194,12 @@ ompt_thread_id_t __ompt_get_thread_id_internal() {
 //----------------------------------------------------------
 
 void __ompt_thread_assign_wait_id(void *variable) {
-  int gtid = __kmp_gtid_get_specific();
-  kmp_info_t *ti = ompt_get_thread_gtid(gtid);
+  kmp_info_t *ti = ompt_get_thread();
 
   ti->th.ompt_thread_info.wait_id = (ompt_wait_id_t)variable;
 }
 
-ompt_state_t __ompt_get_state_internal(ompt_wait_id_t *ompt_wait_id) {
+omp_state_t __ompt_get_state_internal(ompt_wait_id_t *ompt_wait_id) {
   kmp_info_t *ti = ompt_get_thread();
 
   if (ti) {
@@ -176,46 +207,26 @@ ompt_state_t __ompt_get_state_internal(ompt_wait_id_t *ompt_wait_id) {
       *ompt_wait_id = ti->th.ompt_thread_info.wait_id;
     return ti->th.ompt_thread_info.state;
   }
-  return ompt_state_undefined;
-}
-
-//----------------------------------------------------------
-// idle frame support
-//----------------------------------------------------------
-
-void *__ompt_get_idle_frame_internal(void) {
-  kmp_info_t *ti = ompt_get_thread();
-  return ti ? ti->th.ompt_thread_info.idle_frame : NULL;
+  return omp_state_undefined;
 }
 
 //----------------------------------------------------------
 // parallel region support
 //----------------------------------------------------------
 
-ompt_parallel_id_t __ompt_parallel_id_new(int gtid) {
-  static uint64_t ompt_parallel_id = 1;
-  return gtid >= 0 ? NEXT_ID(&ompt_parallel_id, gtid) : 0;
-}
-
-void *__ompt_get_parallel_function_internal(int depth) {
-  ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL);
-  void *function = info ? info->microtask : NULL;
-  return function;
-}
-
-ompt_parallel_id_t __ompt_get_parallel_id_internal(int depth) {
-  ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL);
-  ompt_parallel_id_t id = info ? info->parallel_id : 0;
-  return id;
-}
-
-int __ompt_get_parallel_team_size_internal(int depth) {
-  // initialize the return value with the error value.
-  // if there is a team at the specified depth, the default
-  // value will be overwritten the size of that team.
-  int size = -1;
-  (void)__ompt_get_teaminfo(depth, &size);
-  return size;
+int __ompt_get_parallel_info_internal(int ancestor_level,
+                                      ompt_data_t **parallel_data,
+                                      int *team_size) {
+  ompt_team_info_t *info;
+  if (team_size) {
+    info = __ompt_get_teaminfo(ancestor_level, team_size);
+  } else {
+    info = __ompt_get_teaminfo(ancestor_level, NULL);
+  }
+  if (parallel_data) {
+    *parallel_data = info ? &(info->parallel_data) : NULL;
+  }
+  return info ? 2 : 0;
 }
 
 //----------------------------------------------------------
@@ -223,60 +234,182 @@ int __ompt_get_parallel_team_size_internal(int depth) {
 //----------------------------------------------------------
 
 void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int gtid,
-                             void *microtask, ompt_parallel_id_t ompt_pid) {
-  lwt->ompt_team_info.parallel_id = ompt_pid;
-  lwt->ompt_team_info.microtask = microtask;
-  lwt->ompt_task_info.task_id = 0;
+                             ompt_data_t *ompt_pid, void *codeptr) {
+  // Resets every field of the lightweight task team `lwt` that this file
+  // uses. `thr` and `gtid` are not read here. `ompt_pid` is dereferenced
+  // unconditionally, so it must not be NULL.
+  //
+  // The parallel data is copied by value from *ompt_pid and codeptr is kept
+  // as the master's return address.
+  lwt->ompt_team_info.parallel_data = *ompt_pid;
+  lwt->ompt_team_info.master_return_address = codeptr;
+  lwt->ompt_task_info.task_data.value = 0;
   lwt->ompt_task_info.frame.reenter_runtime_frame = NULL;
   lwt->ompt_task_info.frame.exit_runtime_frame = NULL;
-  lwt->ompt_task_info.function = NULL;
+  lwt->ompt_task_info.scheduling_parent = NULL;
+  lwt->ompt_task_info.deps = NULL;
+  lwt->ompt_task_info.ndeps = 0;
+  // not heap allocated and not linked into any list yet
+  lwt->heap = 0;
   lwt->parent = 0;
 }
 
-void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr) {
-  ompt_lw_taskteam_t *my_parent = thr->th.th_team->t.ompt_serialized_team_info;
-  lwt->parent = my_parent;
-  thr->th.th_team->t.ompt_serialized_team_info = lwt;
+// Makes the team/task info prepared in `lwt` the current OMPT info of thread
+// `thr`.
+//
+// If the thread's team is serialized more than once (t_serialized > 1), the
+// info that is current so far is saved into a list node and pushed onto
+// the team's ompt_serialized_team_info list. The node is `lwt` itself, or a
+// copy obtained from __kmp_allocate when `on_heap` is non-zero (such nodes
+// are released again in __ompt_lw_taskteam_unlink). Otherwise the values are
+// only copied into the team/task and `lwt` is not linked anywhere.
+void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr,
+                             int on_heap) {
+  ompt_lw_taskteam_t *link_lwt = lwt;
+  if (thr->th.th_team->t.t_serialized >
+      1) { // we already have a team, so link the new team and swap values
+    if (on_heap) { // the lw_taskteam cannot stay on stack, allocate it on heap
+      link_lwt =
+          (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t));
+    }
+    link_lwt->heap = on_heap;
+
+    // Exchange the new info with the current one. When link_lwt == lwt (the
+    // on-stack case) this is a plain swap; the temporary is needed because
+    // the assignment to link_lwt would otherwise overwrite lwt's values.
+    ompt_team_info_t tmp_team = lwt->ompt_team_info;
+    link_lwt->ompt_team_info = *OMPT_CUR_TEAM_INFO(thr);
+    *OMPT_CUR_TEAM_INFO(thr) = tmp_team;
+
+    ompt_task_info_t tmp_task = lwt->ompt_task_info;
+    link_lwt->ompt_task_info = *OMPT_CUR_TASK_INFO(thr);
+    *OMPT_CUR_TASK_INFO(thr) = tmp_task;
+
+    // link the taskteam into the list of taskteams:
+    ompt_lw_taskteam_t *my_parent =
+        thr->th.th_team->t.ompt_serialized_team_info;
+    link_lwt->parent = my_parent;
+    thr->th.th_team->t.ompt_serialized_team_info = link_lwt;
+  } else {
+    // this is the first serialized team, so we just store the values in the
+    // team and drop the taskteam-object
+    *OMPT_CUR_TEAM_INFO(thr) = lwt->ompt_team_info;
+    *OMPT_CUR_TASK_INFO(thr) = lwt->ompt_task_info;
+  }
 }
 
-ompt_lw_taskteam_t *__ompt_lw_taskteam_unlink(kmp_info_t *thr) {
+void __ompt_lw_taskteam_unlink(kmp_info_t *thr) {
+  // Pops the head of the team's ompt_serialized_team_info list (if any) and
+  // swaps the team/task info stored in that node with the thread's current
+  // info. Does nothing when the list is empty.
   ompt_lw_taskteam_t *lwtask = thr->th.th_team->t.ompt_serialized_team_info;
-  if (lwtask)
+  if (lwtask) {
     thr->th.th_team->t.ompt_serialized_team_info = lwtask->parent;
-  return lwtask;
+
+    ompt_team_info_t tmp_team = lwtask->ompt_team_info;
+    lwtask->ompt_team_info = *OMPT_CUR_TEAM_INFO(thr);
+    *OMPT_CUR_TEAM_INFO(thr) = tmp_team;
+
+    ompt_task_info_t tmp_task = lwtask->ompt_task_info;
+    lwtask->ompt_task_info = *OMPT_CUR_TASK_INFO(thr);
+    *OMPT_CUR_TASK_INFO(thr) = tmp_task;
+
+    // nodes flagged as heap copies are released here
+    if (lwtask->heap) {
+      __kmp_free(lwtask);
+      lwtask = NULL;
+    }
+  }
+  // The popped node is no longer returned to the caller.
 }
 
 //----------------------------------------------------------
 // task support
 //----------------------------------------------------------
 
-ompt_task_id_t __ompt_task_id_new(int gtid) {
-  static uint64_t ompt_task_id = 1;
-  return NEXT_ID(&ompt_task_id, gtid);
-}
+// Walks `ancestor_level` steps up from the calling thread's current task and
+// reports what is found there. Each step prefers, in this order: the parent
+// of the current lightweight team, the task's scheduling_parent, the pending
+// lightweight team list of the task's team, and finally td_parent (which
+// also moves `team` to its t_parent).
+//
+// Every output pointer may be NULL, in which case that item is not reported:
+//   type          - task type flags of the task found
+//   task_data     - address of the task's ompt data
+//   task_frame    - address of the task's ompt frame
+//   parallel_data - address of the parallel data of the matching team
+// Returns 2 if a task exists at that level, 0 otherwise. On the early
+// `return 0` paths (negative level, no thread, no current task or team) the
+// outputs are left untouched.
+//
+// NOTE(review): thread_num is accepted but never written by this function.
+int __ompt_get_task_info_internal(int ancestor_level, int *type,
+                                  ompt_data_t **task_data,
+                                  ompt_frame_t **task_frame,
+                                  ompt_data_t **parallel_data,
+                                  int *thread_num) {
+  if (ancestor_level < 0)
+    return 0;
 
-ompt_task_id_t __ompt_get_task_id_internal(int depth) {
-  ompt_task_info_t *info = __ompt_get_taskinfo(depth);
-  ompt_task_id_t task_id = info ? info->task_id : 0;
-  return task_id;
-}
+  // copied from __ompt_get_scheduling_taskinfo
+  ompt_task_info_t *info = NULL;
+  ompt_team_info_t *team_info = NULL;
+  kmp_info_t *thr = ompt_get_thread();
 
-void *__ompt_get_task_function_internal(int depth) {
-  ompt_task_info_t *info = __ompt_get_taskinfo(depth);
-  void *function = info ? info->function : NULL;
-  return function;
-}
+  if (thr) {
+    kmp_taskdata_t *taskdata = thr->th.th_current_task;
+    if (taskdata == NULL)
+      return 0;
+    kmp_team *team = thr->th.th_team;
+    if (team == NULL)
+      return 0;
+    ompt_lw_taskteam_t *lwt = NULL,
+                       *next_lwt = LWT_FROM_TEAM(taskdata->td_team);
+
+    while (ancestor_level > 0) {
+      // next lightweight team (if any)
+      if (lwt)
+        lwt = lwt->parent;
+
+      // next heavyweight team (if any) after
+      // lightweight teams are exhausted
+      if (!lwt && taskdata) {
+        // first try scheduling parent (for explicit task scheduling)
+        if (taskdata->ompt_task_info.scheduling_parent) {
+          taskdata = taskdata->ompt_task_info.scheduling_parent;
+        } else if (next_lwt) {
+          lwt = next_lwt;
+          next_lwt = NULL;
+        } else {
+          // then go for implicit tasks
+          taskdata = taskdata->td_parent;
+          // team is checked before it is advanced, so it can become NULL
+          // here and is only rejected on the next pass through this branch
+          if (team == NULL)
+            return 0;
+          team = team->t.t_parent;
+          if (taskdata) {
+            next_lwt = LWT_FROM_TEAM(taskdata->td_team);
+          }
+        }
+      }
+      ancestor_level--;
+    }
 
-ompt_frame_t *__ompt_get_task_frame_internal(int depth) {
-  ompt_task_info_t *info = __ompt_get_taskinfo(depth);
-  ompt_frame_t *frame = info ? frame = &info->frame : NULL;
-  return frame;
+    if (lwt) {
+      // a lightweight team carries its own task and team info
+      info = &lwt->ompt_task_info;
+      team_info = &lwt->ompt_team_info;
+      if (type) {
+        *type = ompt_task_implicit;
+      }
+    } else if (taskdata) {
+      info = &taskdata->ompt_task_info;
+      // NOTE(review): team is not re-checked for NULL at this point; confirm
+      // that taskdata != NULL always implies team != NULL after the walk.
+      team_info = &team->t.ompt_team_info;
+      if (type) {
+        // a task without td_parent is reported as the initial task
+        if (taskdata->td_parent) {
+          *type = (taskdata->td_flags.tasktype ? ompt_task_explicit
+                                               : ompt_task_implicit) |
+                  TASK_TYPE_DETAILS_FORMAT(taskdata);
+        } else {
+          *type = ompt_task_initial;
+        }
+      }
+    }
+    if (task_data) {
+      *task_data = info ? &info->task_data : NULL;
+    }
+    if (task_frame) {
+      // OpenMP spec asks for the scheduling task to be returned.
+      *task_frame = info ? &info->frame : NULL;
+    }
+    if (parallel_data) {
+      *parallel_data = team_info ? &(team_info->parallel_data) : NULL;
+    }
+    return info ? 2 : 0;
+  }
+  return 0;
 }
 
 //----------------------------------------------------------
 // team support
 //----------------------------------------------------------
 
-void __ompt_team_assign_id(kmp_team_t *team, ompt_parallel_id_t ompt_pid) {
-  team->t.ompt_team_info.parallel_id = ompt_pid;
+// Stores ompt_pid (copied by value) as the parallel_data of `team`.
+void __ompt_team_assign_id(kmp_team_t *team, ompt_data_t ompt_pid) {
+  team->t.ompt_team_info.parallel_data = ompt_pid;
+}
+
+//----------------------------------------------------------
+// misc
+//----------------------------------------------------------
+
+// Produces the next id for the calling thread. The first call on a thread
+// obtains a number from a counter shared by all threads (through
+// KMP_TEST_THEN_INC64) and shifts it into the upper OMPT_THREAD_ID_BITS bits
+// of a THREAD_LOCAL value; every call then increments that value and returns
+// the result.
+static uint64_t __ompt_get_unique_id_internal() {
+  static uint64_t next_prefix = 1;
+  static THREAD_LOCAL uint64_t last_id = 0;
+
+  if (last_id == 0) {
+    uint64_t prefix = KMP_TEST_THEN_INC64((kmp_int64 *)&next_prefix);
+    last_id = prefix << (sizeof(uint64_t) * 8 - OMPT_THREAD_ID_BITS);
+  }
+
+  last_id += 1;
+  return last_id;
 }
index b4f09a4..07223da 100644 (file)
@@ -13,42 +13,63 @@ typedef kmp_info_t ompt_thread_t;
  * forward declarations
  ****************************************************************************/
 
-void __ompt_team_assign_id(kmp_team_t *team, ompt_parallel_id_t ompt_pid);
+void __ompt_team_assign_id(kmp_team_t *team, ompt_data_t ompt_pid);
 void __ompt_thread_assign_wait_id(void *variable);
 
 void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, ompt_thread_t *thr,
-                             int gtid, void *microtask,
-                             ompt_parallel_id_t ompt_pid);
+                             int gtid, ompt_data_t *ompt_pid, void *codeptr);
 
-void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, ompt_thread_t *thr);
+void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, ompt_thread_t *thr,
+                             int on_heap);
 
-ompt_lw_taskteam_t *__ompt_lw_taskteam_unlink(ompt_thread_t *thr);
-
-ompt_parallel_id_t __ompt_parallel_id_new(int gtid);
-ompt_task_id_t __ompt_task_id_new(int gtid);
+void __ompt_lw_taskteam_unlink(ompt_thread_t *thr);
 
 ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size);
 
-ompt_task_info_t *__ompt_get_taskinfo(int depth);
-
-void __ompt_thread_begin(ompt_thread_type_t thread_type, int gtid);
+ompt_task_info_t *__ompt_get_task_info_object(int depth);
 
-void __ompt_thread_end(ompt_thread_type_t thread_type, int gtid);
+int __ompt_get_parallel_info_internal(int ancestor_level,
+                                      ompt_data_t **parallel_data,
+                                      int *team_size);
 
-int __ompt_get_parallel_team_size_internal(int ancestor_level);
+int __ompt_get_task_info_internal(int ancestor_level, int *type,
+                                  ompt_data_t **task_data,
+                                  ompt_frame_t **task_frame,
+                                  ompt_data_t **parallel_data, int *thread_num);
 
-ompt_task_id_t __ompt_get_task_id_internal(int depth);
+ompt_data_t *__ompt_get_thread_data_internal();
 
-ompt_frame_t *__ompt_get_task_frame_internal(int depth);
+// Forward declaration of the unique-id generator; the name has to match the
+// definition, __ompt_get_unique_id_internal.
+static uint64_t __ompt_get_unique_id_internal();
 
 /*****************************************************************************
  * macros
  ****************************************************************************/
 
+#define OMPT_CUR_TASK_INFO(thr) (&(thr->th.th_current_task->ompt_task_info))
+#define OMPT_CUR_TASK_DATA(thr)                                                \
+  (&(thr->th.th_current_task->ompt_task_info.task_data))
+#define OMPT_CUR_TEAM_INFO(thr) (&(thr->th.th_team->t.ompt_team_info))
+#define OMPT_CUR_TEAM_DATA(thr)                                                \
+  (&(thr->th.th_team->t.ompt_team_info.parallel_data))
+
 #define OMPT_HAVE_WEAK_ATTRIBUTE KMP_HAVE_WEAK_ATTRIBUTE
 #define OMPT_HAVE_PSAPI KMP_HAVE_PSAPI
 #define OMPT_STR_MATCH(haystack, needle) __kmp_str_match(haystack, 0, needle)
 
+// Hands back the return address stored for thread `gtid` and clears the
+// stored value, so a later load yields NULL unless an address has been stored
+// again in between. `gtid` is used to index __kmp_threads without checks.
+inline void *__ompt_load_return_address(int gtid) {
+  kmp_info_t *self = __kmp_threads[gtid];
+  void *saved = self->th.ompt_thread_info.return_address;
+
+  self->th.ompt_thread_info.return_address = NULL;
+  return saved;
+}
+
+#define OMPT_STORE_RETURN_ADDRESS(gtid)                                        \
+  if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads[gtid] &&              \
+      !__kmp_threads[gtid]->th.ompt_thread_info.return_address)                \
+  __kmp_threads[gtid]->th.ompt_thread_info.return_address =                    \
+      __builtin_return_address(0)
+#define OMPT_LOAD_RETURN_ADDRESS(gtid) __ompt_load_return_address(gtid)
+
 //******************************************************************************
 // inline functions
 //******************************************************************************
@@ -62,7 +83,7 @@ inline ompt_thread_t *ompt_get_thread() {
   return ompt_get_thread_gtid(gtid);
 }
 
-inline void ompt_set_thread_state(ompt_thread_t *thread, ompt_state_t state) {
+inline void ompt_set_thread_state(ompt_thread_t *thread, omp_state_t state) {
+  // Overwrites the state kept in the thread's OMPT info. `thread` is
+  // dereferenced unconditionally and therefore must not be NULL.
   thread->th.ompt_thread_info.state = state;
 }
 
index 23ea47d..42ff0ea 100644 (file)
@@ -2280,7 +2280,7 @@ int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
 #endif
                            ) {
 #if OMPT_SUPPORT
-  *exit_frame_ptr = __builtin_frame_address(0);
+  *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
 #endif
 
   switch (argc) {
index a22a7d9..9af5e17 100644 (file)
@@ -34,8 +34,7 @@ endmacro()
 
 pythonize_bool(LIBOMP_USE_HWLOC)
 pythonize_bool(LIBOMP_OMPT_SUPPORT)
-pythonize_bool(LIBOMP_OMPT_BLAME)
-pythonize_bool(LIBOMP_OMPT_TRACE)
+pythonize_bool(LIBOMP_OMPT_OPTIONAL)
 pythonize_bool(LIBOMP_HAVE_LIBM)
 pythonize_bool(LIBOMP_HAVE_LIBATOMIC)
 
index 22ad95f..655e8d6 100644 (file)
@@ -92,15 +92,15 @@ if config.has_ompt:
     # for callback.h
     config.test_cflags += " -I " + config.test_source_root + "/ompt"
 
+if 'Linux' in config.operating_system:
+    config.available_features.add("linux")
+
 # to run with icc INTEL_LICENSE_FILE must be set
 if 'INTEL_LICENSE_FILE' in os.environ:
     config.environment['INTEL_LICENSE_FILE'] = os.environ['INTEL_LICENSE_FILE']
 
-# substitutions
-if config.has_ompt:
-    config.substitutions.append(("FileCheck", config.test_filecheck))
-    config.substitutions.append(("%sort-threads", "sort --numeric-sort --stable"))
 
+# substitutions
 config.substitutions.append(("%libomp-compile-and-run", \
     "%libomp-compile && %libomp-run"))
 config.substitutions.append(("%libomp-cxx-compile-and-run", \
@@ -109,9 +109,14 @@ config.substitutions.append(("%libomp-cxx-compile", \
     "%clangXX %cflags -std=c++11 %s -o %t" + libs))
 config.substitutions.append(("%libomp-compile", \
     "%clang %cflags %s -o %t" + libs))
+config.substitutions.append(("%libomp-tool", \
+    "%clang %cflags -shared -fPIC -o %T/tool.so" + libs))
 config.substitutions.append(("%libomp-run", "%t"))
 config.substitutions.append(("%clangXX", config.test_cxx_compiler))
 config.substitutions.append(("%clang", config.test_compiler))
 config.substitutions.append(("%openmp_flag", config.test_openmp_flag))
 config.substitutions.append(("%cflags", config.test_cflags))
 
+if config.has_ompt:
+    config.substitutions.append(("FileCheck", config.test_filecheck))
+    config.substitutions.append(("%sort-threads", "sort --numeric-sort --stable"))
index 4ef828c..135e01a 100644 (file)
@@ -11,7 +11,7 @@ config.omp_header_directory = "@LIBOMP_BINARY_DIR@/src"
 config.operating_system = "@CMAKE_SYSTEM_NAME@"
 config.hwloc_library_dir = "@LIBOMP_HWLOC_LIBRARY_DIR@"
 config.using_hwloc = @LIBOMP_USE_HWLOC@
-config.has_ompt = @LIBOMP_OMPT_SUPPORT@ and @LIBOMP_OMPT_BLAME@ and @LIBOMP_OMPT_TRACE@
+config.has_ompt = @LIBOMP_OMPT_SUPPORT@ and @LIBOMP_OMPT_OPTIONAL@
 config.has_libm = @LIBOMP_HAVE_LIBM@
 config.has_libatomic = @LIBOMP_HAVE_LIBATOMIC@
 
old mode 100644 (file)
new mode 100755 (executable)
index d2d8966..9b19456
+#define _BSD_SOURCE
 #include <stdio.h>
 #include <inttypes.h>
+#include <omp.h>
 #include <ompt.h>
+#include "ompt-signal.h"
 
-static ompt_get_task_id_t ompt_get_task_id;
-static ompt_get_task_frame_t ompt_get_task_frame;
-static ompt_get_thread_id_t ompt_get_thread_id;
-static ompt_get_parallel_id_t ompt_get_parallel_id;
+/* Printable names for OMPT enum values.
+   The first two tables start with a NULL entry, presumably because the
+   matching enums start at 1 -- TODO confirm against ompt.h. */
+static const char* ompt_thread_type_t_values[] = {
+  NULL,
+  "ompt_thread_initial",
+  "ompt_thread_worker",
+  "ompt_thread_other"
+};
+
+static const char* ompt_task_status_t_values[] = {
+  NULL,
+  "ompt_task_complete",
+  "ompt_task_yield",
+  "ompt_task_cancel",
+  "ompt_task_others"
+};
+/* Indexed by position, not by flag value: on_ompt_callback_cancel picks
+   entries 0-3 for the construct kind and entries 4-6 for the cancel state. */
+static const char* ompt_cancel_flag_t_values[] = {
+  "ompt_cancel_parallel",
+  "ompt_cancel_sections",
+  "ompt_cancel_do",
+  "ompt_cancel_taskgroup",
+  "ompt_cancel_activated",
+  "ompt_cancel_detected",
+  "ompt_cancel_discarded_task"
+};
+
+static ompt_set_callback_t ompt_set_callback;
+static ompt_get_task_info_t ompt_get_task_info;
+static ompt_get_thread_data_t ompt_get_thread_data;
+static ompt_get_parallel_info_t ompt_get_parallel_info;
+static ompt_get_unique_id_t ompt_get_unique_id;
+static ompt_get_num_places_t ompt_get_num_places;
+static ompt_get_place_proc_ids_t ompt_get_place_proc_ids;
+static ompt_get_place_num_t ompt_get_place_num;
+static ompt_get_partition_place_nums_t ompt_get_partition_place_nums;
+static ompt_get_proc_id_t ompt_get_proc_id;
+static ompt_enumerate_states_t ompt_enumerate_states;
+static ompt_enumerate_mutex_impls_t ompt_enumerate_mutex_impls;
 
 static void print_ids(int level)
 {
-  ompt_frame_t* frame = ompt_get_task_frame(level);
-  printf("%" PRIu64 ": level %d: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", exit_frame=%p, reenter_frame=%p\n", ompt_get_thread_id(), level, ompt_get_parallel_id(level), ompt_get_task_id(level), frame->exit_runtime_frame, frame->reenter_runtime_frame);
+  ompt_frame_t* frame ;
+  ompt_data_t* parallel_data;
+  ompt_data_t* task_data;
+  int exists_task = ompt_get_task_info(level, NULL, &task_data, &frame, &parallel_data, NULL);
+  if (frame)
+  {
+    printf("%" PRIu64 ": task level %d: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", exit_frame=%p, reenter_frame=%p\n", ompt_get_thread_data()->value, level, exists_task ? parallel_data->value : 0, exists_task ? task_data->value : 0, frame->exit_runtime_frame, frame->reenter_runtime_frame);
+  }
+  else
+    printf("%" PRIu64 ": task level %d: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", frame=%p\n", ompt_get_thread_data()->value, level, exists_task ? parallel_data->value : 0, exists_task ? task_data->value : 0, frame);
 }
 
+/* Prints the OMPT thread id of the caller together with
+   __builtin_frame_address(level). Written as do { } while(0) so it can be
+   used like a single statement. */
 #define print_frame(level)\
 do {\
-  printf("%" PRIu64 ": __builtin_frame_address(%d)=%p\n", ompt_get_thread_id(), level, __builtin_frame_address(level));\
+  printf("%" PRIu64 ": __builtin_frame_address(%d)=%p\n", ompt_get_thread_data()->value, level, __builtin_frame_address(level));\
 } while(0)
 
+/* Places the label ompt_label_<id> right behind the preceding statement and
+   prints two candidate addresses derived from it (label-1 and label-4), see
+   the notes below the macro. `id` becomes part of the label name, so it has
+   to be unique within the enclosing function. The expansion does not end in
+   a semicolon; the caller supplies it. */
+#define print_current_address(id)\
+{}              /* Empty block between "#pragma omp ..." and __asm__ statement as a workaround for icc bug */ \
+__asm__("nop"); /* provide an instruction as jump target (compiler would insert an instruction if label is target of a jmp ) */ \
+ompt_label_##id:\
+    printf("%" PRIu64 ": current_address=%p or %p\n", ompt_get_thread_data()->value, (char*)(&& ompt_label_##id)-1, (char*)(&& ompt_label_##id)-4) 
+    /* "&& label" returns the address of the label (GNU extension); works with gcc, clang, icc */
+    /* for void-type runtime function, the label is after the nop (-1), for functions with return value, there is a mov instruction before the label (-4) */
+
+/* Places the label ompt_label_<id> right behind the preceding statement and
+   prints the label address divided by 256 as well as that quotient minus
+   one, both in hex. `id` becomes part of the label name, so it has to be
+   unique within the enclosing function. The expansion does not end in a
+   semicolon; the caller supplies it.
+   The values are uint64_t, hence PRIx64 instead of a plain "lx" conversion. */
+#define print_fuzzy_address(id)\
+{}              /* Empty block between "#pragma omp ..." and __asm__ statement as a workaround for icc bug */ \
+__asm__("nop"); /* provide an instruction as jump target (compiler would insert an instruction if label is target of a jmp ) */ \
+ompt_label_##id:\
+    printf("%" PRIu64 ": fuzzy_address=0x%" PRIx64 " or 0x%" PRIx64 "\n", ompt_get_thread_data()->value, ((uint64_t)(char*)(&& ompt_label_##id))/256-1, ((uint64_t)(char*)(&& ompt_label_##id))/256)
+    /* "&& label" returns the address of the label (GNU extension); works with gcc, clang, icc */
+    /* only the address divided by 256 (and that value minus one) is printed, so small instruction offsets around the label do not matter */
+
+/* Writes a textual form of the task-type flags in `type` into `buffer`:
+   the names of the set kind bits (initial/implicit/explicit/target) followed
+   by "|name" for each set detail bit.
+   The buffer is terminated up front, so a `type` with none of the known bits
+   set yields an empty string instead of leaving the buffer untouched.
+   No length is checked: the caller has to supply a buffer that is large
+   enough for all names. */
+static void format_task_type(int type, char* buffer)
+{
+  char* progress = buffer;
+  *progress = '\0';
+  if(type & ompt_task_initial) progress += sprintf(progress, "ompt_task_initial");
+  if(type & ompt_task_implicit) progress += sprintf(progress, "ompt_task_implicit");
+  if(type & ompt_task_explicit) progress += sprintf(progress, "ompt_task_explicit");
+  if(type & ompt_task_target) progress += sprintf(progress, "ompt_task_target");
+  if(type & ompt_task_undeferred) progress += sprintf(progress, "|ompt_task_undeferred");
+  if(type & ompt_task_untied) progress += sprintf(progress, "|ompt_task_untied");
+  if(type & ompt_task_final) progress += sprintf(progress, "|ompt_task_final");
+  if(type & ompt_task_mergeable) progress += sprintf(progress, "|ompt_task_mergeable");
+  if(type & ompt_task_merged) progress += sprintf(progress, "|ompt_task_merged");
+}
+
+/* Tool callback: a thread starts waiting for a mutex. Prints one
+   "ompt_event_wait_*" line for the known mutex kinds and nothing for any
+   other kind. */
+static void
+on_ompt_callback_mutex_acquire(
+  ompt_mutex_kind_t kind,
+  unsigned int hint,
+  unsigned int impl,
+  ompt_wait_id_t wait_id,
+  const void *codeptr_ra)
+{
+  if(kind == ompt_mutex_lock)
+  {
+    printf("%" PRIu64 ": ompt_event_wait_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
+  }
+  else if(kind == ompt_mutex_nest_lock)
+  {
+    printf("%" PRIu64 ": ompt_event_wait_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
+  }
+  else if(kind == ompt_mutex_critical)
+  {
+    printf("%" PRIu64 ": ompt_event_wait_critical: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
+  }
+  else if(kind == ompt_mutex_atomic)
+  {
+    printf("%" PRIu64 ": ompt_event_wait_atomic: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
+  }
+  else if(kind == ompt_mutex_ordered)
+  {
+    printf("%" PRIu64 ": ompt_event_wait_ordered: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
+  }
+}
+
+/* Tool callback: a thread has acquired a mutex. Prints one
+   "ompt_event_acquired_*" line for the known mutex kinds and nothing for any
+   other kind. */
+static void
+on_ompt_callback_mutex_acquired(
+  ompt_mutex_kind_t kind,
+  ompt_wait_id_t wait_id,
+  const void *codeptr_ra)
+{
+  if(kind == ompt_mutex_lock)
+  {
+    printf("%" PRIu64 ": ompt_event_acquired_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
+  }
+  else if(kind == ompt_mutex_nest_lock)
+  {
+    printf("%" PRIu64 ": ompt_event_acquired_nest_lock_first: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
+  }
+  else if(kind == ompt_mutex_critical)
+  {
+    printf("%" PRIu64 ": ompt_event_acquired_critical: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
+  }
+  else if(kind == ompt_mutex_atomic)
+  {
+    printf("%" PRIu64 ": ompt_event_acquired_atomic: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
+  }
+  else if(kind == ompt_mutex_ordered)
+  {
+    printf("%" PRIu64 ": ompt_event_acquired_ordered: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
+  }
+}
+
+/* Tool callback: a thread has released a mutex. Prints one
+   "ompt_event_release_*" line for the known mutex kinds and nothing for any
+   other kind. */
+static void
+on_ompt_callback_mutex_released(
+  ompt_mutex_kind_t kind,
+  ompt_wait_id_t wait_id,
+  const void *codeptr_ra)
+{
+  if(kind == ompt_mutex_lock)
+  {
+    printf("%" PRIu64 ": ompt_event_release_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
+  }
+  else if(kind == ompt_mutex_nest_lock)
+  {
+    printf("%" PRIu64 ": ompt_event_release_nest_lock_last: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
+  }
+  else if(kind == ompt_mutex_critical)
+  {
+    printf("%" PRIu64 ": ompt_event_release_critical: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
+  }
+  else if(kind == ompt_mutex_atomic)
+  {
+    printf("%" PRIu64 ": ompt_event_release_atomic: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
+  }
+  else if(kind == ompt_mutex_ordered)
+  {
+    printf("%" PRIu64 ": ompt_event_release_ordered: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
+  }
+}
+
+/* Tool callback for nest-lock events: prints an "acquired ... next" line for
+   ompt_scope_begin and a "release ... prev" line for ompt_scope_end; any
+   other endpoint value prints nothing. */
+static void
+on_ompt_callback_nest_lock(
+    ompt_scope_endpoint_t endpoint,
+    ompt_wait_id_t wait_id,
+    const void *codeptr_ra)
+{
+  if(endpoint == ompt_scope_begin)
+  {
+    printf("%" PRIu64 ": ompt_event_acquired_nest_lock_next: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
+  }
+  else if(endpoint == ompt_scope_end)
+  {
+    printf("%" PRIu64 ": ompt_event_release_nest_lock_prev: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
+  }
+}
+
+/* Tool callback for begin/end of barrier, taskwait and taskgroup regions.
+   Prints one event line per call; a barrier begin additionally prints the
+   ids of the current task via print_ids(0). At scope end parallel_data may
+   be NULL and is then printed as 0; at scope begin it is dereferenced
+   without a check. */
+static void
+on_ompt_callback_sync_region(
+  ompt_sync_region_kind_t kind,
+  ompt_scope_endpoint_t endpoint,
+  ompt_data_t *parallel_data,
+  ompt_data_t *task_data,
+  const void *codeptr_ra)
+{
+  if(endpoint == ompt_scope_begin)
+  {
+    if(kind == ompt_sync_region_barrier)
+    {
+      printf("%" PRIu64 ": ompt_event_barrier_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
+      print_ids(0);
+    }
+    else if(kind == ompt_sync_region_taskwait)
+    {
+      printf("%" PRIu64 ": ompt_event_taskwait_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
+    }
+    else if(kind == ompt_sync_region_taskgroup)
+    {
+      printf("%" PRIu64 ": ompt_event_taskgroup_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
+    }
+  }
+  else if(endpoint == ompt_scope_end)
+  {
+    if(kind == ompt_sync_region_barrier)
+    {
+      printf("%" PRIu64 ": ompt_event_barrier_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
+    }
+    else if(kind == ompt_sync_region_taskwait)
+    {
+      printf("%" PRIu64 ": ompt_event_taskwait_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
+    }
+    else if(kind == ompt_sync_region_taskgroup)
+    {
+      printf("%" PRIu64 ": ompt_event_taskgroup_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
+    }
+  }
+}
+
+/* Tool callback for begin/end of the waiting phase inside barrier, taskwait
+   and taskgroup regions. Prints one "ompt_event_wait_*" line per call. At
+   scope end parallel_data may be NULL and is then printed as 0; at scope
+   begin it is dereferenced without a check. */
+static void
+on_ompt_callback_sync_region_wait(
+  ompt_sync_region_kind_t kind,
+  ompt_scope_endpoint_t endpoint,
+  ompt_data_t *parallel_data,
+  ompt_data_t *task_data,
+  const void *codeptr_ra)
+{
+  if(endpoint == ompt_scope_begin)
+  {
+    if(kind == ompt_sync_region_barrier)
+    {
+      printf("%" PRIu64 ": ompt_event_wait_barrier_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
+    }
+    else if(kind == ompt_sync_region_taskwait)
+    {
+      printf("%" PRIu64 ": ompt_event_wait_taskwait_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
+    }
+    else if(kind == ompt_sync_region_taskgroup)
+    {
+      printf("%" PRIu64 ": ompt_event_wait_taskgroup_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
+    }
+  }
+  else if(endpoint == ompt_scope_end)
+  {
+    if(kind == ompt_sync_region_barrier)
+    {
+      printf("%" PRIu64 ": ompt_event_wait_barrier_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
+    }
+    else if(kind == ompt_sync_region_taskwait)
+    {
+      printf("%" PRIu64 ": ompt_event_wait_taskwait_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
+    }
+    else if(kind == ompt_sync_region_taskgroup)
+    {
+      printf("%" PRIu64 ": ompt_event_wait_taskgroup_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
+    }
+  }
+}
+
+// Flush callback: prints the value stored in thread_data followed by the
+// code pointer handed in by the runtime.
+static void
+on_ompt_callback_flush(
+    ompt_data_t *thread_data,
+    const void *codeptr_ra)
+{
+  printf("%" PRIu64 ": ompt_event_flush: codeptr_ra=%p\n",
+         thread_data->value,
+         codeptr_ra);
+}
+
+// Cancel callback: decodes `flags` into two names taken from
+// ompt_cancel_flag_t_values - the cancelled construct (parallel, sections,
+// do, taskgroup) and the cancellation state (activated, detected,
+// discarded_task) - and prints both together with the numeric flag value.
+// For each group the first matching bit wins.
+static void
+on_ompt_callback_cancel(
+    ompt_data_t *task_data,
+    int flags,
+    const void *codeptr_ra)
+{
+  // Start from a printable placeholder: if no known bit is set in `flags`
+  // the pointers would otherwise reach printf's %s uninitialized.
+  const char* first_flag_value = "unknown";
+  const char* second_flag_value = "unknown";
+  if(flags & ompt_cancel_parallel)
+    first_flag_value = ompt_cancel_flag_t_values[0];
+  else if(flags & ompt_cancel_sections)
+    first_flag_value = ompt_cancel_flag_t_values[1];
+  else if(flags & ompt_cancel_do)
+    first_flag_value = ompt_cancel_flag_t_values[2];
+  else if(flags & ompt_cancel_taskgroup)
+    first_flag_value = ompt_cancel_flag_t_values[3];
+
+  if(flags & ompt_cancel_activated)
+    second_flag_value = ompt_cancel_flag_t_values[4];
+  else if(flags & ompt_cancel_detected)
+    second_flag_value = ompt_cancel_flag_t_values[5];
+  else if(flags & ompt_cancel_discarded_task)
+    second_flag_value = ompt_cancel_flag_t_values[6];
+
+  printf("%" PRIu64 ": ompt_event_cancel: task_data=%" PRIu64 ", flags=%s|%s=%" PRIu32 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, task_data->value, first_flag_value, second_flag_value, flags,  codeptr_ra);
+}
 
 static void
-on_ompt_event_barrier_begin(
-  ompt_parallel_id_t parallel_id,
-  ompt_task_id_t task_id)
+on_ompt_callback_idle(
+  ompt_scope_endpoint_t endpoint)
 {
-  printf("%" PRIu64 ": ompt_event_barrier_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", ompt_get_thread_id(), parallel_id, task_id);
-  print_ids(0);
+  switch(endpoint)
+  {
+    case ompt_scope_begin:
+      printf("%" PRIu64 ": ompt_event_idle_begin:\n", ompt_get_thread_data()->value);
+      break;
+    case ompt_scope_end:
+      printf("%" PRIu64 ": ompt_event_idle_end:\n", ompt_get_thread_data()->value);
+      break;
+  }
 }
 
 static void
-on_ompt_event_barrier_end(
-  ompt_parallel_id_t parallel_id,
-  ompt_task_id_t task_id)
+on_ompt_callback_implicit_task(
+    ompt_scope_endpoint_t endpoint,
+    ompt_data_t *parallel_data,
+    ompt_data_t *task_data,
+    unsigned int team_size,
+    unsigned int thread_num)
 {
-  printf("%" PRIu64 ": ompt_event_barrier_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", ompt_get_thread_id(), parallel_id, task_id);
+  switch(endpoint)
+  {
+    case ompt_scope_begin:
+      if(task_data->ptr)
+        printf("%s\n", "0: task_data initially not null");
+      task_data->value = ompt_get_unique_id();
+      printf("%" PRIu64 ": ompt_event_implicit_task_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, team_size, thread_num);
+      break;
+    case ompt_scope_end:
+      printf("%" PRIu64 ": ompt_event_implicit_task_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32 "\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, team_size, thread_num);
+      break;
+  }
 }
 
 static void
-on_ompt_event_implicit_task_begin(
-  ompt_parallel_id_t parallel_id,
-  ompt_task_id_t task_id)
+on_ompt_callback_lock_init(
+  ompt_mutex_kind_t kind,
+  unsigned int hint,
+  unsigned int impl,
+  ompt_wait_id_t wait_id,
+  const void *codeptr_ra)
 {
-  printf("%" PRIu64 ": ompt_event_implicit_task_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", ompt_get_thread_id(), parallel_id, task_id);
+  switch(kind)
+  {
+    case ompt_mutex_lock:
+      printf("%" PRIu64 ": ompt_event_init_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
+      break;
+    case ompt_mutex_nest_lock:
+      printf("%" PRIu64 ": ompt_event_init_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
+      break;
+    default:
+      break;
+  }
 }
 
 static void
-on_ompt_event_implicit_task_end(
-  ompt_parallel_id_t parallel_id,
-  ompt_task_id_t task_id)
+on_ompt_callback_lock_destroy(
+  ompt_mutex_kind_t kind,
+  ompt_wait_id_t wait_id,
+  const void *codeptr_ra)
 {
-  printf("%" PRIu64 ": ompt_event_implicit_task_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", ompt_get_thread_id(), parallel_id, task_id);
+  switch(kind)
+  {
+    case ompt_mutex_lock:
+      printf("%" PRIu64 ": ompt_event_destroy_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
+      break;
+    case ompt_mutex_nest_lock:
+      printf("%" PRIu64 ": ompt_event_destroy_nest_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra);
+      break;
+    default:
+      break;
+  }
 }
 
 static void
-on_ompt_event_loop_begin(
-  ompt_parallel_id_t parallel_id,
-  ompt_task_id_t parent_task_id,
-  void *workshare_function)
+on_ompt_callback_work(
+  ompt_work_type_t wstype,
+  ompt_scope_endpoint_t endpoint,
+  ompt_data_t *parallel_data,
+  ompt_data_t *task_data,
+  uint64_t count,
+  const void *codeptr_ra)
 {
-  printf("%" PRIu64 ": ompt_event_loop_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", workshare_function=%p\n", ompt_get_thread_id(), parallel_id, parent_task_id, workshare_function);
+  // Work-sharing callback: prints one ompt_event_<construct>_begin or
+  // ompt_event_<construct>_end line for loop, sections, single (executor and
+  // other threads), distribute and taskloop regions. ompt_work_workshare is
+  // accepted for both endpoints but prints nothing.
+  // Note that the value of task_data is labelled "parent_task_id" in some
+  // lines and "task_id" in others; the labels are part of the output the
+  // tests match against.
+  switch(endpoint)
+  {
+    case ompt_scope_begin:
+      switch(wstype)
+      {
+        case ompt_work_loop:
+          printf("%" PRIu64 ": ompt_event_loop_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
+          break;
+        case ompt_work_sections:
+          printf("%" PRIu64 ": ompt_event_sections_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
+          break;
+        case ompt_work_single_executor:
+          printf("%" PRIu64 ": ompt_event_single_in_block_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
+          break;
+        case ompt_work_single_other:
+          printf("%" PRIu64 ": ompt_event_single_others_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
+          break;
+        case ompt_work_workshare:
+          //impl
+          break;
+        case ompt_work_distribute:
+          printf("%" PRIu64 ": ompt_event_distribute_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
+          break;
+        case ompt_work_taskloop:
+          //impl
+          printf("%" PRIu64 ": ompt_event_taskloop_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
+          break;
+      }
+      break;
+    case ompt_scope_end:
+      switch(wstype)
+      {
+        case ompt_work_loop:
+          printf("%" PRIu64 ": ompt_event_loop_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
+          break;
+        case ompt_work_sections:
+          printf("%" PRIu64 ": ompt_event_sections_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
+          break;
+        case ompt_work_single_executor:
+          printf("%" PRIu64 ": ompt_event_single_in_block_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
+          break;
+        case ompt_work_single_other:
+          printf("%" PRIu64 ": ompt_event_single_others_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
+          break;
+        case ompt_work_workshare:
+          //impl
+          break;
+        case ompt_work_distribute:
+          printf("%" PRIu64 ": ompt_event_distribute_end: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
+          break;
+        case ompt_work_taskloop:
+          //impl
+          printf("%" PRIu64 ": ompt_event_taskloop_end: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count);
+          break;
+      }
+      break;
+  }
 }
 
 static void
-on_ompt_event_loop_end(
-  ompt_parallel_id_t parallel_id,
-  ompt_task_id_t task_id)
+on_ompt_callback_master(
+  ompt_scope_endpoint_t endpoint,
+  ompt_data_t *parallel_data,
+  ompt_data_t *task_data,
+  const void *codeptr_ra)
 {
-  printf("%" PRIu64 ": ompt_event_loop_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", ompt_get_thread_id(), parallel_id, task_id);
+  switch(endpoint)
+  {
+    case ompt_scope_begin:
+      printf("%" PRIu64 ": ompt_event_master_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
+      break;
+    case ompt_scope_end:
+      printf("%" PRIu64 ": ompt_event_master_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
+      break;
+  }
 }
 
 static void
-on_ompt_event_parallel_begin(
-  ompt_task_id_t parent_task_id,
-  ompt_frame_t *parent_task_frame,
-  ompt_parallel_id_t parallel_id,
+on_ompt_callback_parallel_begin(
+  ompt_data_t *parent_task_data,
+  const ompt_frame_t *parent_task_frame,
+  ompt_data_t* parallel_data,
   uint32_t requested_team_size,
-  void *parallel_function,
-  ompt_invoker_t invoker)
+  ompt_invoker_t invoker,
+  const void *codeptr_ra)
 {
-  printf("%" PRIu64 ": ompt_event_parallel_begin: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, parallel_id=%" PRIu64 ", requested_team_size=%" PRIu32 ", parallel_function=%p, invoker=%d\n", ompt_get_thread_id(), parent_task_id, parent_task_frame->exit_runtime_frame, parent_task_frame->reenter_runtime_frame, parallel_id, requested_team_size, parallel_function, invoker);
+  if(parallel_data->ptr)
+    printf("%s\n", "0: parallel_data initially not null");
+  parallel_data->value = ompt_get_unique_id();
+  printf("%" PRIu64 ": ompt_event_parallel_begin: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, parallel_id=%" PRIu64 ", requested_team_size=%" PRIu32 ", codeptr_ra=%p, invoker=%d\n", ompt_get_thread_data()->value, parent_task_data->value, parent_task_frame->exit_runtime_frame, parent_task_frame->reenter_runtime_frame, parallel_data->value, requested_team_size, codeptr_ra, invoker);
 }
 
 static void
-on_ompt_event_parallel_end(
-  ompt_parallel_id_t parallel_id,
-  ompt_task_id_t task_id,
-  ompt_invoker_t invoker)
+on_ompt_callback_parallel_end(
+  ompt_data_t *parallel_data,
+  ompt_data_t *task_data,
+  ompt_invoker_t invoker,
+  const void *codeptr_ra)
 {
-  printf("%" PRIu64 ": ompt_event_parallel_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", invoker=%d\n", ompt_get_thread_id(), parallel_id, task_id, invoker);
+  printf("%" PRIu64 ": ompt_event_parallel_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", invoker=%d, codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, invoker, codeptr_ra);
 }
 
+// Task-creation callback: tags the new task with a unique id (after
+// complaining if the runtime handed over non-zero task data), formats the
+// task type flags into text via format_task_type and prints the event.
+// parent_task_data and parent_frame may be NULL; they are then reported
+// as 0 / NULL.
+static void
+on_ompt_callback_task_create(
+    ompt_data_t *parent_task_data,     /* id of parent task            */
+    const ompt_frame_t *parent_frame,  /* frame data for parent task   */
+    ompt_data_t* new_task_data,        /* id of created task           */
+    int type,
+    int has_dependences,
+    const void *codeptr_ra)               /* pointer to outlined function */
+{
+  if(new_task_data->ptr)
+    printf("%s\n", "0: new_task_data initially not null");
+  new_task_data->value = ompt_get_unique_id();
+  char buffer[2048];
+
+  format_task_type(type, buffer);
 
-void ompt_initialize(
+  //there is no parallel_begin callback for implicit parallel region
+  //thus it is initialized in initial task
+  if(type & ompt_task_initial)
+  {
+    // Start from NULL so that a lookup which does not fill in the pointer
+    // is detected instead of dereferencing an indeterminate value.
+    ompt_data_t *parallel_data = NULL;
+    ompt_get_parallel_info(0, &parallel_data, NULL);
+    if(parallel_data)
+    {
+      if(parallel_data->ptr)
+        printf("%s\n", "0: parallel_data initially not null");
+      parallel_data->value = ompt_get_unique_id();
+    }
+  }
+
+  printf("%" PRIu64 ": ompt_event_task_create: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, new_task_id=%" PRIu64 ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n", ompt_get_thread_data()->value, parent_task_data ? parent_task_data->value : 0, parent_frame ? parent_frame->exit_runtime_frame : NULL, parent_frame ? parent_frame->reenter_runtime_frame : NULL, new_task_data->value, codeptr_ra, buffer, type, has_dependences ? "yes" : "no");
+}
+
+// Task-schedule callback: prints the switch from first_task_data to
+// second_task_data with the prior task's status (name looked up in
+// ompt_task_status_t_values, followed by the numeric value). When that status
+// is ompt_task_complete an additional ompt_event_task_end line is printed
+// for the first task.
+static void
+on_ompt_callback_task_schedule(
+    ompt_data_t *first_task_data,
+    ompt_task_status_t prior_task_status,
+    ompt_data_t *second_task_data)
+{
+  printf("%" PRIu64 ": ompt_event_task_schedule: first_task_id=%" PRIu64 ", second_task_id=%" PRIu64 ", prior_task_status=%s=%d\n",
+         ompt_get_thread_data()->value,
+         first_task_data->value,
+         second_task_data->value,
+         ompt_task_status_t_values[prior_task_status],
+         prior_task_status);
+
+  if(prior_task_status != ompt_task_complete)
+    return;
+
+  printf("%" PRIu64 ": ompt_event_task_end: task_id=%" PRIu64 "\n",
+         ompt_get_thread_data()->value,
+         first_task_data->value);
+}
+
+// Task-dependences callback: prints the task id, the address of the
+// dependence array and the number of entries announced by the runtime.
+// The array contents are not inspected.
+static void
+on_ompt_callback_task_dependences(
+  ompt_data_t *task_data,
+  const ompt_task_dependence_t *deps,
+  int ndeps)
+{
+  printf("%" PRIu64 ": ompt_event_task_dependences: task_id=%" PRIu64 ", deps=%p, ndeps=%d\n",
+         ompt_get_thread_data()->value,
+         task_data->value,
+         (void *)deps,
+         ndeps);
+}
+
+// Task-dependence callback: prints the ids of the two tasks that form a
+// dependence pair.
+static void
+on_ompt_callback_task_dependence(
+  ompt_data_t *first_task_data,
+  ompt_data_t *second_task_data)
+{
+  printf("%" PRIu64 ": ompt_event_task_dependence_pair: first_task_id=%" PRIu64 ", second_task_id=%" PRIu64 "\n",
+         ompt_get_thread_data()->value,
+         first_task_data->value,
+         second_task_data->value);
+}
+
+// Thread-begin callback: complains if the runtime handed over non-zero
+// thread data, tags the thread with a unique id and prints its type (name
+// looked up in ompt_thread_type_t_values, followed by the numeric value).
+static void
+on_ompt_callback_thread_begin(
+  ompt_thread_type_t thread_type,
+  ompt_data_t *thread_data)
+{
+  if(thread_data->ptr)
+  {
+    printf("%s\n", "0: thread_data initially not null");
+  }
+  thread_data->value = ompt_get_unique_id();
+
+  printf("%" PRIu64 ": ompt_event_thread_begin: thread_type=%s=%d, thread_id=%" PRIu64 "\n",
+         ompt_get_thread_data()->value,
+         ompt_thread_type_t_values[thread_type],
+         thread_type,
+         thread_data->value);
+}
+
+// Thread-end callback: prints the id stored in the thread's data word.
+static void
+on_ompt_callback_thread_end(
+  ompt_data_t *thread_data)
+{
+  printf("%" PRIu64 ": ompt_event_thread_end: thread_id=%" PRIu64 "\n",
+         ompt_get_thread_data()->value,
+         thread_data->value);
+}
+
+// Control-tool callback: prints the command, modifier and argument passed
+// by the application together with the exit/reenter frame pointers of the
+// current task (ancestor level 0). Frame pointers are reported as NULL if
+// the runtime does not provide a frame. Always returns 0.
+static int
+on_ompt_callback_control_tool(
+  uint64_t command,
+  uint64_t modifier,
+  void *arg,
+  const void *codeptr_ra)
+{
+  // Start from NULL so that a lookup which does not fill in the pointer is
+  // detected instead of dereferencing an indeterminate value.
+  ompt_frame_t* omptTaskFrame = NULL;
+  ompt_get_task_info(0, NULL, (ompt_data_t**) NULL, &omptTaskFrame, NULL, NULL);
+  printf("%" PRIu64 ": ompt_event_control_tool: command=%" PRIu64 ", modifier=%" PRIu64 ", arg=%p, codeptr_ra=%p, current_task_frame.exit=%p, current_task_frame.reenter=%p \n", ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra, omptTaskFrame ? omptTaskFrame->exit_runtime_frame : NULL, omptTaskFrame ? omptTaskFrame->reenter_runtime_frame : NULL);
+  return 0; //success
+}
+
+// register_callback_t(name, type): stores the address of the handler
+// on_<name> in a local variable of callback type `type` (so the compiler can
+// diagnose a handler whose signature does not fit), registers it for event
+// `name` through ompt_set_callback and prints a "0: Could not register
+// callback" line when the answer is ompt_set_never.
+#define register_callback_t(name, type)                       \
+do{                                                           \
+  type f_##name = &on_##name;                                 \
+  if (ompt_set_callback(name, (ompt_callback_t)f_##name) ==   \
+      ompt_set_never)                                         \
+    printf("0: Could not register callback '" #name "'\n");   \
+}while(0)
+
+// register_callback(name): shorthand for events whose callback type is
+// named <name>_t.
+#define register_callback(name) register_callback_t(name, name##_t)
+
+// Tool initializer: resolves the OMPT entry points used by this header via
+// `lookup`, registers every on_ompt_callback_* handler and prints the
+// platform's textual form of a NULL pointer so that tests can match it.
+// The lookup results are stored without a NULL check. `fns` is not used.
+// Always returns 1.
+int ompt_initialize(
   ompt_function_lookup_t lookup,
-  const char *runtime_version,
-  unsigned int ompt_version)
+  ompt_fns_t* fns)
 {
-  ompt_set_callback_t ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback");
-  ompt_get_task_id = (ompt_get_task_id_t) lookup("ompt_get_task_id");
-  ompt_get_task_frame = (ompt_get_task_frame_t) lookup("ompt_get_task_frame");
-  ompt_get_thread_id = (ompt_get_thread_id_t) lookup("ompt_get_thread_id");
-  ompt_get_parallel_id = (ompt_get_parallel_id_t) lookup("ompt_get_parallel_id");
+  ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback");
+  ompt_get_task_info = (ompt_get_task_info_t) lookup("ompt_get_task_info");
+  ompt_get_thread_data = (ompt_get_thread_data_t) lookup("ompt_get_thread_data");
+  ompt_get_parallel_info = (ompt_get_parallel_info_t) lookup("ompt_get_parallel_info");
+  ompt_get_unique_id = (ompt_get_unique_id_t) lookup("ompt_get_unique_id");
 
-  ompt_set_callback(ompt_event_barrier_begin, (ompt_callback_t) &on_ompt_event_barrier_begin);
-  ompt_set_callback(ompt_event_barrier_end, (ompt_callback_t) &on_ompt_event_barrier_end);
-  ompt_set_callback(ompt_event_implicit_task_begin, (ompt_callback_t) &on_ompt_event_implicit_task_begin);
-  ompt_set_callback(ompt_event_implicit_task_end, (ompt_callback_t) &on_ompt_event_implicit_task_end);
-  ompt_set_callback(ompt_event_loop_begin, (ompt_callback_t) &on_ompt_event_loop_begin);
-  ompt_set_callback(ompt_event_loop_end, (ompt_callback_t) &on_ompt_event_loop_end);
-  ompt_set_callback(ompt_event_parallel_begin, (ompt_callback_t) &on_ompt_event_parallel_begin);
-  ompt_set_callback(ompt_event_parallel_end, (ompt_callback_t) &on_ompt_event_parallel_end);
-  printf("0: NULL_POINTER=%p\n", NULL);
+  ompt_get_num_places = (ompt_get_num_places_t) lookup("ompt_get_num_places");
+  ompt_get_place_proc_ids = (ompt_get_place_proc_ids_t) lookup("ompt_get_place_proc_ids");
+  ompt_get_place_num = (ompt_get_place_num_t) lookup("ompt_get_place_num");
+  ompt_get_partition_place_nums = (ompt_get_partition_place_nums_t) lookup("ompt_get_partition_place_nums");
+  ompt_get_proc_id = (ompt_get_proc_id_t) lookup("ompt_get_proc_id");
+  ompt_enumerate_states = (ompt_enumerate_states_t) lookup("ompt_enumerate_states");
+  ompt_enumerate_mutex_impls = (ompt_enumerate_mutex_impls_t) lookup("ompt_enumerate_mutex_impls");
+
+  // Handlers whose name does not match a <event>_t callback type are
+  // registered with the explicit type via register_callback_t.
+  register_callback(ompt_callback_mutex_acquire);
+  register_callback_t(ompt_callback_mutex_acquired, ompt_callback_mutex_t);
+  register_callback_t(ompt_callback_mutex_released, ompt_callback_mutex_t);
+  register_callback(ompt_callback_nest_lock);
+  register_callback(ompt_callback_sync_region);
+  register_callback_t(ompt_callback_sync_region_wait, ompt_callback_sync_region_t);
+  register_callback(ompt_callback_control_tool);
+  register_callback(ompt_callback_flush);
+  register_callback(ompt_callback_cancel);
+  register_callback(ompt_callback_idle);
+  register_callback(ompt_callback_implicit_task);
+  register_callback_t(ompt_callback_lock_init, ompt_callback_mutex_acquire_t);
+  register_callback_t(ompt_callback_lock_destroy, ompt_callback_mutex_t);
+  register_callback(ompt_callback_work);
+  register_callback(ompt_callback_master);
+  register_callback(ompt_callback_parallel_begin);
+  register_callback(ompt_callback_parallel_end);
+  register_callback(ompt_callback_task_create);
+  register_callback(ompt_callback_task_schedule);
+  register_callback(ompt_callback_task_dependences);
+  register_callback(ompt_callback_task_dependence);
+  register_callback(ompt_callback_thread_begin);
+  register_callback(ompt_callback_thread_end);
+  printf("0: NULL_POINTER=%p\n", (void*)NULL);
+  return 1; //success
+}
+
+// Tool finalizer: announces the runtime shutdown with the fixed prefix "0:".
+void ompt_finalize(ompt_fns_t* fns)
+{
+  (void)fns; // not needed by this tool
+  printf("0: ompt_event_runtime_shutdown\n");
 }
 
-ompt_initialize_t ompt_tool()
+ompt_fns_t* ompt_start_tool(
+  unsigned int omp_version,
+  const char *runtime_version)
 {
-  return &ompt_initialize;
+  // Tool entry point: returns the initializer/finalizer pair of this tool.
+  // The object has static storage duration, so the returned address stays
+  // valid after the call. Both version arguments are ignored.
+  static ompt_fns_t tool_fns = { &ompt_initialize, &ompt_finalize };
+  return &tool_fns;
 }
diff --git a/openmp/runtime/test/ompt/cancel/cancel_parallel.c b/openmp/runtime/test/ompt/cancel/cancel_parallel.c
new file mode 100644 (file)
index 0000000..a374801
--- /dev/null
@@ -0,0 +1,42 @@
+// RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+// Current GOMP interface implementation does not support cancellation
+// XFAIL: gcc
+
+#include "callback.h"
+#include "omp.h"
+
+// Thread 0 cancels the parallel region while the other thread, after a
+// delay, runs into a cancellation point. Before each directive the thread
+// prints the address of the label behind it divided by 256 (and that value
+// minus one), which the expectations below compare with the reported
+// codeptr_ra. The values are uint64_t and are therefore printed with PRIx64
+// rather than %lx, which only fits where uint64_t is unsigned long.
+int main()
+{
+  #pragma omp parallel num_threads(2)
+  {
+    if(omp_get_thread_num() == 0)
+    {
+      printf("%" PRIu64 ": fuzzy_address=0x%" PRIx64 " or 0x%" PRIx64 "\n", ompt_get_thread_data()->value, ((uint64_t)(char*)(&& ompt_label_1))/256-1, ((uint64_t)(char*)(&& ompt_label_1))/256);
+      #pragma omp cancel parallel
+      print_fuzzy_address(1); //does not actually print the address but provides a label
+    }
+    else
+    {
+      delay(100);
+      printf("%" PRIu64 ": fuzzy_address=0x%" PRIx64 " or 0x%" PRIx64 "\n", ompt_get_thread_data()->value, ((uint64_t)(char*)(&& ompt_label_2))/256-1, ((uint64_t)(char*)(&& ompt_label_2))/256);
+      #pragma omp cancellation point parallel
+      print_fuzzy_address(2); //does not actually print the address but provides a label
+    }
+  }
+
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_cancel'
+
+  // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no
+  // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_parallel|ompt_cancel_activated=17, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: fuzzy_address={{.*}}[[OTHER_RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_parallel|ompt_cancel_detected=33, codeptr_ra=[[OTHER_RETURN_ADDRESS]]{{[0-f][0-f]}}
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/cancel/cancel_taskgroup.c b/openmp/runtime/test/ompt/cancel/cancel_taskgroup.c
new file mode 100644 (file)
index 0000000..28558f8
--- /dev/null
@@ -0,0 +1,88 @@
+// RUN:  %libomp-compile && env OMP_CANCELLATION=true %libomp-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+// Current GOMP interface implementation does not support cancellation
+// XFAIL: gcc
+
+#include "callback.h"
+#include <unistd.h>  
+#include <stdio.h>
+
+// Taskgroup cancellation scenario. Inside a taskgroup in the master region
+// three deferred tasks are created; each one signals `condition`, waits on
+// it with the value 2 and then reaches a cancellation point. A fourth task,
+// created with if(0), waits on `condition` with the value 1 and then cancels
+// the taskgroup. After creating the tasks the master signals `condition`
+// once itself.
+// NOTE(review): OMPT_SIGNAL / OMPT_WAIT are defined outside this file;
+// presumably they increment the variable and spin until it reaches the
+// given value - confirm in callback.h.
+int main()
+{
+  int condition=0;
+  #pragma omp parallel num_threads(2)
+  {}
+
+  print_frame(0);
+  #pragma omp parallel num_threads(2)
+  {
+    #pragma omp master
+    {
+      #pragma omp taskgroup
+      {
+        #pragma omp task shared(condition)
+        {
+          printf("start execute task 1\n");
+          OMPT_SIGNAL(condition);
+          OMPT_WAIT(condition,2);
+          #pragma omp cancellation point taskgroup
+          printf("end execute task 1\n");
+        }
+        #pragma omp task shared(condition)
+        {
+          printf("start execute task 2\n");
+          OMPT_SIGNAL(condition);
+          OMPT_WAIT(condition,2);
+          #pragma omp cancellation point taskgroup
+          printf("end execute task 2\n");
+        }
+      #pragma omp task shared(condition)
+        {
+          printf("start execute task 3\n");
+          OMPT_SIGNAL(condition);
+          OMPT_WAIT(condition,2);
+          #pragma omp cancellation point taskgroup
+          printf("end execute task 3\n");
+        }
+      #pragma omp task if(0) shared(condition)
+        {
+          printf("start execute task 4\n");
+          OMPT_WAIT(condition,1);
+          #pragma omp cancel taskgroup
+          printf("end execute task 4\n");
+        }
+        OMPT_SIGNAL(condition);
+      }
+    }
+    #pragma omp barrier
+  }
+
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_master'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_cancel'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin'
+
+
+  // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_master_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}
+
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[FIRST_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[SECOND_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[THIRD_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no
+
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[CANCEL_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[PARENT_TASK_ID]], second_task_id=[[CANCEL_TASK_ID]], prior_task_status=ompt_task_others=4
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[CANCEL_TASK_ID]], flags=ompt_cancel_taskgroup|ompt_cancel_activated=24, codeptr_ra={{0x[0-f]*}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[CANCEL_TASK_ID]], second_task_id=[[PARENT_TASK_ID]], prior_task_status=ompt_task_cancel=3
+
+  // CHECK-DAG: {{^}}{{[0-9]+}}: ompt_event_cancel: task_data={{[0-9]+}}, flags=ompt_cancel_taskgroup|ompt_cancel_discarded_task=72, codeptr_ra=[[NULL]]
+  // CHECK-DAG: {{^}}{{[0-9]+}}: ompt_event_cancel: task_data={{[0-9]+}}, flags=ompt_cancel_taskgroup|ompt_cancel_discarded_task=72, codeptr_ra=[[NULL]]
+  
+  // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]]
+  // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_cancel: task_data={{[0-9]+}}, flags=ompt_cancel_taskgroup|ompt_cancel_detected=40, codeptr_ra={{0x[0-f]*}}
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/cancel/cancel_worksharing.c b/openmp/runtime/test/ompt/cancel/cancel_worksharing.c
new file mode 100644 (file)
index 0000000..4107336
--- /dev/null
@@ -0,0 +1,68 @@
+// RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+// Current GOMP interface implementation does not support cancellation
+// XFAIL: gcc
+
+
+#include "callback.h"
+#include <unistd.h>
+
+// Worksharing cancellation scenario, run once for a loop and once for a
+// sections construct with two threads each. One iteration/section signals
+// `condition` and cancels the construct; the other one waits on `condition`
+// (value 1 for the loop, 2 for the sections), delays and then reaches the
+// matching cancellation point.
+// NOTE(review): OMPT_SIGNAL / OMPT_WAIT are defined outside this file;
+// presumably they increment the variable and spin until it reaches the
+// given value - confirm in callback.h.
+int main()
+{
+  int condition=0;
+  #pragma omp parallel num_threads(2)
+  {
+    int x = 0;
+    int i;
+    #pragma omp for
+    for(i = 0; i < 2; i++)
+    {
+      if(i == 0)
+      {
+        x++;
+        OMPT_SIGNAL(condition);
+        #pragma omp cancel for
+      }
+      else
+      {
+        x++;
+        OMPT_WAIT(condition,1);
+        delay(10000);
+        #pragma omp cancellation point for
+      }
+    }
+  }
+  #pragma omp parallel num_threads(2)
+  {
+    #pragma omp sections
+    {
+      #pragma omp section
+      {
+        OMPT_SIGNAL(condition);
+        #pragma omp cancel sections
+      }
+      #pragma omp section
+      {
+        OMPT_WAIT(condition,2);
+        delay(10000);
+        #pragma omp cancellation point sections
+      }
+    }
+  }
+
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_cancel'
+
+  // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no
+  
+  // cancel for and sections
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_do|ompt_cancel_activated=20, codeptr_ra={{0x[0-f]*}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_sections|ompt_cancel_activated=18, codeptr_ra={{0x[0-f]*}}
+  // CHECK: {{^}}[[OTHER_THREAD_ID:[0-9]+]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_do|ompt_cancel_detected=36, codeptr_ra={{0x[0-f]*}}
+  // CHECK: {{^}}[[OTHER_THREAD_ID:[0-9]+]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_sections|ompt_cancel_detected=34, codeptr_ra={{0x[0-f]*}}
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/loadtool/tool_available.c b/openmp/runtime/test/ompt/loadtool/tool_available.c
new file mode 100644 (file)
index 0000000..0205193
--- /dev/null
@@ -0,0 +1,59 @@
+// RUN: %libomp-compile -DCODE && %libomp-compile -DTOOL -o%T/tool.so -shared -fPIC && env OMP_TOOL_LIBRARIES=%T/tool.so %libomp-run | FileCheck %s
+// REQUIRES: ompt
+
+/*
+ *  This file contains code for an OMPT shared library tool to be 
+ *  loaded and the code for the OpenMP executable. 
+ *  -DTOOL enables the code for the tool during compilation
+ *  -DCODE enables the code for the executable during compilation
+ *  The RUN line compiles the two binaries and then tries to load
+ *  the tool using the OMP_TOOL_LIBRARIES environment variable.
+ */
+
+#ifdef CODE
+#include "omp.h"
+
+// Executable part of the test: runs one empty two-thread parallel region.
+// All output matched by the expectations below is printed by the tool
+// library built from the TOOL part of this file.
+int main()
+{
+  #pragma omp parallel num_threads(2)
+  {
+  }
+
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 
+  
+  // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+  // CHECK: {{^}}0: ompt_event_runtime_shutdown
+
+  return 0;
+}
+
+#endif /* CODE */
+
+#ifdef TOOL
+
+#include <stdio.h>
+#include <ompt.h>
+
+// Tool initializer: registers nothing and only prints the platform's
+// textual form of a NULL pointer. Both parameters are unused.
+// Always returns 1.
+int ompt_initialize(
+  ompt_function_lookup_t lookup,
+  ompt_fns_t* fns)
+{
+  void *null_pointer = NULL;
+
+  printf("0: NULL_POINTER=%p\n", null_pointer);
+  return 1; //success
+}
+
+// Tool finalizer: announces the runtime shutdown. The prefix is the literal
+// "0:" that the test expects; the tool part of this file includes only
+// <stdio.h> and <ompt.h>, so no OpenMP API routine is called here.
+void ompt_finalize(ompt_fns_t* fns)
+{
+  printf("0: ompt_event_runtime_shutdown\n");
+}
+
+// Tool entry point: returns the initializer/finalizer pair of this tool.
+// The object has static storage duration, so the returned address stays
+// valid after the call. Both version arguments are ignored.
+ompt_fns_t* ompt_start_tool(
+  unsigned int omp_version,
+  const char *runtime_version)
+{
+  static ompt_fns_t tool_fns = { &ompt_initialize, &ompt_finalize };
+  return &tool_fns;
+}
+#endif /* TOOL */
diff --git a/openmp/runtime/test/ompt/misc/api_calls.c b/openmp/runtime/test/ompt/misc/api_calls.c
new file mode 100644 (file)
index 0000000..f7f1e16
--- /dev/null
@@ -0,0 +1,66 @@
+// RUN: %libomp-compile && env OMP_PLACES=cores %libomp-run | FileCheck %s
+// REQUIRES: ompt, linux
+#include "callback.h"
+#include <omp.h>
+#define __USE_GNU
+#include <sched.h>
+#undef __USE_GNU
+
+// Prints "<thread id>: <function_name>(0)=(v0,v1,...)" for the first `size` entries of list; size must be >= 1.
+void print_list(char* function_name, int size, int list[])
+{
+  printf("%" PRIu64 ": %s(0)=(%d", ompt_get_thread_data()->value, function_name, list[0]);
+  int i;
+  for(i = 1; i < size; i++)
+  {
+    printf(",%d", list[i]);
+  }
+  printf(")\n");
+}
+
+int main()
+{
+  #pragma omp parallel num_threads(1)
+  {
+    printf("%" PRIu64 ": omp_get_num_places()=%d\n", ompt_get_thread_data()->value, omp_get_num_places());
+    printf("%" PRIu64 ": ompt_get_num_places()=%d\n", ompt_get_thread_data()->value, ompt_get_num_places());
+
+    int omp_ids[omp_get_place_num_procs(0)];
+    omp_get_place_proc_ids(0, omp_ids);
+    print_list("omp_get_place_proc_ids", omp_get_place_num_procs(0), omp_ids);
+    int ompt_ids[omp_get_place_num_procs(0)];
+    ompt_get_place_proc_ids(0, omp_get_place_num_procs(0), ompt_ids);
+    print_list("ompt_get_place_proc_ids", omp_get_place_num_procs(0), ompt_ids);
+
+    printf("%" PRIu64 ": omp_get_place_num()=%d\n", ompt_get_thread_data()->value, omp_get_place_num());
+    printf("%" PRIu64 ": ompt_get_place_num()=%d\n", ompt_get_thread_data()->value, ompt_get_place_num());
+
+    int omp_nums[omp_get_partition_num_places()];
+    omp_get_partition_place_nums(omp_nums);
+    print_list("omp_get_partition_place_nums", omp_get_partition_num_places(), omp_nums);
+    int ompt_nums[omp_get_partition_num_places()];
+    ompt_get_partition_place_nums(omp_get_partition_num_places(), ompt_nums);
+    print_list("ompt_get_partition_place_nums", omp_get_partition_num_places(), ompt_nums);
+
+    printf("%" PRIu64 ": sched_getcpu()=%d\n", ompt_get_thread_data()->value, sched_getcpu());
+    printf("%" PRIu64 ": ompt_get_proc_id()=%d\n", ompt_get_thread_data()->value, ompt_get_proc_id());
+  }
+
+  // Check if libomp supports the callbacks for this test.
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: omp_get_num_places()=[[NUM_PLACES:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_get_num_places()=[[NUM_PLACES]]
+
+  // CHECK: {{^}}[[MASTER_ID]]: omp_get_place_proc_ids(0)=([[PROC_IDS:[0-9\,]+]])
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_get_place_proc_ids(0)=([[PROC_IDS]])
+
+  // CHECK: {{^}}[[MASTER_ID]]: omp_get_place_num()=[[PLACE_NUM:[-]?[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_get_place_num()=[[PLACE_NUM]]
+
+  // CHECK: {{^}}[[MASTER_ID]]: sched_getcpu()=[[CPU_ID:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_get_proc_id()=[[CPU_ID]]
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/misc/control_tool.c b/openmp/runtime/test/ompt/misc/control_tool.c
new file mode 100644 (file)
index 0000000..7fc4f02
--- /dev/null
@@ -0,0 +1,27 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  #pragma omp parallel num_threads(1)
+  {
+    print_frame(1); // printed value is captured as EXIT_FRAME by the checks below
+    print_frame(0); // printed value is captured as REENTER_FRAME by the checks below
+    omp_control_tool(omp_control_tool_flush, 1, NULL); // expected in the tool output as command=3, modifier=1, arg=NULL
+    print_current_address(0); // NOTE(review): presumably has to stay right after the call; its output is matched against codeptr_ra
+  }
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_control_tool'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(1)=[[EXIT_FRAME:0x[0-f]*]]
+  // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER_FRAME:0x[0-f]*]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_control_tool: command=3, modifier=1, arg=[[NULL]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]*]], current_task_frame.exit=[[EXIT_FRAME]], current_task_frame.reenter=[[REENTER_FRAME]]
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/misc/control_tool_no_ompt_support.c b/openmp/runtime/test/ompt/misc/control_tool_no_ompt_support.c
new file mode 100644 (file)
index 0000000..ee64da0
--- /dev/null
@@ -0,0 +1,12 @@
+// RUN: %libomp-compile-and-run
+#include <omp.h>
+
+int main()
+{
+  #pragma omp parallel num_threads(1)
+  {
+    omp_control_tool(omp_control_tool_flush, 1, NULL); // result ignored; this test registers no tool and its output is not checked
+  }
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/misc/idle.c b/openmp/runtime/test/ompt/misc/idle.c
new file mode 100644 (file)
index 0000000..7413c32
--- /dev/null
@@ -0,0 +1,32 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  int x = 0; // shared counter, incremented atomically by every thread of both regions
+  #pragma omp parallel num_threads(3)
+  {
+    #pragma omp atomic
+    x++;
+  }
+  #pragma omp parallel num_threads(2)
+  {
+    #pragma omp atomic
+    x++;
+  }
+  // NOTE(review): going from 3 to 2 threads presumably leaves a worker idle, producing the idle events matched below.
+
+  printf("x=%d\n", x);
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_idle'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_idle_begin:
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_idle_end:
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/ompt-signal.h b/openmp/runtime/test/ompt/ompt-signal.h
new file mode 100644 (file)
index 0000000..b5c28cf
--- /dev/null
@@ -0,0 +1,31 @@
+#if defined(WIN32) || defined(_WIN32)
+#include <windows.h>
+#define delay(t) Sleep(1); // t is accepted so delay(10) expands here too; it is ignored because Sleep() takes milliseconds
+#else
+#include <unistd.h>
+#define delay(t) usleep(t); // t is passed to usleep() unchanged (microseconds)
+#endif
+
+// These functions provide a signal-wait mechanism that enforces the expected scheduling in the test cases.
+// The condition variable (s) must be shared between the threads and initialized to 0.
+
+#define OMPT_SIGNAL(s) ompt_signal(&s)
+// Atomically adds one to the shared counter *s; ompt_wait polls the same counter.
+void ompt_signal(int* s)
+{
+  #pragma omp atomic
+  *s += 1;
+}
+                
+#define OMPT_WAIT(s,v) ompt_wait(&s,v)
+// Blocks until *s >= v: every iteration sleeps via delay(10), then atomically re-reads *s.
+void ompt_wait(int *s, int v)
+{
+  int observed = 0;
+  for(;;){
+    delay(10); // runs at least once, even if *s >= v already holds on entry
+    #pragma omp atomic read
+    observed = (*s);
+    if(observed >= v) { break; }
+  }
+}
diff --git a/openmp/runtime/test/ompt/parallel/dynamic_enough_threads.c b/openmp/runtime/test/ompt/parallel/dynamic_enough_threads.c
new file mode 100644 (file)
index 0000000..4c340ba
--- /dev/null
@@ -0,0 +1,43 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+
+int main()
+{
+  omp_set_dynamic(1); // dynamic adjustment on: the checks below accept any team size from 1 to 4
+
+  #pragma omp parallel num_threads(4)
+  {
+    print_ids(0); // matched below as "task level 0": this region's implicit task
+    print_ids(1); // matched below as "task level 1": the parent task that encountered the region
+  }
+  print_fuzzy_address(1); // output is matched against the codeptr_ra (RETURN_ADDRESS) of parallel_begin
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+
+  //team-size of 1-4 is expected
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // make sure initial data pointers are null
+  // CHECK-NOT: 0: parallel_data initially not null
+  // CHECK-NOT: 0: task_data initially not null
+  // CHECK-NOT: 0: thread_data initially not null
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]], team_size={{[1-4]}}
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+  // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/parallel/dynamic_not_enough_threads.c b/openmp/runtime/test/ompt/parallel/dynamic_not_enough_threads.c
new file mode 100644 (file)
index 0000000..f3a6e17
--- /dev/null
@@ -0,0 +1,43 @@
+// RUN: %libomp-compile && env OMP_THREAD_LIMIT=2 %libomp-run | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+
+int main()
+{
+  omp_set_dynamic(1); // dynamic adjustment on: the team may be smaller than the 4 threads requested below
+
+  #pragma omp parallel num_threads(4)
+  {
+    print_ids(0); // matched below as "task level 0": this region's implicit task
+    print_ids(1); // matched below as "task level 1": the parent task that encountered the region
+  }
+  print_fuzzy_address(1); // output is matched against the codeptr_ra (RETURN_ADDRESS) of parallel_begin
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+
+  //a team-size of 1-4 is accepted by the pattern below; OMP_THREAD_LIMIT=2 (see RUN line) should keep it at 1-2
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // make sure initial data pointers are null
+  // CHECK-NOT: 0: parallel_data initially not null
+  // CHECK-NOT: 0: task_data initially not null
+  // CHECK-NOT: 0: thread_data initially not null
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]], team_size={{[1-4]}}
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+  // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/parallel/max_active_levels_serialized.c b/openmp/runtime/test/ompt/parallel/max_active_levels_serialized.c
new file mode 100644 (file)
index 0000000..6b45354
--- /dev/null
@@ -0,0 +1,72 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=THREADS %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  omp_set_nested(1);
+  omp_set_max_active_levels(1); // one active level only: the inner region is expected to run on its encountering thread
+
+  #pragma omp parallel num_threads(2)
+  {
+    print_ids(0); // "task level 0": implicit task of the outer region
+    print_ids(1); // "task level 1": the parent task that encountered the outer region
+    #pragma omp parallel num_threads(2)
+    {
+      print_ids(0); // "task level 0": implicit task of the nested region
+      print_ids(1); // "task level 1": implicit task of the outer region
+      print_ids(2); // "task level 2": the parent task that encountered the outer region
+    }
+    print_fuzzy_address(1); // matched against the nested region's codeptr_ra (NESTED_RETURN_ADDRESS)
+  }
+  print_fuzzy_address(2); // matched against the outer region's codeptr_ra (RETURN_ADDRESS)
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+
+  // THREADS: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // make sure initial data pointers are null
+  // CHECK-NOT: 0: parallel_data initially not null
+  // CHECK-NOT: 0: task_data initially not null
+  // CHECK-NOT: 0: thread_data initially not null
+
+  // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+  
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+  // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+  return 0;
+}
index 1c7b68d..d128e80 100644 (file)
@@ -3,9 +3,11 @@
 // REQUIRES: ompt
 #include "callback.h"
 #include <omp.h>
+#include <unistd.h>
 
 int main()
 {
+  int condition=0;
   omp_set_nested(1);
   print_frame(0);
 
@@ -15,6 +17,10 @@ int main()
     print_ids(0);
     print_ids(1);
     print_frame(0);
+
+    //get all implicit task events before starting nested:
+    #pragma omp barrier
+    
     #pragma omp parallel num_threads(4)
     {
       print_frame(1);
@@ -22,17 +28,38 @@ int main()
       print_ids(1);
       print_ids(2);
       print_frame(0);
+      OMPT_SIGNAL(condition);
+      OMPT_WAIT(condition,16);
       #pragma omp barrier
+      print_fuzzy_address(1);
       print_ids(0);
     }
+    print_fuzzy_address(2);
     print_ids(0);
   }
+  print_fuzzy_address(3);
+
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+
 
   // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
-  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]]
+
+  // make sure initial data pointers are null
+  // CHECK-NOT: 0: parallel_data initially not null
+  // CHECK-NOT: 0: task_data initially not null
+  // CHECK-NOT: 0: thread_data initially not null
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
 
   // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end!
 
@@ -46,219 +73,224 @@ int main()
   // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
 
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+  // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
 
 
   // THREADS: {{^}}0: NULL_POINTER=[[NULL:.*$]]
   // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]]
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
 
   // nested parallel masters
   // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
   // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]]
-  // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
-  // THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
   // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION:0x[0-f]+]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
   // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
   // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[NESTED_EXIT:0x[0-f]+]]
-  // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]]
-  // THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
-  // THREADS: {{^}}[[MASTER_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
   // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[NESTED_REENTER:0x[0-f]+]]
   // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
   // explicit barrier
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NESTED_REENTER]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], codeptr_ra=[[BARRIER_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+  // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NESTED_REENTER]]
   // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[BARRIER_RETURN_ADDRESS]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]]
   // implicit barrier
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}
+  // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]], codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}
+  // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
   // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
-  // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
   // implicit barrier
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+  // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+  // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // nested parallel worker threads
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
   // can't reliably tell which parallel region is the parent...
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
   // can't reliably tell which parallel region is the parent...
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
   // can't reliably tell which parallel region is the parent...
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
   // can't reliably tell which parallel region is the parent...
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
   // can't reliably tell which parallel region is the parent...
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
   // can't reliably tell which parallel region is the parent...
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
   // can't reliably tell which parallel region is the parent...
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
   // can't reliably tell which parallel region is the parent...
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
   // can't reliably tell which parallel region is the parent...
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
   // can't reliably tell which parallel region is the parent...
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
   // can't reliably tell which parallel region is the parent...
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
   // can't reliably tell which parallel region is the parent...
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   return 0;
 }
index 6128816..b3327c2 100644 (file)
@@ -3,35 +3,59 @@
 // REQUIRES: ompt
 #include "callback.h"
 #include <omp.h>
+#include <unistd.h>
 
 int main()
 {
   omp_set_nested(1);
+  int condition;
 
   #pragma omp parallel num_threads(4)
   {
     print_ids(0);
     print_ids(1);
+    // Barrier: get all implicit task events of this team before starting the nested region.
+    #pragma omp barrier
     #pragma omp parallel num_threads(1)
     {
       print_ids(0);
       print_ids(1);
       print_ids(2);
+      // Barrier: get all implicit task events of this team before starting the nested region.
+      #pragma omp barrier
       #pragma omp parallel num_threads(4)
       {
         print_ids(0);
         print_ids(1);
         print_ids(2);
         print_ids(3);
+      OMPT_SIGNAL(condition);
+      OMPT_WAIT(condition,16);
       }
+      print_fuzzy_address(1);
     }
+    print_fuzzy_address(2);
   }
+  print_fuzzy_address(3);
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
 
   // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
-  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]]
+
+  // make sure initial data pointers are null
+  // CHECK-NOT: 0: parallel_data initially not null
+  // CHECK-NOT: 0: task_data initially not null
+  // CHECK-NOT: 0: thread_data initially not null
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
 
   // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end!
 
@@ -48,251 +72,261 @@ int main()
 
 
   // THREADS: 0: NULL_POINTER=[[NULL:.*$]]
-  // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]]
+  // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
 
   // nested parallel masters
   // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
-  // THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
 
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, parallel_function=[[NESTED_PARALLEL_FUNCTION:0x[0-f]+]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
   // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
-  // THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
-  // THREADS: {{^}}[[MASTER_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
 
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_NESTED_PARALLEL_FUNCTION:0x[0-f]+]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
   // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
-  // THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]]
-  // THREADS: {{^}}[[MASTER_ID]]: level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
-  // THREADS: {{^}}[[MASTER_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
   // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
   // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_NESTED_RETURN_ADDRESS]]
   // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
   // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
   // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_NESTED_RETURN_ADDRESS]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_NESTED_RETURN_ADDRESS]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]]
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_NESTED_RETURN_ADDRESS]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // nested parallel worker threads
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
   // can't reliably tell which parallel region is the parent...
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
   // can't reliably tell which parallel region is the parent...
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
   // can't reliably tell which parallel region is the parent...
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
   // can't reliably tell which parallel region is the parent...
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
   // can't reliably tell which parallel region is the parent...
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
   // can't reliably tell which parallel region is the parent...
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
   // can't reliably tell which parallel region is the parent...
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
   // can't reliably tell which parallel region is the parent...
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
   // can't reliably tell which parallel region is the parent...
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
   // can't reliably tell which parallel region is the parent...
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
   // can't reliably tell which parallel region is the parent...
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
   // can't reliably tell which parallel region is the parent...
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
-  // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}
+  // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
 
   return 0;
index b43323f..e55eb2e 100644 (file)
@@ -18,13 +18,29 @@ int main()
       print_ids(1);
       print_ids(2);
     }
+    print_fuzzy_address(1);
   }
+  print_fuzzy_address(2);
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
 
   // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
-  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]]
+
+  // make sure initial data pointers are null
+  // CHECK-NOT: 0: parallel_data initially not null
+  // CHECK-NOT: 0: task_data initially not null
+  // CHECK-NOT: 0: thread_data initially not null
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
 
   // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end!
 
@@ -41,67 +57,71 @@ int main()
 
 
   // THREADS: 0: NULL_POINTER=[[NULL:.*$]]
-  // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]]
+  // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
 
   // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]]
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION:0x[0-f]+]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
   // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[MASTER_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
   // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
   // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]]
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   return 0;
 }
diff --git a/openmp/runtime/test/ompt/parallel/no_thread_num_clause.c b/openmp/runtime/test/ompt/parallel/no_thread_num_clause.c
new file mode 100644 (file)
index 0000000..251037e
--- /dev/null
@@ -0,0 +1,95 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=THREADS %s
+// REQUIRES: ompt
+#include "callback.h"
+
+int main()
+{
+  omp_set_num_threads(4);
+  #pragma omp parallel
+  {
+    print_ids(0);
+    print_ids(1);
+  }
+  print_fuzzy_address(1);
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // make sure initial data pointers are null
+  // CHECK-NOT: 0: parallel_data initially not null
+  // CHECK-NOT: 0: task_data initially not null
+  // CHECK-NOT: 0: thread_data initially not null
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+
+  // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+  // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end!
+
+  // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+
+  // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+
+  // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+
+  // Null pointers below are matched through the NULL capture and the initial task id through a regex, so no particular pointer format or id value is hard-coded.
+  // THREADS: 0: NULL_POINTER=[[NULL:.*$]]
+  // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=0, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id={{[0-9]+}}, codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker={{[0-9]+}}
+
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]]
+  // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+  // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+  // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+  // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+  // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+  // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+  // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+  return 0;
+}
index 36f57e7..015ff5c 100644 (file)
@@ -10,12 +10,27 @@ int main()
     print_ids(0);
     print_ids(1);
   }
+  print_fuzzy_address(1);
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+
 
   // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
-  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]]
+  
+  // make sure initial data pointers are null
+  // CHECK-NOT: 0: parallel_data initially not null
+  // CHECK-NOT: 0: task_data initially not null
+  // CHECK-NOT: 0: thread_data initially not null
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
 
   // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end!
 
@@ -28,43 +43,48 @@ int main()
   // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
   // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
 
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
 
 
   // THREADS: 0: NULL_POINTER=[[NULL:.*$]]
-  // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker={{.*}}
+  // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker={{.*}}
 
   // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]]
   // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
-  // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
-  // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
-  // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
   // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
   // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   return 0;
 }
diff --git a/openmp/runtime/test/ompt/parallel/not_enough_threads.c b/openmp/runtime/test/ompt/parallel/not_enough_threads.c
new file mode 100644 (file)
index 0000000..cdc6b2f
--- /dev/null
@@ -0,0 +1,76 @@
+// RUN: %libomp-compile && env OMP_THREAD_LIMIT=2 %libomp-run | FileCheck %s
+// RUN: %libomp-compile && env OMP_THREAD_LIMIT=2 %libomp-run | %sort-threads | FileCheck --check-prefix=THREADS %s
+// REQUIRES: ompt
+#include "callback.h"
+
+int main()
+{
+  #pragma omp parallel num_threads(4)
+  {
+    print_ids(0);
+    print_ids(1);
+  }
+  print_fuzzy_address(1);
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // make sure initial data pointers are null
+  // CHECK-NOT: 0: parallel_data initially not null
+  // CHECK-NOT: 0: task_data initially not null
+  // CHECK-NOT: 0: thread_data initially not null
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+
+  // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+  // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end!
+
+  // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+
+  // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+
+  // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+
+
+  // THREADS: 0: NULL_POINTER=[[NULL:.*$]]
+  // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker={{[0-9]+}}
+
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]]
+  // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+  // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+  // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]]
+  // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/parallel/parallel_if0.c b/openmp/runtime/test/ompt/parallel/parallel_if0.c
new file mode 100644 (file)
index 0000000..8827448
--- /dev/null
@@ -0,0 +1,75 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+
+int main()
+{
+//  print_frame(0);
+  #pragma omp parallel if(0)
+  {
+//    print_frame(1);
+    print_ids(0);
+    print_ids(1);
+//    print_frame(0);
+    #pragma omp parallel if(0)
+    {
+//      print_frame(1);
+      print_ids(0);
+      print_ids(1);
+      print_ids(2);
+//      print_frame(0);
+      #pragma omp task
+      {
+//        print_frame(1);
+        print_ids(0);
+        print_ids(1);
+        print_ids(2);
+        print_ids(3);
+      }
+    }
+    print_fuzzy_address(1);
+  }
+  print_fuzzy_address(2);
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // make sure initial data pointers are null
+  // CHECK-NOT: 0: parallel_data initially not null
+  // CHECK-NOT: 0: task_data initially not null
+  // CHECK-NOT: 0: thread_data initially not null
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}}
+
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[EXPLICIT_TASK_ID:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[NESTED_IMPLICIT_TASK_ID]], second_task_id=[[EXPLICIT_TASK_ID]], prior_task_status=ompt_task_others=4
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[EXPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[EXPLICIT_TASK_ID]], second_task_id=[[NESTED_IMPLICIT_TASK_ID]], prior_task_status=ompt_task_complete=1
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_end: task_id=[[EXPLICIT_TASK_ID]]
+
+
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+  // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+  // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+  return 0;
+}
index beb64da..55bf98f 100644 (file)
@@ -4,21 +4,73 @@
 
 int main()
 {
+//  print_frame(0);
   #pragma omp parallel num_threads(1)
   {
+//    print_frame(1);
     print_ids(0);
     print_ids(1);
+//    print_frame(0);
+    #pragma omp parallel num_threads(1)
+    {
+//      print_frame(1);
+      print_ids(0);
+      print_ids(1);
+      print_ids(2);
+//      print_frame(0);
+      #pragma omp task
+      {
+//        print_frame(1);
+        print_ids(0);
+        print_ids(1);
+        print_ids(2);
+        print_ids(3);
+      }
+    }
+    print_fuzzy_address(1);
   }
+  print_fuzzy_address(2);
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
 
   // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
-  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=1, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]]
+
+  // make sure initial data pointers are null
+  // CHECK-NOT: 0: parallel_data initially not null
+  // CHECK-NOT: 0: task_data initially not null
+  // CHECK-NOT: 0: thread_data initially not null
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[OUTER_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
 
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // CHECK: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // CHECK: {{^}}[[MASTER_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]]
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[INNER_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}}
+
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[EXPLICIT_TASK_ID:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[NESTED_IMPLICIT_TASK_ID]], second_task_id=[[EXPLICIT_TASK_ID]], prior_task_status=ompt_task_others=4
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[EXPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[EXPLICIT_TASK_ID]], second_task_id=[[NESTED_IMPLICIT_TASK_ID]], prior_task_status=ompt_task_complete=1
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_end: task_id=[[EXPLICIT_TASK_ID]]
+
+
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0, task_id=[[NESTED_IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[INNER_RETURN_ADDRESS]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[INNER_RETURN_ADDRESS]]
 
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[OUTER_RETURN_ADDRESS]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[OUTER_RETURN_ADDRESS]]
 
   return 0;
 }
diff --git a/openmp/runtime/test/ompt/synchronization/barrier/explicit.c b/openmp/runtime/test/ompt/synchronization/barrier/explicit.c
new file mode 100644 (file)
index 0000000..4def7f8
--- /dev/null
@@ -0,0 +1,57 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  int x = 0;
+
+  #pragma omp parallel num_threads(2)
+  {
+    #pragma omp atomic
+    x++;
+
+    #pragma omp barrier
+    print_current_address();
+
+    #pragma omp atomic
+    x++;
+  }
+
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // master thread explicit barrier 
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+
+  // master thread implicit barrier at parallel end
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}}
+
+
+  // worker thread explicit barrier 
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[THREAD_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+
+  // worker thread implicit barrier at parallel end
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/synchronization/barrier/for_loop.c b/openmp/runtime/test/ompt/synchronization/barrier/for_loop.c
new file mode 100644 (file)
index 0000000..0e3abc5
--- /dev/null
@@ -0,0 +1,55 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  int y[] = {0,1,2,3};
+
+  #pragma omp parallel num_threads(2)
+  {
+    //implicit barrier at end of for loop
+    int i;
+    #pragma omp for
+    for (i = 0; i < 4; i++)
+    {
+      y[i]++;
+    }
+    print_current_address();
+  }
+
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // master thread implicit barrier at loop end 
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+
+  // master thread implicit barrier at parallel end
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+
+
+  // worker thread implicit barrier at loop end
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+
+  // worker thread implicit barrier after parallel
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/synchronization/barrier/for_simd.c b/openmp/runtime/test/ompt/synchronization/barrier/for_simd.c
new file mode 100644 (file)
index 0000000..376a435
--- /dev/null
@@ -0,0 +1,32 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  int y[] = {0,1,2,3};
+
+  int i;
+  #pragma omp for simd
+  for (i = 0; i < 4; i++)
+  {
+    y[i]++;
+  }
+
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // master thread implicit barrier at simd loop end 
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/synchronization/barrier/parallel_region.c b/openmp/runtime/test/ompt/synchronization/barrier/parallel_region.c
new file mode 100644 (file)
index 0000000..ea0a23f
--- /dev/null
@@ -0,0 +1,40 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  int x = 0;
+
+  //implicit barrier at end of a parallel region
+  #pragma omp parallel num_threads(2)
+  {
+    #pragma omp atomic
+    x++;
+  }
+  print_fuzzy_address();
+
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // master thread implicit barrier at parallel end
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+
+  // worker thread implicit barrier at parallel end
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/synchronization/barrier/sections.c b/openmp/runtime/test/ompt/synchronization/barrier/sections.c
new file mode 100644 (file)
index 0000000..4e1dfdd
--- /dev/null
@@ -0,0 +1,63 @@
+// RUN: %libomp-compile-and-run | %sort-threads  | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  int x = 0;
+
+  #pragma omp parallel num_threads(2)
+  {
+    //implicit barrier after sections with nowait but with lastprivates
+    //implicit barrier at end of sections
+    #pragma omp sections
+    {
+      #pragma omp section 
+      {
+        #pragma omp atomic
+        x++;
+      }
+      
+      #pragma omp section 
+      {
+        #pragma omp atomic
+        x++;
+      }
+    }
+  }
+
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // master thread implicit barrier at sections end
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+
+  // master thread implicit barrier at parallel end
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+
+
+  // worker thread implicit barrier at sections end
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+
+  // worker thread implicit barrier at parallel end
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/synchronization/barrier/single.c b/openmp/runtime/test/ompt/synchronization/barrier/single.c
new file mode 100644 (file)
index 0000000..4b928ff
--- /dev/null
@@ -0,0 +1,60 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  int x = 0;
+
+  #pragma omp parallel num_threads(2)
+  {
+    //implicit barrier at end of single
+    #pragma omp single
+    {
+      x++;
+    }
+    print_fuzzy_address();
+    //critical section to avoid merge of two barriers into one
+    #pragma omp critical
+    {
+      x++;
+    }
+  }
+
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // master thread implicit barrier at single end
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+  // master thread implicit barrier at parallel end
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+
+
+  // worker thread implicit barrier at single end
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+  // worker thread implicit barrier at parallel end
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/synchronization/critical.c b/openmp/runtime/test/ompt/synchronization/critical.c
new file mode 100644 (file)
index 0000000..7787ede
--- /dev/null
@@ -0,0 +1,31 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  #pragma omp critical
+  {
+    print_current_address(1); // address line expected directly after the acquired_critical event
+    print_ids(0);
+  }
+  print_current_address(2); // address line expected directly after the release_critical event
+
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_wait_critical: wait_id=[[WAIT_ID:[0-9]+]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_critical: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_critical: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/synchronization/flush.c b/openmp/runtime/test/ompt/synchronization/flush.c
new file mode 100644 (file)
index 0000000..875871d
--- /dev/null
@@ -0,0 +1,32 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+// GCC generates code that does not call the runtime for the flush construct
+// XFAIL: gcc
+
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  #pragma omp parallel num_threads(2)
+  {
+    int tid = omp_get_thread_num(); // value is not used by any check below
+    
+    #pragma omp flush
+    print_current_address(1); // address line expected after this thread's flush event
+  }
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_flush'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_flush: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: current_address=[[RETURN_ADDRESS]]
+  // The address line must come from the same thread that reported the flush event.
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_flush: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK: {{^}}[[THREAD_ID]]: current_address=[[RETURN_ADDRESS]]
+
+
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/synchronization/lock.c b/openmp/runtime/test/ompt/synchronization/lock.c
new file mode 100644 (file)
index 0000000..b546d1e
--- /dev/null
@@ -0,0 +1,44 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  // An OpenMP construct has to run first so that OMPT gets initialized
+  #pragma omp parallel num_threads(1)
+    print_ids(0);
+
+  omp_lock_t lock;
+  printf("%" PRIu64 ": &lock: %lli\n", ompt_get_thread_data()->value, (long long) &lock); // lock address; the checks below capture it as WAIT_ID
+  omp_init_lock(&lock);
+  print_current_address(1); // expected directly after the init_lock event
+  omp_set_lock(&lock);
+  print_current_address(2); // expected directly after the acquired_lock event
+  omp_unset_lock(&lock);
+  print_current_address(3); // expected directly after the release_lock event
+  omp_destroy_lock(&lock);
+  print_current_address(4); // expected directly after the destroy_lock event
+
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: &lock: [[WAIT_ID:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_init_lock: wait_id=[[WAIT_ID]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_lock: wait_id=[[WAIT_ID]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_destroy_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/synchronization/master.c b/openmp/runtime/test/ompt/synchronization/master.c
new file mode 100644 (file)
index 0000000..9e1e7c2
--- /dev/null
@@ -0,0 +1,36 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+// GCC generates code that does not call the runtime for the master construct
+// XFAIL: gcc
+
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  int x = 0;
+  #pragma omp parallel num_threads(2)
+  {
+    #pragma omp master
+    {
+      print_fuzzy_address(1); // compared with the master_begin codeptr_ra, ignoring its last two hex digits
+      x++;
+    }
+    print_current_address(2); // compared with the master_end codeptr_ra
+  }
+
+  printf("%" PRIu64 ": x=%d\n", ompt_get_thread_data()->value, x); // this output line is not matched by any check below
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_master'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_master_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_master_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=[[RETURN_ADDRESS_END:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: current_address=[[RETURN_ADDRESS_END]]
+
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/synchronization/nest_lock.c b/openmp/runtime/test/ompt/synchronization/nest_lock.c
new file mode 100644 (file)
index 0000000..39cd4cc
--- /dev/null
@@ -0,0 +1,52 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  // An OpenMP construct has to run first so that OMPT gets initialized
+  #pragma omp parallel num_threads(1)
+    print_ids(0);
+
+  omp_nest_lock_t nest_lock;
+  printf("%" PRIu64 ": &nest_lock: %lli\n", ompt_get_thread_data()->value, (long long) &nest_lock); // this output line is not matched by any check below
+  omp_init_nest_lock(&nest_lock);
+  print_current_address(1); // expected directly after the init_nest_lock event
+  omp_set_nest_lock(&nest_lock);
+  print_current_address(2); // expected directly after acquired_nest_lock_first
+  omp_set_nest_lock(&nest_lock);
+  print_current_address(3); // expected directly after acquired_nest_lock_next
+  omp_unset_nest_lock(&nest_lock);
+  print_current_address(4); // expected directly after release_nest_lock_prev
+  omp_unset_nest_lock(&nest_lock);
+  print_current_address(5); // expected directly after release_nest_lock_last
+  omp_destroy_nest_lock(&nest_lock);
+  print_current_address(6); // expected directly after the destroy_nest_lock event
+
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_init_nest_lock: wait_id=[[WAIT_ID:[0-9]+]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_first: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_next: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_prev: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_last: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_destroy_nest_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/synchronization/ordered.c b/openmp/runtime/test/ompt/synchronization/ordered.c
new file mode 100644 (file)
index 0000000..1d498b9
--- /dev/null
@@ -0,0 +1,31 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  #pragma omp ordered
+  {
+    print_current_address(1); // address line expected directly after the acquired_ordered event
+    print_ids(0);
+  }
+  print_current_address(2); // address line expected directly after the release_ordered event
+  
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_wait_ordered: wait_id=[[WAIT_ID:[0-9]+]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_ordered: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_ordered: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+  
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/synchronization/taskgroup.c b/openmp/runtime/test/ompt/synchronization/taskgroup.c
new file mode 100644 (file)
index 0000000..a85bf20
--- /dev/null
@@ -0,0 +1,48 @@
+// RUN:  %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+
+#include "callback.h"
+#include <unistd.h>  
+#include <stdio.h>
+
+int main()
+{
+  int condition=0; // NOTE(review): never referenced in this test
+  int x=0;
+  #pragma omp parallel num_threads(2)
+  {
+    #pragma omp master
+    {
+      #pragma omp taskgroup
+      {
+        print_current_address(1); // address line expected directly after the taskgroup_begin event
+        #pragma omp task
+        {
+          #pragma omp atomic
+          x++;
+        }
+      }
+      print_current_address(2); // address line expected directly after the taskgroup_end event
+    }
+  }
+
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_master'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_cancel'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin'
+
+
+  // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_taskgroup_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_begin: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskgroup_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]]
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/synchronization/taskwait.c b/openmp/runtime/test/ompt/synchronization/taskwait.c
new file mode 100644 (file)
index 0000000..3e319ff
--- /dev/null
@@ -0,0 +1,35 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  int x = 0;
+  #pragma omp parallel num_threads(2)
+  {
+    #pragma omp master
+    {
+      #pragma omp task
+      {
+        x++;
+      }
+      #pragma omp taskwait
+      print_current_address(1); // address line expected directly after the taskwait_end event
+    }
+  }
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_taskwait_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskwait_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskwait_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: ompt_event_taskwait_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/synchronization/test_lock.c b/openmp/runtime/test/ompt/synchronization/test_lock.c
new file mode 100644 (file)
index 0000000..bae4641
--- /dev/null
@@ -0,0 +1,54 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  omp_lock_t lock;
+  omp_init_lock(&lock);
+  print_current_address(1); // expected directly after the init_lock event
+
+  omp_test_lock(&lock); // lock is free here: the checks expect wait_lock followed by acquired_lock
+  print_current_address(2);
+  omp_unset_lock(&lock);
+  print_current_address(3); // expected directly after the release_lock event
+
+  omp_set_lock(&lock);
+  print_current_address(4);
+  omp_test_lock(&lock); // lock is already set here: the checks expect wait_lock with no acquired_lock before the address line
+  print_current_address(5);
+  omp_unset_lock(&lock);
+  print_current_address(6); // expected directly after the release_lock event
+
+  omp_destroy_lock(&lock);
+  print_current_address(7); // expected directly after the destroy_lock event
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_init_lock: wait_id=[[WAIT_ID:[0-9]+]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] 
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]  
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]  
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]  
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]  
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]  
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]  
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]  
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_destroy_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]  
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/synchronization/test_nest_lock.c b/openmp/runtime/test/ompt/synchronization/test_nest_lock.c
new file mode 100644 (file)
index 0000000..ad02d32
--- /dev/null
@@ -0,0 +1,42 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  omp_nest_lock_t nest_lock;
+  omp_init_nest_lock(&nest_lock);
+
+  omp_test_nest_lock(&nest_lock); // lock is free: the checks expect wait_nest_lock, then acquired_nest_lock_first
+  omp_unset_nest_lock(&nest_lock); // the checks expect release_nest_lock_last
+
+  omp_set_nest_lock(&nest_lock); // the checks expect wait_nest_lock, then acquired_nest_lock_first
+  omp_test_nest_lock(&nest_lock); // already held by this thread: the checks expect acquired_nest_lock_next
+  omp_unset_nest_lock(&nest_lock); // the checks expect release_nest_lock_prev
+  omp_unset_nest_lock(&nest_lock); // the checks expect release_nest_lock_last
+
+  omp_destroy_nest_lock(&nest_lock); // no check below matches a destroy event
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_init_nest_lock: wait_id=[[WAIT_ID:[0-9]+]], hint=0, impl={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}  
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_first: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_last: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}}  
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}  
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_first: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}}  
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_next: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_prev: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}}  
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_last: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}}  
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/synchronization/test_nest_lock_parallel.c b/openmp/runtime/test/ompt/synchronization/test_nest_lock_parallel.c
new file mode 100644 (file)
index 0000000..0e1dcec
--- /dev/null
@@ -0,0 +1,59 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  omp_nest_lock_t nest_lock;
+  omp_init_nest_lock(&nest_lock);
+
+  #pragma omp parallel num_threads(2)
+  {
+    #pragma omp master
+    {
+      omp_set_nest_lock(&nest_lock); // master takes the lock before the barrier
+      print_current_address(1);
+    }
+    #pragma omp barrier
+    omp_test_nest_lock(&nest_lock); // expected to fail on the non-master thread, since master holds the lock
+    print_current_address(2);
+    #pragma omp barrier
+    #pragma omp master
+    {
+      omp_unset_nest_lock(&nest_lock); // the checks expect release_nest_lock_prev
+      print_current_address(3);
+      omp_unset_nest_lock(&nest_lock); // the checks expect release_nest_lock_last
+      print_current_address(4);
+    }
+  }
+
+  omp_destroy_nest_lock(&nest_lock);
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID:[0-9]+]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] 
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_first: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]] 
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] 
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_next: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]] 
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_prev: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] 
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_last: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] 
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_destroy_nest_lock: wait_id=[[WAIT_ID]]
+
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] 
+  // CHECK-NOT: {{^}}[[THREAD_ID]]: ompt_event_acquired_nest_lock_next: wait_id=[[WAIT_ID]]
+  // CHECK-NEXT: {{^}}[[THREAD_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/tasks/dependences.c b/openmp/runtime/test/ompt/tasks/dependences.c
new file mode 100644 (file)
index 0000000..32817c4
--- /dev/null
@@ -0,0 +1,53 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+
+#include "callback.h"
+#include <omp.h>   
+#include <math.h>
+#include <unistd.h>
+
+int main()
+{
+  int x = 0;
+  #pragma omp parallel num_threads(2)
+  {
+    #pragma omp master
+    {  
+      #pragma omp task depend(out:x)
+      {
+        x++;
+        delay(100); // NOTE(review): presumably keeps this task busy until the second task is created - confirm against callback.h
+      }
+      print_fuzzy_address(1); // compared with the first task_create codeptr_ra, ignoring its last two hex digits
+    
+      #pragma omp task depend(in:x)
+      {
+        x = -1;
+      }
+    }
+  }
+
+  x++; // the final value of x is not matched by any check below
+
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_dependences'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_dependence'
+  
+  // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+
+  // make sure initial data pointers are null
+  // CHECK-NOT: 0: new_task_data initially not null
+
+  // CHECK: {{^}}{{[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter=[[NULL]], new_task_id=[[FIRST_TASK:[0-f]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, task_type=ompt_task_explicit=4, has_dependences=yes
+  // CHECK: {{^}}{{[0-9]+}}: ompt_event_task_dependences: task_id=[[FIRST_TASK]], deps={{0x[0-f]+}}, ndeps=1
+  // CHECK: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+
+  // CHECK: {{^}}{{[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter=[[NULL]], new_task_id=[[SECOND_TASK:[0-f]+]], codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, has_dependences=yes
+  // CHECK: {{^}}{{[0-9]+}}: ompt_event_task_dependences: task_id=[[SECOND_TASK]], deps={{0x[0-f]+}}, ndeps=1
+  // CHECK: {{^}}{{[0-9]+}}: ompt_event_task_dependence_pair: first_task_id=[[FIRST_TASK]], second_task_id=[[SECOND_TASK]]
+
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/tasks/explicit_task.c b/openmp/runtime/test/ompt/tasks/explicit_task.c
new file mode 100644 (file)
index 0000000..4911924
--- /dev/null
@@ -0,0 +1,100 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h> 
+
+int main()
+{
+  int condition=0; // shared with the explicit task; passed to OMPT_SIGNAL / OMPT_WAIT below
+  omp_set_nested(0);
+  print_frame(0); // the checks capture this frame address as MAIN_REENTER
+  #pragma omp parallel num_threads(2)
+  {
+    print_frame(1); // the checks capture this frame address as EXIT
+    print_ids(0);
+    print_ids(1);
+    print_frame(0); // the checks capture this frame address as REENTER
+    #pragma omp master
+    {
+      print_ids(0);
+      #pragma omp task shared(condition)
+      {
+        OMPT_SIGNAL(condition);
+        print_frame(1); // the checks capture this frame address as TASK_EXIT
+        print_ids(0);
+        print_ids(1);
+        print_ids(2);
+      }
+      print_fuzzy_address(1); // compared with the task_create codeptr_ra, ignoring its last two hex digits
+      OMPT_WAIT(condition,1); // NOTE(review): presumably blocks until the task has signalled - confirm against callback.h
+      print_ids(0);
+    }
+    #pragma omp barrier
+    print_ids(0);
+  }
+
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+
+
+  // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+
+  // make sure initial data pointers are null
+  // CHECK-NOT: 0: new_task_data initially not null
+  
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+  // nested parallel masters
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+  // <- ompt_event_task_create would be expected here
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+  // explicit barrier after master
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // implicit barrier parallel
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
+  // this is expected to come earlier and at MASTER:
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/tasks/serialized.c b/openmp/runtime/test/ompt/tasks/serialized.c
new file mode 100644 (file)
index 0000000..1fb6f06
--- /dev/null
@@ -0,0 +1,93 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>   
+#include <math.h>
+
+int main() // OMPT test: ids and frames reported around an explicit task that is serialized by a false if clause
+{
+  omp_set_nested(0);
+  print_frame(0);
+  #pragma omp parallel num_threads(2)
+  {
+    print_frame(1);
+    print_ids(0);
+    print_ids(1);
+    print_frame(0);
+    #pragma omp master
+    {
+      print_ids(0);
+      int t = (int)sin(0.1); // truncates to 0, so if(t) is false; presumably computed at run time to avoid constant folding - TODO confirm
+      #pragma omp task if(t)
+      {
+        print_frame(1);
+        print_ids(0);
+        print_ids(1);
+        print_ids(2);
+      }
+      print_fuzzy_address(1);
+      print_ids(0);
+    }
+    print_ids(0);
+  }
+
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+
+  
+  // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+
+  // make sure initial data pointers are null
+  // CHECK-NOT: 0: new_task_data initially not null
+  
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id={{[0-9]+}}, codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no
+  // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+
+  // master thread: implicit task of the parallel region
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+  // the serialized task is scheduled right after its creation, on the creating (master) thread
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // the master switches back to its implicit task once the serialized task has finished
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_end: task_id=[[TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+
+  // implicit barrier parallel
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/tasks/task_in_joinbarrier.c b/openmp/runtime/test/ompt/tasks/task_in_joinbarrier.c
new file mode 100644 (file)
index 0000000..74d8cc7
--- /dev/null
@@ -0,0 +1,90 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h> 
+
+int main() // OMPT test: a deferred task created by the master is expected to run on the other thread inside the barrier that ends the parallel region
+{
+  int condition=0;
+  omp_set_nested(0);
+  print_frame(0);
+  #pragma omp parallel num_threads(2)
+  {
+    print_frame(1);
+    print_ids(0);
+    print_ids(1);
+    print_frame(0);
+    #pragma omp master
+    {
+      print_ids(0);
+      #pragma omp task shared(condition)
+      {
+        OMPT_SIGNAL(condition); // presumably bumps `condition` so the waiting master can continue - TODO confirm against callback.h
+        print_frame(1);
+        print_ids(0);
+        print_ids(1);
+        print_ids(2);
+      }
+      OMPT_WAIT(condition,1); // presumably spins until the task has signalled, i.e. until another thread has started it - TODO confirm
+      print_ids(0);
+    }
+    print_ids(0);
+  }
+
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+
+
+  // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+
+  // make sure initial data pointers are null
+  // CHECK-NOT: 0: new_task_data initially not null
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+  // master thread: implicit task of the parallel region
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+  // <- ompt_event_task_create would be expected here
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[TASK_FUNCTION:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+  // implicit barrier parallel
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
+  // implicit barrier parallel
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/tasks/task_types.c b/openmp/runtime/test/ompt/tasks/task_types.c
new file mode 100644 (file)
index 0000000..7a4f775
--- /dev/null
@@ -0,0 +1,112 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+#include <math.h>
+
+
+void print_task_type(int id) // prints "<thread id>: id=<id> task_type=<flag names>=<flag value>" for the task that calls it
+{
+  #pragma omp critical
+  {
+    int task_type;
+    char buffer[2048]; // receives the textual form of task_type from format_task_type()
+    ompt_get_task_info(0, &task_type, NULL, NULL, NULL, NULL); // only the type flags are requested; every other output is passed as NULL
+    format_task_type(task_type, buffer);
+    printf("%" PRIu64 ": id=%d task_type=%s=%d\n", ompt_get_thread_data()->value, id, buffer, task_type);
+  }
+}
+
+int main() // OMPT test: task_type flags reported for initial, implicit and several kinds of explicit tasks in a two-thread team
+{
+  //initial task
+  print_task_type(0);
+
+  int x = 0; // initialized because every region below increments it; reading an indeterminate int is undefined behavior
+  //implicit task
+  #pragma omp parallel num_threads(1)
+  {
+    print_task_type(1);
+    x++;
+  }
+
+  #pragma omp parallel num_threads(2)
+  #pragma omp master
+  {
+    //explicit task
+    #pragma omp task
+    {
+      print_task_type(2);
+      x++;
+    }
+
+    //explicit task with undeferred
+    #pragma omp task if(0)
+    {
+      print_task_type(3);
+      x++;
+    }
+
+    //explicit task with untied
+    #pragma omp task untied
+    {
+      print_task_type(4);
+      x++;
+    }
+
+    //explicit task with final
+    #pragma omp task final(1)
+    {
+      print_task_type(5);
+      x++;
+      //nested explicit task with final and undeferred
+      #pragma omp task
+      {
+        print_task_type(6);
+        x++;
+      }
+    }
+
+    //Mergeable task test deactivated for now
+    //explicit task with mergeable
+    /*
+    #pragma omp task mergeable if((int)sin(0))
+    {
+      print_task_type(7);
+      x++;
+    }
+    */
+
+    //TODO: merged task
+  }
+
+  // NOTE(review): the deferred tasks above increment the shared x without synchronization; x is never read afterwards - confirm this is acceptable
+
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+
+
+  // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+  
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=0, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id={{[0-9]+}}, codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no
+  // CHECK-NOT: 0: parallel_data initially not null
+  // CHECK: {{^}}[[MASTER_ID]]: id=0 task_type=ompt_task_initial=1
+  // CHECK: {{^}}[[MASTER_ID]]: id=1 task_type=ompt_task_implicit|ompt_task_undeferred=134217730
+
+  // CHECK-DAG: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, has_dependences=no
+  // CHECK-DAG: {{^[0-9]+}}: id=2 task_type=ompt_task_explicit=4
+
+  // CHECK-DAG: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no
+  // CHECK-DAG: {{^[0-9]+}}: id=3 task_type=ompt_task_explicit|ompt_task_undeferred=134217732
+
+  // CHECK-DAG: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_untied=268435460, has_dependences=no
+  // CHECK-DAG: {{^[0-9]+}}: id=4 task_type=ompt_task_explicit|ompt_task_untied=268435460
+
+  // CHECK-DAG: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_final=536870916, has_dependences=no
+  // CHECK-DAG: {{^[0-9]+}}: id=5 task_type=ompt_task_explicit|ompt_task_final=536870916
+
+  // CHECK-DAG: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644, has_dependences=no
+  // CHECK-DAG: {{^[0-9]+}}: id=6 task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/tasks/task_types_serialized.c b/openmp/runtime/test/ompt/tasks/task_types_serialized.c
new file mode 100644 (file)
index 0000000..dfd5598
--- /dev/null
@@ -0,0 +1,112 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+
+#include "callback.h"
+#include <omp.h>
+
+void print_task_type(int id) // prints "<thread id>: id=<id> task_type=<flag names>=<flag value>" for the task that calls it
+{
+  #pragma omp critical
+  {
+    int task_type;
+    char buffer[2048]; // receives the textual form of task_type from format_task_type()
+    ompt_get_task_info(0, &task_type, NULL, NULL, NULL, NULL); // only the type flags are requested; every other output is passed as NULL
+    format_task_type(task_type, buffer);
+    printf("%" PRIu64 ": id=%d task_type=%s=%d\n", ompt_get_thread_data()->value, id, buffer, task_type);
+  }
+}
+
+int main() // OMPT test: task_type flags when the team has a single thread, where every explicit task is expected to be reported as undeferred
+{
+  //initial task
+  print_task_type(0);
+
+  int x = 0; // initialized because every region below increments it; reading an indeterminate int is undefined behavior
+  //implicit task
+  #pragma omp parallel num_threads(1)
+  {
+    print_task_type(1);
+    x++;
+  }
+
+  #pragma omp parallel num_threads(1)
+  #pragma omp master
+  {
+    //explicit task
+    #pragma omp task
+    {
+      print_task_type(2);
+      x++;
+    }
+
+    //explicit task with undeferred
+    #pragma omp task if(0)
+    {
+      print_task_type(3);
+      x++;
+    }
+
+    //explicit task with untied
+    #pragma omp task untied
+    {
+      print_task_type(4);
+      x++;
+    }
+
+    //explicit task with final
+    #pragma omp task final(1)
+    {
+      print_task_type(5);
+      x++;
+      //nested explicit task with final and undeferred
+      #pragma omp task
+      {
+        print_task_type(6);
+        x++;
+      }
+    }
+
+/*
+    //TODO:not working
+    //explicit task with mergeable
+    #pragma omp task mergeable
+    {
+      print_task_type(7);
+      x++;
+    }
+*/
+
+    //TODO: merged task
+  }
+
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+
+
+  // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+  
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=0, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id={{[0-9]+}}, codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no
+  // CHECK: {{^}}[[MASTER_ID]]: id=0 task_type=ompt_task_initial=1
+  // CHECK: {{^}}[[MASTER_ID]]: id=1 task_type=ompt_task_implicit|ompt_task_undeferred=134217730
+
+  // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no
+  // CHECK: {{^[0-9]+}}: id=2 task_type=ompt_task_explicit|ompt_task_undeferred=134217732
+
+  // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no
+  // CHECK: {{^[0-9]+}}: id=3 task_type=ompt_task_explicit|ompt_task_undeferred=134217732
+
+  // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_untied=402653188, has_dependences=no
+  // CHECK: {{^[0-9]+}}: id=4 task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_untied=402653188
+
+  // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644, has_dependences=no
+  // CHECK: {{^[0-9]+}}: id=5 task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644
+
+  // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644, has_dependences=no
+  // CHECK: {{^[0-9]+}}: id=6 task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644
+
+  // ___CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no
+  // ___CHECK: {{^[0-9]+}}: id=7 task_type=ompt_task_explicit|ompt_task_undeferred=134217732
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/tasks/taskyield.c b/openmp/runtime/test/ompt/tasks/taskyield.c
new file mode 100644 (file)
index 0000000..56a4697
--- /dev/null
@@ -0,0 +1,62 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+// The current GOMP interface implements taskyield as a stub
+// XFAIL: gcc
+
+#include "callback.h"
+#include <omp.h>   
+#include <unistd.h>
+
+int main() // OMPT test: at taskyield the master is expected to switch to its second, still-queued task while the first task is blocked on another thread
+{
+  int condition=0, x=0;
+  #pragma omp parallel num_threads(2)
+  {
+    #pragma omp master
+    {
+        #pragma omp task shared(condition)
+        {
+          OMPT_SIGNAL(condition);
+          OMPT_WAIT(condition,2); // keeps this task busy until the master signals after its taskyield (presumably waits for condition to reach 2 - TODO confirm against callback.h)
+        }
+        OMPT_WAIT(condition,1); // presumably waits until the first task has started on the other thread - TODO confirm
+        #pragma omp task shared(x)
+        {
+          x++;
+        }
+        printf("%" PRIu64 ": before yield\n", ompt_get_thread_data()->value);
+        #pragma omp taskyield
+        printf("%" PRIu64 ": after yield\n", ompt_get_thread_data()->value);
+        OMPT_SIGNAL(condition);
+    }
+  }
+
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+
+
+  // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+
+  // make sure initial data pointers are null
+  // CHECK-NOT: 0: new_task_data initially not null
+  
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID:[0-9]+]], team_size={{[0-9]+}}, thread_num={{[0-9]+}}
+
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[WORKER_TASK:[0-9]+]], codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, has_dependences=no
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[MAIN_TASK:[0-9]+]], codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, has_dependences=no
+
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[MAIN_TASK]], prior_task_status=ompt_task_yield=2
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[MAIN_TASK]], second_task_id=[[IMPLICIT_TASK_ID]], prior_task_status=ompt_task_complete=1
+
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_task_schedule: first_task_id={{[0-9]+}}, second_task_id=[[WORKER_TASK]], prior_task_status=ompt_task_others=4
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[WORKER_TASK]], second_task_id={{[0-9]+}}, prior_task_status=ompt_task_complete=1
+
+
+
+
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/tasks/untied_task.c b/openmp/runtime/test/ompt/tasks/untied_task.c
new file mode 100644 (file)
index 0000000..1adfe8f
--- /dev/null
@@ -0,0 +1,107 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h> 
+
+int main() // OMPT test: an untied task created by the master is expected to run on the other thread while that thread waits in the explicit barrier
+{
+  int condition=0;
+  omp_set_nested(0);
+  print_frame(0);
+  #pragma omp parallel num_threads(2)
+  {
+    print_frame(1);
+    print_ids(0);
+    print_ids(1);
+    print_frame(0);
+    #pragma omp master
+    {
+      print_ids(0);
+      #pragma omp task untied shared(condition)
+      {
+        OMPT_SIGNAL(condition); // presumably bumps `condition` so the waiting master can continue - TODO confirm against callback.h
+        print_frame(1);
+        print_ids(0);
+        print_ids(1);
+        print_ids(2);
+        #pragma omp task if(0)
+        { // NOTE(review): no FileCheck line below appears to target this nested undeferred task's output - confirm that is intended
+          print_ids(0);
+          print_ids(1);
+          print_ids(2);
+        }
+        print_ids(0);
+        print_ids(1);
+        print_ids(2);
+      }
+      OMPT_WAIT(condition,1); // presumably spins until the task has signalled, i.e. until another thread has started it - TODO confirm
+      print_ids(0);
+    }
+    #pragma omp barrier
+    print_ids(0);
+  }
+
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+
+
+  // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+
+  // make sure initial data pointers are null
+  // CHECK-NOT: 0: new_task_data initially not null
+  
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+  // master thread: implicit task of the parallel region
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+  // <- ompt_event_task_create would be expected here
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[TASK_FUNCTION:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+  // explicit barrier after master
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // implicit barrier parallel
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
+  // this is expected to come earlier and at MASTER:
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
+
+
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/worksharing/for/auto_split.c b/openmp/runtime/test/ompt/worksharing/for/auto_split.c
new file mode 100644 (file)
index 0000000..d82e3fd
--- /dev/null
@@ -0,0 +1,8 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h
+// REQUIRES: ompt
+// GCC doesn't call runtime for auto = static schedule
+// XFAIL: gcc
+
+#define SCHEDULE auto
+#include "base_split.h"
index 7398879..96022d0 100644 (file)
@@ -9,28 +9,35 @@ int main()
   for (i = 0; i < 4; i++) {
   }
 
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work'
+
+
   // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
-  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker={{.*}}
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker={{[0-9]+}}
 
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], workshare_function=0x{{[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=0x{{[0-f]+}}
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], workshare_function=0x{{[0-f]+}}
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=0x{{[0-f]+}}
   // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], workshare_function=0x{{[0-f]+}}
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=0x{{[0-f]+}}
   // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], workshare_function=0x{{[0-f]+}}
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=0x{{[0-f]+}}
   // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   return 0;
 }
index bd8b226..3376b37 100644 (file)
@@ -8,14 +8,21 @@ int main()
   #pragma omp parallel for num_threads(1) schedule(SCHEDULE)
   for (i = 0; i < 1; i++) {
   }
+  
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work'
+
 
   // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
-  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=1, parallel_function=0x{{[0-f]+}}, invoker={{.+}}
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=0x{{[0-f]+}}, invoker={{[0-9]+}}
 
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], workshare_function=0x{{[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=0x{{[0-f]+}}
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   return 0;
 }
diff --git a/openmp/runtime/test/ompt/worksharing/for/base_split.h b/openmp/runtime/test/ompt/worksharing/for/base_split.h
new file mode 100644 (file)
index 0000000..0f1fed3
--- /dev/null
@@ -0,0 +1,66 @@
+#include "callback.h"
+#include <omp.h>
+
+/* With the combined parallel-for construct (base.h), the return addresses are hard to compare.
+   With separate parallel and for-nowait constructs, the addresses become more predictable,
+   but the beginning of the for-loop still generates additional code, so the offset from
+   loop-begin to the label is more than 4 bytes.
+*/
+
+int main()
+{
+  unsigned int i;
+  // SCHEDULE is supplied by the test file that includes this header.
+  #pragma omp parallel num_threads(4) 
+  {
+    print_current_address(0);
+    #pragma omp for schedule(SCHEDULE) nowait
+    for (i = 0; i < 4; i++) {
+      print_fuzzy_address(1);
+    }
+    print_fuzzy_address(2);
+  }
+  print_fuzzy_address(3);
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work'
+
+  // Default prefix: codeptr_ra of loop-end and parallel events (minus the last two hex digits) must reappear in the following fuzzy_address line.
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[PARALLEL_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker={{[0-9]+}}
+
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=[[LOOP_BEGIN_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[LOOP_END_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[LOOP_END_RETURN_ADDRESS]]
+
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, invoker={{[0-9]+}}, codeptr_ra=[[PARALLEL_RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[PARALLEL_RETURN_ADDRESS]]
+  
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}}
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[LOOP_END_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[LOOP_END_RETURN_ADDRESS]]
+
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}}
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[LOOP_END_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[LOOP_END_RETURN_ADDRESS]]
+
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}}
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[LOOP_END_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[LOOP_END_RETURN_ADDRESS]]
+
+  // Second prefix (LOOP variant): the loop-begin codeptr_ra prefix must appear in four fuzzy_address lines.
+  // CHECK-LOOP: 0: NULL_POINTER=[[NULL:.*$]]
+  // CHECK-LOOP: 0: ompt_event_runtime_shutdown
+  // CHECK-LOOP: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra={{0x[0-f]+}}, invoker={{[0-9]+}}
+  // CHECK-LOOP: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=[[LOOP_BEGIN_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+  // CHECK-LOOP: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[LOOP_BEGIN_RETURN_ADDRESS]]
+  // CHECK-LOOP: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[LOOP_BEGIN_RETURN_ADDRESS]]
+  // CHECK-LOOP: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[LOOP_BEGIN_RETURN_ADDRESS]]
+  // CHECK-LOOP: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[LOOP_BEGIN_RETURN_ADDRESS]]
+
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/worksharing/for/dynamic_split.c b/openmp/runtime/test/ompt/worksharing/for/dynamic_split.c
new file mode 100644 (file)
index 0000000..3886f71
--- /dev/null
@@ -0,0 +1,6 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h
+// REQUIRES: ompt
+
+#define SCHEDULE dynamic
+#include "base_split.h"
diff --git a/openmp/runtime/test/ompt/worksharing/for/guided_split.c b/openmp/runtime/test/ompt/worksharing/for/guided_split.c
new file mode 100644 (file)
index 0000000..aaefbb3
--- /dev/null
@@ -0,0 +1,6 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h
+// REQUIRES: ompt
+
+#define SCHEDULE guided
+#include "base_split.h"
diff --git a/openmp/runtime/test/ompt/worksharing/for/runtime_split.c b/openmp/runtime/test/ompt/worksharing/for/runtime_split.c
new file mode 100644 (file)
index 0000000..032353e
--- /dev/null
@@ -0,0 +1,6 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h
+// REQUIRES: ompt
+
+#define SCHEDULE runtime
+#include "base_split.h"
diff --git a/openmp/runtime/test/ompt/worksharing/for/static_split.c b/openmp/runtime/test/ompt/worksharing/for/static_split.c
new file mode 100644 (file)
index 0000000..d8c88dd
--- /dev/null
@@ -0,0 +1,8 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h
+// REQUIRES: ompt
+// GCC doesn't call runtime for static schedule
+// XFAIL: gcc
+
+#define SCHEDULE static
+#include "base_split.h"
diff --git a/openmp/runtime/test/ompt/worksharing/sections.c b/openmp/runtime/test/ompt/worksharing/sections.c
new file mode 100644 (file)
index 0000000..7d274c4
--- /dev/null
@@ -0,0 +1,36 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+// GCC generates code that does not distinguish between sections and loops
+// XFAIL: gcc
+// Output is sorted per thread because the patterns below expect one thread's events, then the other's.
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  #pragma omp parallel sections num_threads(2)
+  {
+    #pragma omp section
+    {
+      printf("%lu: section 1\n", ompt_get_thread_data()->value);
+    }
+    #pragma omp section
+    {
+      printf("%lu: section 2\n", ompt_get_thread_data()->value);
+    }
+  }
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+  // Both threads must report the same parallel region and the same begin/end return addresses.
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_sections_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], parent_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[SECT_BEGIN:0x[0-f]+]], count=2
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_sections_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[SECT_END:0x[0-f]+]]
+
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_sections_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[SECT_BEGIN]], count=2
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_sections_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[SECT_END]]
+
+
+  return 0;
+}
diff --git a/openmp/runtime/test/ompt/worksharing/single.c b/openmp/runtime/test/ompt/worksharing/single.c
new file mode 100644 (file)
index 0000000..94865c4
--- /dev/null
@@ -0,0 +1,36 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+// GCC generates code that does not call the runtime for the single construct
+// XFAIL: gcc
+
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  int x = 0;
+  #pragma omp parallel num_threads(2)
+  {
+    #pragma omp single
+    {
+      x++;
+    }
+  }
+
+  printf("x=%d\n", x);
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[THREAD_ID_1:[0-9]+]]: ompt_event_single_in_block_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], parent_task_id=[[TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]+}}, count=1
+  // CHECK: {{^}}[[THREAD_ID_1]]: ompt_event_single_in_block_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra={{0x[0-f]+}}, count=1
+  // The other thread must report the same parallel region as the thread that ran the single block.
+  // CHECK: {{^}}[[THREAD_ID_2:[0-9]+]]: ompt_event_single_others_begin: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]+}}, count=1
+  // CHECK: {{^}}[[THREAD_ID_2]]: ompt_event_single_others_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra={{0x[0-f]+}}, count=1
+
+
+
+  return 0;
+}