From 29ddaf43f70e19fd1110b539e8b3d0436c757e34 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Fri, 23 Jul 2021 15:07:34 +0200 Subject: [PATCH] [OpenACC] Clarify sequencing of 'async' data copying vs. profiling events in 'libgomp.oacc-c-c++-common/acc_prof-{init,parallel}-1.c' ... as noticed with GCN offloading. Fix-up for r271346 (commit 5fae049dc272144f8e61af94ee0ba42b270915e5) "OpenACC Profiling Interface (incomplete)". libgomp/ * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Clarify sequencing of 'async' data copying vs. profiling events. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. --- .../libgomp.oacc-c-c++-common/acc_prof-init-1.c | 49 ++++++++++++++------- .../acc_prof-parallel-1.c | 51 +++++++++++++++------- 2 files changed, 68 insertions(+), 32 deletions(-) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c index 7d05f48..b5e7715 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c @@ -19,6 +19,19 @@ #define DEBUG_printf(...) //__builtin_printf (__VA_ARGS__) +static acc_prof_reg reg; +static acc_prof_reg unreg; +static acc_prof_lookup_func lookup; +void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_lookup_func lookup_) +{ + DEBUG_printf ("%s\n", __FUNCTION__); + + reg = reg_; + unreg = unreg_; + lookup = lookup_; +} + + static int state = -1; #define STATE_OP(state, op) \ @@ -34,7 +47,7 @@ static int state = -1; static acc_device_t acc_device_type; static int acc_device_num; -static int acc_async; +static int acc_async = acc_async_sync; struct tool_info @@ -192,6 +205,21 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info * assert (state == 11 || state == 111); +#if defined COPYIN + /* In an 'async' setting, this event may be triggered before actual 'async' + data copying has completed. Given that 'state' appears in 'COPYIN', we + first have to synchronize (that is, let the 'async' 'COPYIN' read the + current 'state' value)... */ + if (acc_async != acc_async_sync) + { + /* "We're not yet accounting for the fact that _OpenACC events may occur + during event processing_"; temporarily disable to avoid deadlock. */ + unreg (acc_ev_none, NULL, acc_toggle_per_thread); + acc_wait (acc_async); + reg (acc_ev_none, NULL, acc_toggle_per_thread); + } + /* ... before modifying it in the following. */ +#endif STATE_OP (state, ++); assert (tool_info != NULL); @@ -240,19 +268,6 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info * } -static acc_prof_reg reg; -static acc_prof_reg unreg; -static acc_prof_lookup_func lookup; -void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_lookup_func lookup_) -{ - DEBUG_printf ("%s\n", __FUNCTION__); - - reg = reg_; - unreg = unreg_; - lookup = lookup_; -} - - int main() { acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup); @@ -271,14 +286,15 @@ int main() acc_device_type = acc_get_device_type (); acc_device_num = acc_get_device_num (acc_device_type); - acc_async = 12; { int state_init; + acc_async = 12; #pragma acc parallel async(acc_async) COPYIN(state) copyout(state_init) { state_init = state; } + acc_async = acc_async_sync; #pragma acc wait assert (state_init == 11); } @@ -297,14 +313,15 @@ int main() acc_device_type = acc_get_device_type (); acc_device_num = acc_get_device_num (acc_device_type); - acc_async = 12; { int state_init; + acc_async = 12; #pragma acc parallel async(acc_async) COPYIN(state) copyout(state_init) { state_init = state; } + acc_async = acc_async_sync; #pragma acc wait assert (state_init == 111); } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c index a5e9ab3..1f50386 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c @@ -29,6 +29,19 @@ #define DEBUG_printf(...) //__builtin_printf (__VA_ARGS__) +static acc_prof_reg reg; +static acc_prof_reg unreg; +static acc_prof_lookup_func lookup; +void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_lookup_func lookup_) +{ + DEBUG_printf ("%s\n", __FUNCTION__); + + reg = reg_; + unreg = unreg_; + lookup = lookup_; +} + + static int state = -1; #define STATE_OP(state, op) \ @@ -44,7 +57,7 @@ static int state = -1; static acc_device_t acc_device_type; static int acc_device_num; -static int acc_async; +static int acc_async = acc_async_sync; struct tool_info @@ -235,6 +248,25 @@ static void cb_enter_data_end (acc_prof_info *prof_info, acc_event_info *event_i assert (state == 4 || state == 104); +#if defined COPYIN + /* Conceptually, 'acc_ev_enter_data_end' marks the end of data copying, + before 'acc_ev_enqueue_launch_start' marks invoking the compute region. + That's the 'state_init = state;' intended to be captured in the compute + regions. */ + /* In an 'async' setting, this event may be triggered before actual 'async' + data copying has completed. Given that 'state' appears in 'COPYIN', we + first have to synchronize (that is, let the 'async' 'COPYIN' read the + current 'state' value)... */ + if (acc_async != acc_async_sync) + { + /* "We're not yet accounting for the fact that _OpenACC events may occur + during event processing_"; temporarily disable to avoid deadlock. */ + unreg (acc_ev_none, NULL, acc_toggle_per_thread); + acc_wait (acc_async); + reg (acc_ev_none, NULL, acc_toggle_per_thread); + } + /* ... before modifying it in the following. */ +#endif STATE_OP (state, ++); assert (tool_info != NULL); @@ -664,19 +696,6 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve } -static acc_prof_reg reg; -static acc_prof_reg unreg; -static acc_prof_lookup_func lookup; -void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_lookup_func lookup_) -{ - DEBUG_printf ("%s\n", __FUNCTION__); - - reg = reg_; - unreg = unreg_; - lookup = lookup_; -} - - int main() { acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup); @@ -696,7 +715,6 @@ int main() acc_device_type = acc_get_device_type (); acc_device_num = acc_get_device_num (acc_device_type); - acc_async = acc_async_sync; assert (state == 0); { @@ -713,15 +731,16 @@ int main() STATE_OP (state, = 100); - acc_async = 12; { int state_init; + acc_async = 12; #pragma acc parallel async(acc_async) COPYIN(state) copyout(state_init) { asm volatile ("" : : : "memory"); // TODO PR90488 state_init = state; } + acc_async = acc_async_sync; #pragma acc wait assert (state_init == 104); } -- 2.7.4