From 6a0d6e7ca9b9e338e82572db79c26168684a7441 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 15 Feb 2022 10:22:30 +0100 Subject: [PATCH] openmp: Make finalize_task_copyfn order reproducible [PR104517] The following testcase fails -fcompare-debug, because finalize_task_copyfn was invoked from splay tree destruction, whose order can in some cases depend on -g/-g0. The fix is to queue the task stmts that need copyfn in a vector and run finalize_task_copyfn on elements of that vector. 2022-02-15 Jakub Jelinek PR debug/104517 * omp-low.cc (task_cpyfns): New variable. (delete_omp_context): Don't call finalize_task_copyfn from here. (create_task_copyfn): Push task_stmt into task_cpyfns. (execute_lower_omp): Call finalize_task_copyfn here on entries from task_cpyfns vector and release the vector. * gcc.dg/gomp/pr104517.c: New test. --- gcc/omp-low.cc | 9 ++++-- gcc/testsuite/gcc.dg/gomp/pr104517.c | 54 ++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/gomp/pr104517.c diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc index 0652084..77176ef 100644 --- a/gcc/omp-low.cc +++ b/gcc/omp-low.cc @@ -191,6 +191,7 @@ static int target_nesting_level; static bitmap task_shared_vars; static bitmap global_nonaddressable_vars; static vec<omp_context *> taskreg_contexts; +static vec<gomp_task *> task_cpyfns; static void scan_omp (gimple_seq *, omp_context *); static tree scan_omp_1_op (tree *, int *, void *); @@ -1082,9 +1083,6 @@ delete_omp_context (splay_tree_value value) DECL_ABSTRACT_ORIGIN (t) = NULL; } - if (is_task_ctx (ctx)) - finalize_task_copyfn (as_a <gomp_task *> (ctx->stmt)); - if (ctx->task_reduction_map) { ctx->task_reductions.release (); @@ -11951,6 +11949,7 @@ create_task_copyfn (gomp_task *task_stmt, omp_context *ctx) size_t looptempno = 0; child_fn = gimple_omp_task_copy_fn (task_stmt); + task_cpyfns.safe_push (task_stmt); child_cfun = DECL_STRUCT_FUNCTION (child_fn); gcc_assert (child_cfun->cfg == NULL); DECL_SAVED_TREE (child_fn) 
= alloc_stmt_list (); @@ -14475,6 +14474,10 @@ execute_lower_omp (void) && (TREE_CODE (TREE_TYPE (DECL_ARGUMENTS (current_function_decl))) == POINTER_TYPE)) remove_member_access_dummy_vars (DECL_INITIAL (current_function_decl)); + + for (auto task_stmt : task_cpyfns) + finalize_task_copyfn (task_stmt); + task_cpyfns.release (); return 0; } diff --git a/gcc/testsuite/gcc.dg/gomp/pr104517.c b/gcc/testsuite/gcc.dg/gomp/pr104517.c new file mode 100644 index 0000000..efb3175 --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/pr104517.c @@ -0,0 +1,54 @@ +/* PR debug/104517 */ +/* { dg-do compile } */ +/* { dg-options "-O1 -fcompare-debug -fopenmp -fno-tree-ter -save-temps" } */ + +enum { + omp_default_mem_alloc, + omp_large_cap_mem_alloc, + omp_const_mem_alloc, + omp_high_bw_mem_alloc +} omp_allocator_handle_t; + +int t, bar_nte, bar_tl, bar_i3, bar_dd; + +#pragma omp threadprivate(t) +#pragma omp declare target +int f, l, ll, r, r2; +#pragma omp end declare target + +void +bar (int *idp, int s, int nth, int g, int nta, int fi, int pp, int *q, + int ntm) +{ + int p = 0, i2 = 0, i1 = 0, m = 0, d = 0; + +#pragma omp target parallel for \ + device(p) firstprivate (f) allocate (f) in_reduction(+:r2) + for (int i = 0; i < 4; i++) + ll++; + +#pragma omp target parallel for \ + device(d) map (m) \ + if (target: p) firstprivate (f) defaultmap(tofrom: scalar) is_device_ptr (idp) \ + if (parallel: i2) reduction(+:r) num_threads (nth) linear (ll) \ + schedule(static) collapse(1) nowait depend(inout: d) allocate (f) \ + in_reduction(+:r2) + for (int i = 0; i < 4; i++) + ll++; + +#pragma omp taskloop simd firstprivate(f) lastprivate(s) grainsize(g) \ + collapse(1) untied if (i1) final(fi) mergeable nogroup \ + priority(pp) linear(ll) aligned(q) allocate(f) + for (int i = 0; i < 4; i++) + ll++; + +#pragma omp taskloop simd firstprivate(f) lastprivate(s) num_tasks(nta) \ + collapse(1) if (i1) final(fi) priority(pp) safelen(8) simdlen(4) \ + linear(ll) aligned(q) nontemporal(ntm) 
order(concurrent) allocate(f) + for (int i = 0; i < 4; i++) + ll++; + +#pragma omp parallel master firstprivate(f) shared(nth) proc_bind(spread) \ + copyin(t) allocate(f) + ; +} -- 2.7.4