From 6fcc3cac42998081dbbb1d3389bd42833ad9fdf4 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 12 Nov 2020 21:38:04 +0100 Subject: [PATCH] openmp: Implement allocate clause in omp lowering. For now, task/taskloop constructs aren't handled and C/C++ array reductions and reductions with task or inscan modifiers need further work. Instead of calling omp_alloc/omp_free (where the former doesn't have alignment argument and omp_aligned_alloc is 5.1 only feature), this calls GOMP_alloc/GOMP_free, so that the library can fail if it would fall back into NULL (exception is zero length allocations). 2020-11-12 Jakub Jelinek gcc/ * builtin-types.def (BT_FN_PTR_SIZE_SIZE_PTRMODE): New function type. * omp-builtins.def (BUILT_IN_GOACC_DECLARE): Move earlier. (BUILT_IN_GOMP_ALLOC, BUILT_IN_GOMP_FREE): New builtins. * gimplify.c (gimplify_scan_omp_clauses): Force allocator into a decl if it is not NULL, INTEGER_CST or decl. (gimplify_adjust_omp_clauses): Clear GOVD_EXPLICIT on explicit clauses which are being removed. Remove allocate clauses for variables not seen if they are private, firstprivate or linear too. Call omp_notice_variable on the allocator otherwise. (gimplify_omp_for): Handle iterator vars mentioned in allocate clauses similarly to non-is_gimple_reg iterators. * omp-low.c (struct omp_context): Add allocate_map field. (delete_omp_context): Delete it. (scan_sharing_clauses): Fill it from allocate clauses. Remove it if mentioned also in shared clause. (lower_private_allocate): New function. (lower_rec_input_clauses): Handle allocate clause for privatized variables, except for task/taskloop, C/C++ array reductions for now and task/inscan variables. (lower_send_shared_vars): Don't consider variables in allocate_map as shared. * omp-expand.c (expand_omp_for_generic, expand_omp_for_static_nochunk, expand_omp_for_static_chunk): Use expand_omp_build_assign instead of gimple_build_assign + gsi_insert_after. * builtins.c (builtin_fnspec): Handle BUILTIN_GOMP_ALLOC and BUILTIN_GOMP_FREE. * tree-ssa-ccp.c (evaluate_stmt): Handle BUILTIN_GOMP_ALLOC. * tree-ssa-dce.c (mark_stmt_if_obviously_necessary): Handle BUILTIN_GOMP_ALLOC. (mark_all_reaching_defs_necessary_1): Handle BUILTIN_GOMP_ALLOC and BUILTIN_GOMP_FREE. (propagate_necessity): Likewise. gcc/fortran/ * f95-lang.c (ATTR_ALLOC_WARN_UNUSED_RESULT_SIZE_2_NOTHROW_LIST): Define. (gfc_init_builtin_functions): Add alloc_size and warn_unused_result attributes to __builtin_GOMP_alloc. * types.def (BT_PTRMODE): New primitive type. (BT_FN_VOID_PTR_PTRMODE, BT_FN_PTR_SIZE_SIZE_PTRMODE): New function types. libgomp/ * libgomp.map (GOMP_alloc, GOMP_free): Export at GOMP_5.0.1. * omp.h.in (omp_alloc): Add malloc and alloc_size attributes. * libgomp_g.h (GOMP_alloc, GOMP_free): Declare. * allocator.c (omp_aligned_alloc): New for now static function, add alignment argument and handle it. (omp_alloc): Reimplement using omp_aligned_alloc. (GOMP_alloc, GOMP_free): New functions. (omp_free): Add ialias. * testsuite/libgomp.c-c++-common/allocate-1.c: New test. * testsuite/libgomp.c++/allocate-1.C: New test. --- gcc/builtin-types.def | 2 + gcc/builtins.c | 2 + gcc/fortran/f95-lang.c | 11 +- gcc/fortran/types.def | 5 + gcc/gimplify.c | 61 ++++- gcc/omp-builtins.def | 9 +- gcc/omp-expand.c | 9 +- gcc/omp-low.c | 148 ++++++++++-- gcc/tree-ssa-ccp.c | 1 + gcc/tree-ssa-dce.c | 41 ++-- libgomp/allocator.c | 42 +++- libgomp/libgomp.map | 6 + libgomp/libgomp_g.h | 5 + libgomp/omp.h.in | 2 +- libgomp/testsuite/libgomp.c++/allocate-1.C | 194 ++++++++++++++++ .../testsuite/libgomp.c-c++-common/allocate-1.c | 254 +++++++++++++++++++++ 16 files changed, 739 insertions(+), 53 deletions(-) create mode 100644 libgomp/testsuite/libgomp.c++/allocate-1.C create mode 100644 libgomp/testsuite/libgomp.c-c++-common/allocate-1.c diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def index c46b1bc..4a82ee4 100644 --- a/gcc/builtin-types.def +++ b/gcc/builtin-types.def @@ -637,6 +637,8 @@ DEF_FUNCTION_TYPE_3 (BT_FN_VOID_SIZE_SIZE_PTR, BT_VOID, BT_SIZE, BT_SIZE, DEF_FUNCTION_TYPE_3 (BT_FN_UINT_UINT_PTR_PTR, BT_UINT, BT_UINT, BT_PTR, BT_PTR) DEF_FUNCTION_TYPE_3 (BT_FN_PTR_PTR_CONST_SIZE_BOOL, BT_PTR, BT_PTR, BT_CONST_SIZE, BT_BOOL) +DEF_FUNCTION_TYPE_3 (BT_FN_PTR_SIZE_SIZE_PTRMODE, + BT_PTR, BT_SIZE, BT_SIZE, BT_PTRMODE) DEF_FUNCTION_TYPE_4 (BT_FN_SIZE_CONST_PTR_SIZE_SIZE_FILEPTR, BT_SIZE, BT_CONST_PTR, BT_SIZE, BT_SIZE, BT_FILEPTR) diff --git a/gcc/builtins.c b/gcc/builtins.c index 4ec1766..42c52a1 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -13023,6 +13023,7 @@ builtin_fnspec (tree callee) case BUILT_IN_MALLOC: case BUILT_IN_ALIGNED_ALLOC: case BUILT_IN_CALLOC: + case BUILT_IN_GOMP_ALLOC: return "mC"; CASE_BUILT_IN_ALLOCA: return "mc"; @@ -13044,6 +13045,7 @@ builtin_fnspec (tree callee) across it. */ case BUILT_IN_STACK_RESTORE: case BUILT_IN_FREE: + case BUILT_IN_GOMP_FREE: return ".co "; case BUILT_IN_VA_END: return ".cO "; diff --git a/gcc/fortran/f95-lang.c b/gcc/fortran/f95-lang.c index 526b721..1a05144 100644 --- a/gcc/fortran/f95-lang.c +++ b/gcc/fortran/f95-lang.c @@ -531,7 +531,7 @@ gfc_builtin_function (tree decl) return decl; } -/* So far we need just these 7 attribute types. */ +/* So far we need just these 8 attribute types. */ #define ATTR_NULL 0 #define ATTR_LEAF_LIST (ECF_LEAF) #define ATTR_NOTHROW_LEAF_LIST (ECF_NOTHROW | ECF_LEAF) @@ -540,6 +540,8 @@ gfc_builtin_function (tree decl) #define ATTR_PURE_NOTHROW_LEAF_LIST (ECF_NOTHROW | ECF_LEAF | ECF_PURE) #define ATTR_NOTHROW_LIST (ECF_NOTHROW) #define ATTR_CONST_NOTHROW_LIST (ECF_NOTHROW | ECF_CONST) +#define ATTR_ALLOC_WARN_UNUSED_RESULT_SIZE_2_NOTHROW_LIST \ + (ECF_NOTHROW) static void gfc_define_builtin (const char *name, tree type, enum built_in_function code, @@ -1236,6 +1238,13 @@ gfc_init_builtin_functions (void) #undef DEF_GOACC_BUILTIN #undef DEF_GOACC_BUILTIN_COMPILER #undef DEF_GOMP_BUILTIN + tree gomp_alloc = builtin_decl_explicit (BUILT_IN_GOMP_ALLOC); + tree two = build_int_cst (integer_type_node, 2); + DECL_ATTRIBUTES (gomp_alloc) + = tree_cons (get_identifier ("warn_unused_result"), NULL_TREE, + tree_cons (get_identifier ("alloc_size"), + build_tree_list (NULL_TREE, two), + DECL_ATTRIBUTES (gomp_alloc))); } gfc_define_builtin ("__builtin_trap", builtin_types[BT_FN_VOID], diff --git a/gcc/fortran/types.def b/gcc/fortran/types.def index 7b4925c..5736bba 100644 --- a/gcc/fortran/types.def +++ b/gcc/fortran/types.def @@ -70,6 +70,7 @@ DEF_PRIMITIVE_TYPE (BT_CONST_VOLATILE_PTR, build_pointer_type (build_qualified_type (void_type_node, TYPE_QUAL_VOLATILE|TYPE_QUAL_CONST))) +DEF_PRIMITIVE_TYPE (BT_PTRMODE, (*lang_hooks.types.type_for_mode)(ptr_mode, 0)) DEF_POINTER_TYPE (BT_PTR_LONG, BT_LONG) DEF_POINTER_TYPE (BT_PTR_ULONGLONG, BT_ULONGLONG) DEF_POINTER_TYPE (BT_PTR_PTR, BT_PTR) @@ -117,6 +118,8 @@ DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_SIZE_CONST_VPTR, BT_BOOL, BT_SIZE, BT_CONST_VOLATILE_PTR) DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_INT_BOOL, BT_BOOL, BT_INT, BT_BOOL) DEF_FUNCTION_TYPE_2 (BT_FN_VOID_UINT_UINT, BT_VOID, BT_UINT, BT_UINT) +DEF_FUNCTION_TYPE_2 (BT_FN_VOID_PTR_PTRMODE, + BT_VOID, BT_PTR, BT_PTRMODE) DEF_POINTER_TYPE (BT_PTR_FN_VOID_PTR_PTR, BT_FN_VOID_PTR_PTR) @@ -149,6 +152,8 @@ DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I16_INT, BT_VOID, BT_VOLATILE_PTR, BT_I16, DEF_FUNCTION_TYPE_3 (BT_FN_VOID_SIZE_SIZE_PTR, BT_VOID, BT_SIZE, BT_SIZE, BT_PTR) DEF_FUNCTION_TYPE_3 (BT_FN_UINT_UINT_PTR_PTR, BT_UINT, BT_UINT, BT_PTR, BT_PTR) +DEF_FUNCTION_TYPE_3 (BT_FN_PTR_SIZE_SIZE_PTRMODE, + BT_PTR, BT_SIZE, BT_SIZE, BT_PTRMODE) DEF_FUNCTION_TYPE_4 (BT_FN_VOID_OMPFN_PTR_UINT_UINT, BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT) diff --git a/gcc/gimplify.c b/gcc/gimplify.c index d18c43e..b861e17 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -9904,10 +9904,12 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, remove = true; break; } + else if (OMP_CLAUSE_ALLOCATE_ALLOCATOR (c) == NULL_TREE + || (TREE_CODE (OMP_CLAUSE_ALLOCATE_ALLOCATOR (c)) + == INTEGER_CST)) + ; else if (code == OMP_TASKLOOP - && OMP_CLAUSE_ALLOCATE_ALLOCATOR (c) - && (TREE_CODE (OMP_CLAUSE_ALLOCATE_ALLOCATOR (c)) - != INTEGER_CST)) + || !DECL_P (OMP_CLAUSE_ALLOCATE_ALLOCATOR (c))) OMP_CLAUSE_ALLOCATE_ALLOCATOR (c) = get_initialized_tmp_var (OMP_CLAUSE_ALLOCATE_ALLOCATOR (c), pre_p, NULL, false); @@ -10475,6 +10477,8 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p, && omp_shared_to_firstprivate_optimizable_decl_p (decl)) omp_mark_stores (gimplify_omp_ctxp->outer_context, decl); } + else + n->value &= ~GOVD_EXPLICIT; break; case OMP_CLAUSE_LASTPRIVATE: @@ -10774,6 +10778,41 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p, && omp_shared_to_firstprivate_optimizable_decl_p (decl)) omp_mark_stores (gimplify_omp_ctxp->outer_context, decl); break; + + case OMP_CLAUSE_ALLOCATE: + decl = OMP_CLAUSE_DECL (c); + n = splay_tree_lookup (ctx->variables, (splay_tree_key) decl); + if (n != NULL && !(n->value & GOVD_SEEN)) + { + if ((n->value & (GOVD_PRIVATE | GOVD_FIRSTPRIVATE | GOVD_LINEAR)) + != 0 + && (n->value & (GOVD_REDUCTION | GOVD_LASTPRIVATE)) == 0) + remove = true; + } + if (!remove + && OMP_CLAUSE_ALLOCATE_ALLOCATOR (c) + && TREE_CODE (OMP_CLAUSE_ALLOCATE_ALLOCATOR (c)) != INTEGER_CST + && ((ctx->region_type & (ORT_PARALLEL | ORT_TARGET)) != 0 + || (ctx->region_type & ORT_TASKLOOP) == ORT_TASK + || (ctx->region_type & ORT_HOST_TEAMS) == ORT_HOST_TEAMS)) + { + tree allocator = OMP_CLAUSE_ALLOCATE_ALLOCATOR (c); + n = splay_tree_lookup (ctx->variables, (splay_tree_key) allocator); + if (n == NULL) + { + enum omp_clause_default_kind default_kind + = ctx->default_kind; + ctx->default_kind = OMP_CLAUSE_DEFAULT_FIRSTPRIVATE; + omp_notice_variable (ctx, OMP_CLAUSE_ALLOCATE_ALLOCATOR (c), + true); + ctx->default_kind = default_kind; + } + else + omp_notice_variable (ctx, OMP_CLAUSE_ALLOCATE_ALLOCATOR (c), + true); + } + break; + case OMP_CLAUSE_COPYIN: case OMP_CLAUSE_COPYPRIVATE: case OMP_CLAUSE_IF: @@ -10823,7 +10862,6 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p, case OMP_CLAUSE_FINALIZE: case OMP_CLAUSE_INCLUSIVE: case OMP_CLAUSE_EXCLUSIVE: - case OMP_CLAUSE_ALLOCATE: break; default: @@ -11623,6 +11661,15 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p) c = omp_find_clause (OMP_FOR_CLAUSES (for_stmt), OMP_CLAUSE_TILE); if (c) tile = list_length (OMP_CLAUSE_TILE_LIST (c)); + c = omp_find_clause (OMP_FOR_CLAUSES (for_stmt), OMP_CLAUSE_ALLOCATE); + hash_set *allocate_uids = NULL; + if (c) + { + allocate_uids = new hash_set; + for (; c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_ALLOCATE) + allocate_uids->add (OMP_CLAUSE_DECL (c)); + } for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)); i++) { t = TREE_VEC_ELT (OMP_FOR_INIT (for_stmt), i); @@ -11949,12 +11996,13 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p) as an iteration counter. This is valid, since DECL cannot be modified in the body of the loop. Similarly for any iteration vars in simd with collapse > 1 where the iterator vars must be - lastprivate. */ + lastprivate. And similarly for vars mentioned in allocate clauses. */ if (orig_for_stmt != for_stmt) var = decl; else if (!is_gimple_reg (decl) || (ort == ORT_SIMD - && TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)) > 1)) + && TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)) > 1) + || (allocate_uids && allocate_uids->contains (decl))) { struct gimplify_omp_ctx *ctx = gimplify_omp_ctxp; /* Make sure omp_add_variable is not called on it prematurely. @@ -12181,6 +12229,7 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p) } BITMAP_FREE (has_decl_expr); + delete allocate_uids; if (TREE_CODE (orig_for_stmt) == OMP_TASKLOOP || (loop_p && orig_for_stmt == for_stmt)) diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def index f461d60..f9b78ed 100644 --- a/gcc/omp-builtins.def +++ b/gcc/omp-builtins.def @@ -47,6 +47,8 @@ DEF_GOACC_BUILTIN (BUILT_IN_GOACC_UPDATE, "GOACC_update", DEF_GOACC_BUILTIN (BUILT_IN_GOACC_WAIT, "GOACC_wait", BT_FN_VOID_INT_INT_VAR, ATTR_NOTHROW_LIST) +DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DECLARE, "GOACC_declare", + BT_FN_VOID_INT_SIZE_PTR_PTR_PTR, ATTR_NOTHROW_LIST) DEF_GOACC_BUILTIN_COMPILER (BUILT_IN_ACC_ON_DEVICE, "acc_on_device", BT_FN_INT_INT, ATTR_CONST_NOTHROW_LEAF_LIST) @@ -444,5 +446,8 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TASK_REDUCTION_REMAP, DEF_GOMP_BUILTIN (BUILT_IN_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER, "GOMP_workshare_task_reduction_unregister", BT_FN_VOID_BOOL, ATTR_NOTHROW_LEAF_LIST) -DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DECLARE, "GOACC_declare", - BT_FN_VOID_INT_SIZE_PTR_PTR_PTR, ATTR_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_ALLOC, + "GOMP_alloc", BT_FN_PTR_SIZE_SIZE_PTRMODE, + ATTR_ALLOC_WARN_UNUSED_RESULT_SIZE_2_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_FREE, + "GOMP_free", BT_FN_VOID_PTR_PTRMODE, ATTR_NOTHROW_LEAF_LIST) diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c index 6583c88..ddca3d3 100644 --- a/gcc/omp-expand.c +++ b/gcc/omp-expand.c @@ -4255,8 +4255,7 @@ expand_omp_for_generic (struct omp_region *region, : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a); t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false, GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (dest, t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + expand_omp_build_assign (&gsi, dest, t, true); } if (fd->collapse > 1) expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar); @@ -5250,8 +5249,7 @@ expand_omp_for_static_nochunk (struct omp_region *region, : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a); t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false, GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (dest, t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + expand_omp_build_assign (&gsi, dest, t, true); } if (fd->collapse > 1) { @@ -5974,8 +5972,7 @@ expand_omp_for_static_chunk (struct omp_region *region, : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a); t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false, GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (dest, t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + expand_omp_build_assign (&gsi, dest, t, true); } if (fd->collapse > 1) expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar); diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 83ca5fc..ed805e2 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -126,6 +126,10 @@ struct omp_context corresponding tracking loop iteration variables. */ hash_map *lastprivate_conditional_map; + /* And a hash map from the allocate variables to their corresponding + allocators. */ + hash_map *allocate_map; + /* A tree_list of the reduction clauses in this context. This is only used for checking the consistency of OpenACC reduction clauses in scan_omp_for and is not guaranteed to contain a valid @@ -1043,6 +1047,7 @@ delete_omp_context (splay_tree_value value) } delete ctx->lastprivate_conditional_map; + delete ctx->allocate_map; XDELETE (ctx); } @@ -1115,6 +1120,22 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) bool scan_array_reductions = false; for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_ALLOCATE + && (OMP_CLAUSE_ALLOCATE_ALLOCATOR (c) == NULL_TREE + /* omp_default_mem_alloc is 1 */ + || !integer_onep (OMP_CLAUSE_ALLOCATE_ALLOCATOR (c)))) + { + if (is_task_ctx (ctx)) + continue; /* For now. */ + if (ctx->allocate_map == NULL) + ctx->allocate_map = new hash_map; + ctx->allocate_map->put (OMP_CLAUSE_DECL (c), + OMP_CLAUSE_ALLOCATE_ALLOCATOR (c) + ? OMP_CLAUSE_ALLOCATE_ALLOCATOR (c) + : integer_zero_node); + } + + for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) { bool by_ref; @@ -1130,6 +1151,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) case OMP_CLAUSE_SHARED: decl = OMP_CLAUSE_DECL (c); + if (ctx->allocate_map && ctx->allocate_map->get (decl)) + ctx->allocate_map->remove (decl); /* Ignore shared directives in teams construct inside of target construct. */ if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS @@ -4358,6 +4381,68 @@ task_reduction_read (gimple_seq *ilist, tree tskred_temp, tree type, return v; } +/* Lower early initialization of privatized variable NEW_VAR + if it needs an allocator (has allocate clause). */ + +static bool +lower_private_allocate (tree var, tree new_var, tree &allocator, + tree &allocate_ptr, gimple_seq *ilist, + omp_context *ctx, bool is_ref, tree size) +{ + if (allocator) + return false; + gcc_assert (allocate_ptr == NULL_TREE); + if (ctx->allocate_map && DECL_P (new_var)) + if (tree *allocatorp = ctx->allocate_map->get (var)) + allocator = *allocatorp; + if (allocator == NULL_TREE) + return false; + if (!is_ref && omp_is_reference (var)) + return false; + + if (TREE_CODE (allocator) != INTEGER_CST) + allocator = build_outer_var_ref (allocator, ctx); + allocator = fold_convert (pointer_sized_int_node, allocator); + if (TREE_CODE (allocator) != INTEGER_CST) + { + tree var = create_tmp_var (TREE_TYPE (allocator)); + gimplify_assign (var, allocator, ilist); + allocator = var; + } + + tree ptr_type, align, sz; + if (is_ref) + { + ptr_type = build_pointer_type (TREE_TYPE (TREE_TYPE (new_var))); + align = build_int_cst (size_type_node, + TYPE_ALIGN_UNIT (TREE_TYPE (ptr_type))); + sz = size; + } + else + { + ptr_type = build_pointer_type (TREE_TYPE (new_var)); + align = build_int_cst (size_type_node, DECL_ALIGN_UNIT (new_var)); + sz = fold_convert (size_type_node, DECL_SIZE_UNIT (new_var)); + } + if (TREE_CODE (sz) != INTEGER_CST) + { + tree szvar = create_tmp_var (size_type_node); + gimplify_assign (szvar, sz, ilist); + sz = szvar; + } + allocate_ptr = create_tmp_var (ptr_type); + tree a = builtin_decl_explicit (BUILT_IN_GOMP_ALLOC); + gimple *g = gimple_build_call (a, 3, align, sz, allocator); + gimple_call_set_lhs (g, allocate_ptr); + gimple_seq_add_stmt (ilist, g); + if (!is_ref) + { + SET_DECL_VALUE_EXPR (new_var, build_simple_mem_ref (allocate_ptr)); + DECL_HAS_VALUE_EXPR_P (new_var) = 1; + } + return true; +} + /* Generate code to implement the input clauses, FIRSTPRIVATE and COPYIN, from the receiver (aka child) side and initializers for REFERENCE_TYPE private variables. Initialization statements go in ILIST, while calls @@ -4523,6 +4608,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, bool task_reduction_p = false; bool task_reduction_needs_orig_p = false; tree cond = NULL_TREE; + tree allocator, allocate_ptr; switch (c_kind) { @@ -4659,6 +4745,8 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, if (task_reduction_p != (pass >= 2)) continue; + allocator = NULL_TREE; + allocate_ptr = NULL_TREE; new_var = var = OMP_CLAUSE_DECL (c); if ((c_kind == OMP_CLAUSE_REDUCTION || c_kind == OMP_CLAUSE_IN_REDUCTION) @@ -5195,8 +5283,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, if (c_kind != OMP_CLAUSE_FIRSTPRIVATE || !is_task_ctx (ctx)) { - gcall *stmt; - tree tmp, atmp; + tree tmp; ptr = DECL_VALUE_EXPR (new_var); gcc_assert (TREE_CODE (ptr) == INDIRECT_REF); @@ -5204,16 +5291,25 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, gcc_assert (DECL_P (ptr)); x = TYPE_SIZE_UNIT (TREE_TYPE (new_var)); - /* void *tmp = __builtin_alloca */ - atmp = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN); - stmt = gimple_build_call (atmp, 2, x, - size_int (DECL_ALIGN (var))); - cfun->calls_alloca = 1; - tmp = create_tmp_var_raw (ptr_type_node); - gimple_add_tmp_var (tmp); - gimple_call_set_lhs (stmt, tmp); - - gimple_seq_add_stmt (ilist, stmt); + if (lower_private_allocate (var, new_var, allocator, + allocate_ptr, ilist, ctx, + false, x)) + tmp = allocate_ptr; + else + { + /* void *tmp = __builtin_alloca */ + tree atmp + = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN); + gcall *stmt + = gimple_build_call (atmp, 2, x, + size_int (DECL_ALIGN (var))); + cfun->calls_alloca = 1; + tmp = create_tmp_var_raw (ptr_type_node); + gimple_add_tmp_var (tmp); + gimple_call_set_lhs (stmt, tmp); + + gimple_seq_add_stmt (ilist, stmt); + } x = fold_convert_loc (clause_loc, TREE_TYPE (ptr), tmp); gimplify_assign (ptr, x, ilist); @@ -5237,6 +5333,10 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, x = build_receiver_ref (var, false, ctx); x = build_fold_addr_expr_loc (clause_loc, x); } + else if (lower_private_allocate (var, new_var, allocator, + allocate_ptr, + ilist, ctx, true, x)) + x = allocate_ptr; else if (TREE_CONSTANT (x)) { /* For reduction in SIMD loop, defer adding the @@ -5349,6 +5449,8 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, tree nx; bool copy_ctor; copy_ctor = false; + lower_private_allocate (var, new_var, allocator, allocate_ptr, + ilist, ctx, false, NULL_TREE); nx = unshare_expr (new_var); if (is_simd && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE @@ -5494,6 +5596,13 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, x = lang_hooks.decls.omp_clause_dtor (c, new_var); if (x) gimplify_and_add (x, dlist); + if (allocator) + { + tree f = builtin_decl_explicit (BUILT_IN_GOMP_FREE); + gimple *g + = gimple_build_call (f, 2, allocate_ptr, allocator); + gimple_seq_add_stmt (dlist, g); + } break; case OMP_CLAUSE_LINEAR: @@ -5535,6 +5644,8 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, goto do_dtor; } do_firstprivate: + lower_private_allocate (var, new_var, allocator, allocate_ptr, + ilist, ctx, false, NULL_TREE); x = build_outer_var_ref (var, ctx); if (is_simd) { @@ -5722,6 +5833,9 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, } else { + lower_private_allocate (var, new_var, allocator, + allocate_ptr, ilist, ctx, false, + NULL_TREE); x = build_outer_var_ref (var, ctx); if (omp_is_reference (var) @@ -6118,6 +6232,9 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, } else { + lower_private_allocate (var, new_var, allocator, + allocate_ptr, ilist, ctx, + false, NULL_TREE); if (omp_is_reference (var) && is_simd) handle_simd_reference (clause_loc, new_vard, ilist); if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION @@ -6132,6 +6249,8 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ref = build_outer_var_ref (var, ctx); gimplify_assign (ref, x, dlist); } + if (allocator) + goto do_dtor; } } break; @@ -7488,7 +7607,10 @@ lower_send_shared_vars (gimple_seq *ilist, gimple_seq *olist, omp_context *ctx) continue; nvar = maybe_lookup_decl (ovar, ctx); - if (!nvar || !DECL_HAS_VALUE_EXPR_P (nvar)) + if (!nvar + || !DECL_HAS_VALUE_EXPR_P (nvar) + || (ctx->allocate_map + && ctx->allocate_map->get (ovar))) continue; /* If CTX is a nested parallel directive. Find the immediately diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c index 23b2902..9a2ff62 100644 --- a/gcc/tree-ssa-ccp.c +++ b/gcc/tree-ssa-ccp.c @@ -1972,6 +1972,7 @@ evaluate_stmt (gimple *stmt) break; case BUILT_IN_ALIGNED_ALLOC: + case BUILT_IN_GOMP_ALLOC: { tree align = get_constant_value (gimple_call_arg (stmt, 0)); if (align diff --git a/gcc/tree-ssa-dce.c b/gcc/tree-ssa-dce.c index a046612..9fb156c 100644 --- a/gcc/tree-ssa-dce.c +++ b/gcc/tree-ssa-dce.c @@ -239,6 +239,7 @@ mark_stmt_if_obviously_necessary (gimple *stmt, bool aggressive) CASE_BUILT_IN_ALLOCA: case BUILT_IN_STRDUP: case BUILT_IN_STRNDUP: + case BUILT_IN_GOMP_ALLOC: return; default:; @@ -605,6 +606,8 @@ mark_all_reaching_defs_necessary_1 (ao_ref *ref ATTRIBUTE_UNUSED, case BUILT_IN_CALLOC: CASE_BUILT_IN_ALLOCA: case BUILT_IN_FREE: + case BUILT_IN_GOMP_ALLOC: + case BUILT_IN_GOMP_FREE: return false; default:; @@ -879,7 +882,8 @@ propagate_necessity (bool aggressive) && gimple_call_from_new_or_delete (as_a (stmt)) && gimple_call_operator_delete_p (as_a (stmt))); if (is_delete_operator - || gimple_call_builtin_p (stmt, BUILT_IN_FREE)) + || gimple_call_builtin_p (stmt, BUILT_IN_FREE) + || gimple_call_builtin_p (stmt, BUILT_IN_GOMP_FREE)) { tree ptr = gimple_call_arg (stmt, 0); gcall *def_stmt; @@ -892,27 +896,26 @@ propagate_necessity (bool aggressive) && ((DECL_BUILT_IN_CLASS (def_callee) == BUILT_IN_NORMAL && (DECL_FUNCTION_CODE (def_callee) == BUILT_IN_ALIGNED_ALLOC || DECL_FUNCTION_CODE (def_callee) == BUILT_IN_MALLOC - || DECL_FUNCTION_CODE (def_callee) == BUILT_IN_CALLOC)) + || DECL_FUNCTION_CODE (def_callee) == BUILT_IN_CALLOC + || DECL_FUNCTION_CODE (def_callee) == BUILT_IN_GOMP_ALLOC)) || (DECL_IS_REPLACEABLE_OPERATOR_NEW_P (def_callee) && gimple_call_from_new_or_delete (def_stmt)))) { - if (is_delete_operator) - { - if (!valid_new_delete_pair_p (def_stmt, stmt)) - mark_operand_necessary (gimple_call_arg (stmt, 0)); - - /* Delete operators can have alignment and (or) size - as next arguments. When being a SSA_NAME, they - must be marked as necessary. */ - if (gimple_call_num_args (stmt) >= 2) - for (unsigned i = 1; i < gimple_call_num_args (stmt); - i++) - { - tree arg = gimple_call_arg (stmt, i); - if (TREE_CODE (arg) == SSA_NAME) - mark_operand_necessary (arg); - } - } + if (is_delete_operator + && !valid_new_delete_pair_p (def_stmt, stmt)) + mark_operand_necessary (gimple_call_arg (stmt, 0)); + + /* Delete operators can have alignment and (or) size + as next arguments. When being a SSA_NAME, they + must be marked as necessary. Similarly GOMP_free. */ + if (gimple_call_num_args (stmt) >= 2) + for (unsigned i = 1; i < gimple_call_num_args (stmt); + i++) + { + tree arg = gimple_call_arg (stmt, i); + if (TREE_CODE (arg) == SSA_NAME) + mark_operand_necessary (arg); + } continue; } diff --git a/libgomp/allocator.c b/libgomp/allocator.c index 7166538..2790733 100644 --- a/libgomp/allocator.c +++ b/libgomp/allocator.c @@ -205,11 +205,12 @@ omp_destroy_allocator (omp_allocator_handle_t allocator) ialias (omp_init_allocator) ialias (omp_destroy_allocator) -void * -omp_alloc (size_t size, omp_allocator_handle_t allocator) +static void * +omp_aligned_alloc (size_t alignment, size_t size, + omp_allocator_handle_t allocator) { struct omp_allocator_data *allocator_data; - size_t alignment, new_size; + size_t new_size; void *ptr, *ret; if (__builtin_expect (size == 0, 0)) @@ -227,12 +228,14 @@ retry: if (allocator > omp_max_predefined_alloc) { allocator_data = (struct omp_allocator_data *) allocator; - alignment = allocator_data->alignment; + if (alignment < allocator_data->alignment) + alignment = allocator_data->alignment; } else { allocator_data = NULL; - alignment = sizeof (void *); + if (alignment < sizeof (void *)) + alignment = sizeof (void *); } new_size = sizeof (struct omp_mem_header); @@ -339,6 +342,27 @@ fail: return NULL; } +void * +omp_alloc (size_t size, omp_allocator_handle_t allocator) +{ + return omp_aligned_alloc (1, size, allocator); +} + +/* Like omp_aligned_alloc, but apply on top of that: + "For allocations that arise from this ... the null_fb value of the + fallback allocator trait behaves as if the abort_fb had been specified." */ + +void * +GOMP_alloc (size_t alignment, size_t size, uintptr_t allocator) +{ + void *ret = omp_aligned_alloc (alignment, size, + (omp_allocator_handle_t) allocator); + if (__builtin_expect (ret == NULL, 0) && size) + gomp_fatal ("Out of memory allocating %lu bytes", + (unsigned long) size); + return ret; +} + void omp_free (void *ptr, omp_allocator_handle_t allocator) { @@ -366,3 +390,11 @@ omp_free (void *ptr, omp_allocator_handle_t allocator) } free (data->ptr); } + +ialias (omp_free) + +void +GOMP_free (void *ptr, uintptr_t allocator) +{ + return omp_free (ptr, (omp_allocator_handle_t) allocator); +} diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index c5f52f7..2c95f78 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -359,6 +359,12 @@ GOMP_5.0 { GOMP_workshare_task_reduction_unregister; } GOMP_4.5; +GOMP_5.0.1 { + global: + GOMP_alloc; + GOMP_free; +} GOMP_5.0; + OACC_2.0 { global: acc_get_num_devices; diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h index 59e3697..b20e186 100644 --- a/libgomp/libgomp_g.h +++ b/libgomp/libgomp_g.h @@ -357,6 +357,11 @@ extern void GOMP_teams (unsigned int, unsigned int); extern void GOMP_teams_reg (void (*) (void *), void *, unsigned, unsigned, unsigned); +/* allocator.c */ + +extern void *GOMP_alloc (size_t, size_t, uintptr_t); +extern void GOMP_free (void *, uintptr_t); + /* oacc-async.c */ extern void GOACC_wait (int, int, ...); diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in index be7df6d..4424a16 100644 --- a/libgomp/omp.h.in +++ b/libgomp/omp.h.in @@ -281,7 +281,7 @@ extern void omp_set_default_allocator (omp_allocator_handle_t) __GOMP_NOTHROW; extern omp_allocator_handle_t omp_get_default_allocator (void) __GOMP_NOTHROW; extern void *omp_alloc (__SIZE_TYPE__, omp_allocator_handle_t __GOMP_DEFAULT_NULL_ALLOCATOR) - __GOMP_NOTHROW; + __GOMP_NOTHROW __attribute__((__malloc__, __alloc_size__ (1))); extern void omp_free (void *, omp_allocator_handle_t __GOMP_DEFAULT_NULL_ALLOCATOR) __GOMP_NOTHROW; diff --git a/libgomp/testsuite/libgomp.c++/allocate-1.C b/libgomp/testsuite/libgomp.c++/allocate-1.C new file mode 100644 index 0000000..ee89f31 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/allocate-1.C @@ -0,0 +1,194 @@ +#include +#include +#include + +void +foo (int &x, int &y, int &r, int &l, int (&l2)[4], int &l3, int &n, omp_allocator_handle_t h, int fl) +{ + int i; + typedef int T[x]; + T v, w; + T &v2 = v; + T &w2 = w; + int xo = x; + for (i = 0; i < x; i++) + w[i] = i; + #pragma omp parallel private (y, v2) firstprivate (x) allocate (x, y, v2) + { + int *volatile p1 = &x; + int *volatile p2 = &y; + if (x != 42) + abort (); + #pragma omp barrier + *p2 = 1; + p1[0]++; + v2[0] = 7; + v2[41] = 8; + #pragma omp barrier + if (x != 43 || y != 1) + abort (); + if (v2[0] != 7 || v2[41] != 8) + abort (); + if ((fl & 2) && (((uintptr_t) p1 | (uintptr_t) p2 + | (uintptr_t) &v2[0]) & 63) != 0) + abort (); + } + x = xo; + #pragma omp teams + #pragma omp parallel private (y) firstprivate (x, w2) allocate (h: x, y, w2) + { + int *volatile p1 = &x; + int *volatile p2 = &y; + if (x != 42 || w2[17] != 17 || w2[41] != 41) + abort (); + #pragma omp barrier + *p2 = 1; + p1[0]++; + w2[19]++; + #pragma omp barrier + if (x != 43 || y != 1 || w2[19] != 20) + abort (); + if ((fl & 1) && (((uintptr_t) p1 | (uintptr_t) p2 + | (uintptr_t) &w2[0]) & 63) != 0) + abort (); + } + x = xo; + #pragma omp parallel for private (y) firstprivate (x) allocate (h: x, y, r, l, n) reduction(+: r) lastprivate (l) linear (n: 16) + for (i = 0; i < 64; i++) + { + if (x != 42) + abort (); + y = 1; + l = i; + n += y + 15; + r += i; + if ((fl & 1) && (((uintptr_t) &x | (uintptr_t) &y | (uintptr_t) &r + | (uintptr_t) &l | (uintptr_t) &n) & 63) != 0) + abort (); + } + #pragma omp parallel + { + #pragma omp for lastprivate (l2) allocate (h: l2, l3) lastprivate (conditional: l3) + for (i = 0; i < 64; i++) + { + l2[0] = i; + l2[1] = i + 1; + l2[2] = i + 2; + l2[3] = i + 3; + if (i < 37) + l3 = i; + if ((fl & 1) && (((uintptr_t) &l2[0] | (uintptr_t) &l3) & 63) != 0) + abort (); + } + } + if (r != 64 * 63 / 2 || l != 63 || n != 8 + 16 * 64) + abort (); + if (l2[0] != 63 || l2[1] != 63 + 1 || l2[2] != 63 + 2 || l2[3] != 63 + 3 || l3 != 36) + abort (); +} + +void +bar (int &x, int &y, int &r, int &l, int (&l2)[4], int &l3, int &n, omp_allocator_handle_t h) +{ + int i; + typedef int T[x]; + T v, w; + T &v2 = v; + T &w2 = w; + int xo = x; + for (i = 0; i < x; i++) + w[i] = i; + #pragma omp parallel private (y, v2) firstprivate (x) allocate (x, y, v2) + { + int *volatile p1 = &x; + int *volatile p2 = &y; + if (x != 42) + abort (); + #pragma omp barrier + *p2 = 1; + p1[0]++; + v2[0] = 7; + v2[41] = 8; + #pragma omp barrier + if (x != 43 || y != 1) + abort (); + if (v2[0] != 7 || v2[41] != 8) + abort (); + } + x = xo; + #pragma omp teams + #pragma omp parallel private (y) firstprivate (x, w2) allocate (h: x, y, w2) + { + int *volatile p1 = &x; + int *volatile p2 = &y; + if (x != 42 || w2[17] != 17 || w2[41] != 41) + abort (); + #pragma omp barrier + *p2 = 1; + p1[0]++; + #pragma omp barrier + if (x != 43 || y != 1) + abort (); + } + x = xo; + #pragma omp parallel for private (y) firstprivate (x) allocate (h: x, y, r, l, n) reduction(+: r) lastprivate (l) linear (n: 16) + for (i = 0; i < 64; i++) + { + if (x != 42) + abort (); + y = 1; + l = i; + n += y + 15; + r += i; + } + #pragma omp parallel + { + #pragma omp for lastprivate (l2) allocate (h: l2, l3) lastprivate (conditional: l3) + for (i = 0; i < 64; i++) + { + l2[0] = i; + l2[1] = i + 1; + l2[2] = i + 2; + l2[3] = i + 3; + if (i < 37) + l3 = i; + } + } + if (r != 64 * 63 / 2 || l != 63 || n != 8 + 16 * 64) + abort (); + if (l2[0] != 63 || l2[1] != 63 + 1 || l2[2] != 63 + 2 || l2[3] != 63 + 3 || l3 != 36) + abort (); +} + +int +main () +{ + omp_alloctrait_t traits[3] + = { { omp_atk_alignment, 64 }, + { omp_atk_fallback, omp_atv_null_fb } }; + omp_allocator_handle_t a + = omp_init_allocator (omp_default_mem_space, 2, traits); + if (a == omp_null_allocator) + abort (); + omp_set_default_allocator (omp_default_mem_alloc); + int x = 42, y = 0, r = 0, l, l2[4], l3, n = 8; + foo (x, y, r, l, l2, l3, n, omp_null_allocator, 0); + x = 42; y = 0; r = 0; l = -1; l2[0] = -1; l2[1] = -1; + l2[2] = -1; l2[3] = -1; n = 8; + foo (x, y, r, l, l2, l3, n, omp_default_mem_alloc, 0); + x = 42; y = 0; r = 0; l = -1; l2[0] = -1; l2[1] = -1; + l2[2] = -1; l2[3] = -1; n = 8; + foo (x, y, r, l, l2, l3, n, a, 1); + x = 42; y = 0; r = 0; l = -1; l2[0] = -1; l2[1] = -1; + l2[2] = -1; l2[3] = -1; n = 8; + omp_set_default_allocator (a); + foo (x, y, r, l, l2, l3, n, omp_null_allocator, 3); + x = 42; y = 0; r = 0; l = -1; l2[0] = -1; l2[1] = -1; + l2[2] = -1; l2[3] = -1; n = 8; + foo (x, y, r, l, l2, l3, n, omp_default_mem_alloc, 2); + x = 42; y = 0; r = 0; l = -1; l2[0] = -1; l2[1] = -1; + l2[2] = -1; l2[3] = -1; n = 8; + bar (x, y, r, l, l2, l3, n, a); + omp_destroy_allocator (a); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/allocate-1.c b/libgomp/testsuite/libgomp.c-c++-common/allocate-1.c new file mode 100644 index 0000000..532795f --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/allocate-1.c @@ -0,0 +1,254 @@ +#include +#include +#include + +void +foo (int x, omp_allocator_handle_t h, int fl) +{ + int y = 0, r = 0, i, i1, l, l2[4], l3, n = 8; + int i2, j2, n2 = 9, l4; + int i3, j3, n3 = 10, l5; + int i4, j4, n4 = 11, l6; + int i5; + int v[x], w[x]; + int xo = x; + for (i = 0; i < x; i++) + w[i] = i; + #pragma omp parallel private (y, v) firstprivate (x) allocate (x, y, v) + { + int *volatile p1 = &x; + int *volatile p2 = &y; + if (x != 42) + abort (); + #pragma omp barrier + *p2 = 1; + p1[0]++; + v[0] = 7; + v[41] = 8; + #pragma omp barrier + if (x != 43 || y != 1) + abort (); + if (v[0] != 7 || v[41] != 8) + abort (); + if ((fl & 2) && (((uintptr_t) p1 | (uintptr_t) p2 + | (uintptr_t) &v[0]) & 63) != 0) + abort (); + } + x = xo; + #pragma omp teams + #pragma omp parallel private (y) firstprivate (x, w) allocate (h: x, y, w) + { + int *volatile p1 = &x; + int *volatile p2 = &y; + if (x != 42 || w[17] != 17 || w[41] != 41) + abort (); + #pragma omp barrier + *p2 = 1; + p1[0]++; + w[19]++; + #pragma omp barrier + if (x != 43 || y != 1 || w[19] != 20) + abort (); + if ((fl & 1) && (((uintptr_t) p1 | (uintptr_t) p2 + | (uintptr_t) &w[0]) & 63) != 0) + abort (); + } + x = xo; + #pragma omp parallel for private (y) firstprivate (x) allocate (h: x, y, r, l, n) reduction(+: r) lastprivate (l) linear (n: 16) + for (i = 0; i < 64; i++) + { + if (x != 42) + abort (); + y = 1; + l = i; + n += y + 15; + r += i; + if ((fl & 1) && (((uintptr_t) &x | (uintptr_t) &y | (uintptr_t) &r + | (uintptr_t) &l | (uintptr_t) &n) & 63) != 0) + abort (); + } + #pragma omp parallel + { + #pragma omp for lastprivate (l2) private (i1) allocate (h: l2, l3, i1) lastprivate (conditional: l3) + for (i1 = 0; i1 < 64; i1++) + { + l2[0] = i1; + l2[1] = i1 + 1; + l2[2] = i1 + 2; + l2[3] = i1 + 3; + if (i1 < 37) + l3 = i1; + if ((fl & 1) && (((uintptr_t) &l2[0] | (uintptr_t) &l3 | (uintptr_t) &i1) & 63) != 0) + abort (); + } + #pragma omp for collapse(2) lastprivate(l4, i2, j2) linear (n2:17) allocate (h: n2, l4, i2, j2) + for (i2 = 3; i2 < 5; i2++) + for (j2 = 17; j2 < 22; j2 += 2) + { + n2 += 17; + l4 = i2 * 31 + j2; + if ((fl & 1) && (((uintptr_t) &l4 | (uintptr_t) &n2 + | (uintptr_t) &i2 | (uintptr_t) &j2) & 63) != 0) + abort (); + } + #pragma omp for collapse(2) lastprivate(l5, i3, j3) linear (n3:17) schedule (static, 3) allocate (n3, l5, i3, j3) + for (i3 = 3; i3 < 5; i3++) + for (j3 = 17; j3 < 23; j3 += 2) + { + n3 += 17; + l5 = i3 * 31 + j3; + if ((fl & 2) && (((uintptr_t) &l5 | (uintptr_t) &n3 + | (uintptr_t) &i3 | (uintptr_t) &j3) & 63) != 0) + abort (); + } + #pragma omp for collapse(2) lastprivate(l6, i4, j4) linear (n4:17) schedule (dynamic) allocate (h: n4, l6, i4, j4) + for (i4 = 3; i4 < 5; i4++) + for (j4 = 17; j4 < 22; j4 += 2) + { + n4 += 17; + l6 = i4 * 31 + j4; + if ((fl & 1) && (((uintptr_t) &l6 | (uintptr_t) &n4 + | (uintptr_t) &i4 | (uintptr_t) &j4) & 63) != 0) + abort (); + } + #pragma omp for lastprivate (i5) allocate (i5) + for (i5 = 1; i5 < 17; i5 += 3) + { + if ((fl & 2) && (((uintptr_t) &i5) & 63) != 0) + abort (); + } + } + if (r != 64 * 63 / 2 || l != 63 || n != 8 + 16 * 64) + abort (); + if (l2[0] != 63 || l2[1] != 63 + 1 || l2[2] != 63 + 2 || l2[3] != 63 + 3 || l3 != 36) + abort (); + if (i2 != 5 || j2 != 23 || n2 != 9 + 6 * 17 || l4 != 4 * 31 + 21) + abort (); + if (i3 != 5 || j3 != 23 || n3 != 10 + 6 * 17 || l5 != 4 * 31 + 21) + abort (); + if (i4 != 5 || j4 != 23 || n4 != 11 + 6 * 17 || l6 != 4 * 31 + 21) + abort (); + if (i5 != 19) + abort (); +} + +void +bar (int x, omp_allocator_handle_t h) +{ + int y = 0, r = 0, i, i1, l, l2[4], l3, n = 8; + int i2, j2, n2 = 9, l4; + int i3, j3, n3 = 10, l5; + int i4, j4, n4 = 11, l6; + int i5; + int xo = x; + #pragma omp parallel private (y) firstprivate (x) allocate (x, y) + { + int *volatile p1 = &x; + int *volatile p2 = &y; + if (x != 42) + abort (); + #pragma omp barrier + *p2 = 1; + p1[0]++; + #pragma omp barrier + if (x != 43 || y != 1) + abort (); + } + x = xo; + #pragma omp teams + #pragma omp parallel private (y) firstprivate (x) allocate (h: x, y) + { + int *volatile p1 = &x; + int *volatile p2 = &y; + if (x != 42) + abort (); + #pragma omp barrier + *p2 = 1; + p1[0]++; + #pragma omp barrier + if (x != 43 || y != 1) + abort (); + } + x = xo; + #pragma omp parallel for private (y) firstprivate (x) allocate (h: x, y, r, l, n) reduction(+: r) lastprivate (l) linear (n: 16) + for (i = 0; i < 64; i++) + { + if (x != 42) + abort (); + y = 1; + l = i; + n += y + 15; + r += i; + } + #pragma omp parallel + { + #pragma omp for lastprivate (l2) private (i1) allocate (h: l2, l3, i1) lastprivate (conditional: l3) + for (i1 = 0; i1 < 64; i1++) + { + l2[0] = i1; + l2[1] = i1 + 1; + l2[2] = i1 + 2; + l2[3] = i1 + 3; + if (i1 < 37) + l3 = i1; + } + #pragma omp for collapse(2) lastprivate(l4, i2, j2) linear (n2:17) allocate (h: n2, l4, i2, j2) + for (i2 = 3; i2 < 5; i2++) + for (j2 = 17; j2 < 22; j2 += 2) + { + n2 += 17; + l4 = i2 * 31 + j2; + } + #pragma omp for collapse(2) lastprivate(l5, i3, j3) linear (n3:17) schedule (static, 3) allocate (n3, l5, i3, j3) + for (i3 = 3; i3 < 5; i3++) + for (j3 = 17; j3 < 23; j3 += 2) + { + n3 += 17; + l5 = i3 * 31 + j3; + } + #pragma omp for collapse(2) lastprivate(l6, i4, j4) linear (n4:17) schedule (dynamic) allocate (h: n4, l6, i4, j4) + for (i4 = 3; i4 < 5; i4++) + for (j4 = 17; j4 < 22; j4 += 2) + { + n4 += 17; + l6 = i4 * 31 + j4; + } + #pragma omp for lastprivate (i5) allocate (i5) + for (i5 = 1; i5 < 17; i5 += 3) + ; + } + if (r != 64 * 63 / 2 || l != 63 || n != 8 + 16 * 64) + abort (); + if (l2[0] != 63 || l2[1] != 63 + 1 || l2[2] != 63 + 2 || l2[3] != 63 + 3 || l3 != 36) + abort (); + if (i2 != 5 || j2 != 23 || n2 != 9 + 6 * 17 || l4 != 4 * 31 + 21) + abort (); + if (i3 != 5 || j3 != 23 || n3 != 10 + 6 * 17 || l5 != 4 * 31 + 21) + abort (); + if (i4 != 5 || j4 != 23 || n4 != 11 + 6 * 17 || l6 != 4 * 31 + 21) + abort (); + if (i5 != 19) + abort (); +} + +int +main () +{ + omp_alloctrait_t traits[3] + = { { omp_atk_alignment, 64 }, + { omp_atk_fallback, omp_atv_null_fb } }; + omp_allocator_handle_t a + = omp_init_allocator (omp_default_mem_space, 2, traits); + if (a == omp_null_allocator) + abort (); + omp_set_default_allocator (omp_default_mem_alloc); + foo (42, omp_null_allocator, 0); + foo (42, omp_default_mem_alloc, 0); + foo (42, a, 1); + omp_set_default_allocator (a); + foo (42, omp_null_allocator, 3); + foo (42, omp_default_mem_alloc, 2); + bar (42, a); + omp_destroy_allocator (a); + return 0; +} -- 2.7.4