From: Jonathan Peyton Date: Thu, 28 Feb 2019 20:47:21 +0000 (+0000) Subject: [OpenMP 5.0] Deprecate nest-var and associated features X-Git-Tag: llvmorg-10-init~10966 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=76b45e874d477cebc8c4f9e11cedbf3d4352aa5a;p=platform%2Fupstream%2Fllvm.git [OpenMP 5.0] Deprecate nest-var and associated features Nest-var, OMP_NESTED, omp_set_nested(), and omp_get_nested() have been deprecated in the 5.0 spec. Initial nesting info is now derived from OMP_MAX_ACTIVE_LEVELS, OMP_NUM_THREADS, and OMP_PROC_BIND. This patch deprecates the internal ICV that corresponds to nest-var, and replaces it with the max-active-levels-var ICV to determine nesting. The change still allows for use of OMP_NESTED (according to 5.0 changes), omp_get_nested, and omp_set_nested, which have had deprecation messages added to them. The change allows certain settings of OMP_NUM_THREADS, OMP_PROC_BIND, and OMP_MAX_ACTIVE_LEVELS to turn on nesting, but OMP_NESTED=0 will still force nesting to be off. The runtime now prints informative messages about deprecation of OMP_NESTED, omp_set_nested(), and omp_get_nested(), when those environment variables or routines are used. It also prints a deprecation message in the output for KMP_SETTINGS and OMP_DISPLAY_ENV for OMP_NESTED. This patch also fixes OMP_DISPLAY_ENV output for OMP_TARGET_OFFLOAD. Patch by Terry Wilmarth Differential Revision: https://reviews.llvm.org/D58408 llvm-svn: 355138 --- diff --git a/openmp/runtime/src/i18n/en_US.txt b/openmp/runtime/src/i18n/en_US.txt index cf262d9..822f73c 100644 --- a/openmp/runtime/src/i18n/en_US.txt +++ b/openmp/runtime/src/i18n/en_US.txt @@ -425,7 +425,7 @@ AffHWSubsetManyTiles "KMP_HW_SUBSET ignored: too many L2 Caches requeste AffHWSubsetManyProcs "KMP_HW_SUBSET ignored: too many Procs requested." HierSchedInvalid "Hierarchy ignored: unsupported level: %1$s." 
AffFormatDefault "OMP: pid %1$s tid %2$s thread %3$s bound to OS proc set {%4$s}" - +APIDeprecated "%1$s routine deprecated, please use %2$s instead." # -------------------------------------------------------------------------------------------------- -*- HINTS -*- @@ -489,4 +489,3 @@ SystemLimitOnThreads "System-related limit on the number of threads." # -------------------------------------------------------------------------------------------------- # end of file # # -------------------------------------------------------------------------------------------------- - diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 5125eca..93fcf0f 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -1847,7 +1847,6 @@ typedef enum kmp_bar_pat { /* Barrier communication patterns */ typedef struct kmp_internal_control { int serial_nesting_level; /* corresponds to the value of the th_team_serialized field */ - kmp_int8 nested; /* internal control for nested parallelism (per thread) */ kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per thread) */ kmp_int8 @@ -2054,8 +2053,6 @@ typedef struct kmp_local { ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals) #endif -#define get__nested_2(xteam, xtid) \ - ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nested) #define get__dynamic_2(xteam, xtid) \ ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic) #define get__nproc_2(xteam, xtid) \ @@ -2076,11 +2073,6 @@ typedef struct kmp_local { #define set__bt_set_team(xteam, xtid, xval) \ (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) = (xval)) -#define set__nested(xthread, xval) \ - (((xthread)->th.th_current_task->td_icvs.nested) = (xval)) -#define get__nested(xthread) \ - (((xthread)->th.th_current_task->td_icvs.nested) ? 
(FTN_TRUE) : (FTN_FALSE)) - #define set__dynamic(xthread, xval) \ (((xthread)->th.th_current_task->td_icvs.dynamic) = (xval)) #define get__dynamic(xthread) \ @@ -2095,6 +2087,9 @@ typedef struct kmp_local { #define set__max_active_levels(xthread, xval) \ (((xthread)->th.th_current_task->td_icvs.max_active_levels) = (xval)) +#define get__max_active_levels(xthread) \ + ((xthread)->th.th_current_task->td_icvs.max_active_levels) + #define set__sched(xthread, xval) \ (((xthread)->th.th_current_task->td_icvs.sched) = (xval)) @@ -2821,8 +2816,6 @@ typedef struct kmp_base_root { // TODO: GEH - then replace r_active with t_active_levels if we can to reduce // the synch overhead or keeping r_active volatile int r_active; /* TRUE if some region in a nest has > 1 thread */ - // GEH: This is misnamed, should be r_in_parallel - volatile int r_nested; // TODO: GEH - This is unused, just remove it entirely. // keeps a count of active parallel regions per root std::atomic r_in_parallel; // GEH: This is misnamed, should be r_active_levels @@ -3013,8 +3006,6 @@ extern int __kmp_tp_capacity; /* capacity of __kmp_threads if threadprivate is used (fixed) */ extern int __kmp_tp_cached; /* whether threadprivate cache has been created (__kmpc_threadprivate_cached()) */ -extern int __kmp_dflt_nested; /* nested parallelism enabled by default a la - OMP_NESTED */ extern int __kmp_dflt_blocktime; /* number of milliseconds to wait before blocking (env setting) */ #if KMP_USE_MONITOR @@ -3056,9 +3047,12 @@ extern kmp_int16 __kmp_init_x87_fpu_control_word; // init thread's FP ctrl reg extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxscr */ #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ -extern int __kmp_dflt_max_active_levels; /* max_active_levels for nested - parallelism enabled by default via - OMP_MAX_ACTIVE_LEVELS */ +// max_active_levels for nested parallelism enabled by default via +// OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS, and OMP_PROC_BIND +extern int 
__kmp_dflt_max_active_levels; +// Indicates whether value of __kmp_dflt_max_active_levels was already +// explicitly set by OMP_MAX_ACTIVE_LEVELS or OMP_NESTED=false +extern bool __kmp_dflt_max_active_levels_set; extern int __kmp_dispatch_num_buffers; /* max possible dynamic loops in concurrent execution per team */ #if KMP_NESTED_HOT_TEAMS diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp index 592a266..4cc8757 100644 --- a/openmp/runtime/src/kmp_csupport.cpp +++ b/openmp/runtime/src/kmp_csupport.cpp @@ -1872,7 +1872,7 @@ void ompc_set_nested(int flag) { __kmp_save_internal_controls(thread); - set__nested(thread, flag ? TRUE : FALSE); + set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); } void ompc_set_max_active_levels(int max_active_levels) { diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h index 6fd089f..9eb80b4 100644 --- a/openmp/runtime/src/kmp_ftn_entry.h +++ b/openmp/runtime/src/kmp_ftn_entry.h @@ -595,6 +595,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_PROCS)(void) { } void FTN_STDCALL KMP_EXPAND_NAME(FTN_SET_NESTED)(int KMP_DEREF flag) { + KMP_INFORM(APIDeprecated, "omp_set_nested", "omp_set_max_active_levels"); #ifdef KMP_STUB __kmps_set_nested(KMP_DEREF flag); #else @@ -602,17 +603,22 @@ void FTN_STDCALL KMP_EXPAND_NAME(FTN_SET_NESTED)(int KMP_DEREF flag) { /* For the thread-private internal controls implementation */ thread = __kmp_entry_thread(); __kmp_save_internal_controls(thread); - set__nested(thread, ((KMP_DEREF flag) ? TRUE : FALSE)); + // Somewhat arbitrarily decide where to get a value for max_active_levels + int max_active_levels = get__max_active_levels(thread); + if (max_active_levels == 1) + max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; + set__max_active_levels(thread, (KMP_DEREF flag) ? 
max_active_levels : 1); #endif } int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NESTED)(void) { + KMP_INFORM(APIDeprecated, "omp_get_nested", "omp_get_max_active_levels"); #ifdef KMP_STUB return __kmps_get_nested(); #else kmp_info_t *thread; thread = __kmp_entry_thread(); - return get__nested(thread); + return get__max_active_levels(thread) > 1; #endif } diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp index 627724c..1805ade 100644 --- a/openmp/runtime/src/kmp_global.cpp +++ b/openmp/runtime/src/kmp_global.cpp @@ -127,10 +127,9 @@ int __kmp_dflt_team_nth = 0; int __kmp_dflt_team_nth_ub = 0; int __kmp_tp_capacity = 0; int __kmp_tp_cached = 0; -int __kmp_dflt_nested = FALSE; int __kmp_dispatch_num_buffers = KMP_DFLT_DISP_NUM_BUFF; -int __kmp_dflt_max_active_levels = - KMP_MAX_ACTIVE_LEVELS_LIMIT; /* max_active_levels limit */ +int __kmp_dflt_max_active_levels = 1; // Nesting off by default +bool __kmp_dflt_max_active_levels_set = false; // Don't override set value #if KMP_NESTED_HOT_TEAMS int __kmp_hot_teams_mode = 0; /* 0 - free extra threads when reduced */ /* 1 - keep extra threads when reduced */ diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 2a9e31d..e6b673b 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -1719,11 +1719,11 @@ int __kmp_fork_call(ident_t *loc, int gtid, // parallel out of teams construct). This code moved here from // __kmp_reserve_threads() to speedup nested serialized parallels. 
if (nthreads > 1) { - if ((!get__nested(master_th) && (root->r.r_in_parallel + if ((get__max_active_levels(master_th) == 1 && (root->r.r_in_parallel #if OMP_40_ENABLED - && !enter_teams + && !enter_teams #endif /* OMP_40_ENABLED */ - )) || + )) || (__kmp_library == library_serial)) { KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d" " threads\n", @@ -3150,8 +3150,6 @@ static kmp_internal_control_t __kmp_get_global_icvs(void) { kmp_internal_control_t g_icvs = { 0, // int serial_nesting_level; //corresponds to value of th_team_serialized - (kmp_int8)__kmp_dflt_nested, // int nested; //internal control - // for nested parallelism (per thread) (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic // adjustment of threads (per thread) (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for @@ -3207,7 +3205,6 @@ static void __kmp_initialize_root(kmp_root_t *root) { root->r.r_active = FALSE; root->r.r_in_parallel = 0; root->r.r_blocktime = __kmp_dflt_blocktime; - root->r.r_nested = __kmp_dflt_nested; /* setup the root team for this task */ /* allocate the root team structure */ @@ -3432,7 +3429,6 @@ void __kmp_print_structure(void) { __kmp_print_structure_thread(" Uber Thread: ", root->r.r_uber_thread); __kmp_printf(" Active?: %2d\n", root->r.r_active); - __kmp_printf(" Nested?: %2d\n", root->r.r_nested); __kmp_printf(" In Parallel: %2d\n", KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel)); __kmp_printf("\n"); diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp index 68e36f5..b61df06 100644 --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -975,12 +975,27 @@ static void __kmp_stg_print_warnings(kmp_str_buf_t *buffer, char const *name, static void __kmp_stg_parse_nested(char const *name, char const *value, void *data) { - __kmp_stg_parse_bool(name, value, &__kmp_dflt_nested); + int nested; + KMP_INFORM(EnvVarDeprecated, name, "OMP_MAX_ACTIVE_LEVELS"); + 
__kmp_stg_parse_bool(name, value, &nested); + if (nested) { + if (!__kmp_dflt_max_active_levels_set) + __kmp_dflt_max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; + } else { // nesting explicitly turned off + __kmp_dflt_max_active_levels = 1; + __kmp_dflt_max_active_levels_set = true; + } } // __kmp_stg_parse_nested static void __kmp_stg_print_nested(kmp_str_buf_t *buffer, char const *name, void *data) { - __kmp_stg_print_bool(buffer, name, __kmp_dflt_nested); + if (__kmp_env_format) { + KMP_STR_BUF_PRINT_NAME; + } else { + __kmp_str_buf_print(buffer, " %s", name); + } + __kmp_str_buf_print(buffer, ": deprecated; max-active-levels-var=%d\n", + __kmp_dflt_max_active_levels); } // __kmp_stg_print_nested static void __kmp_parse_nested_num_threads(const char *var, const char *env, @@ -1026,6 +1041,8 @@ static void __kmp_parse_nested_num_threads(const char *var, const char *env, } } } + if (!__kmp_dflt_max_active_levels_set && total > 1) + __kmp_dflt_max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; KMP_DEBUG_ASSERT(total > 0); if (total <= 0) { KMP_WARNING(NthSyntaxError, var, env); @@ -1182,8 +1199,22 @@ static void __kmp_stg_print_task_stealing(kmp_str_buf_t *buffer, static void __kmp_stg_parse_max_active_levels(char const *name, char const *value, void *data) { - __kmp_stg_parse_int(name, value, 0, KMP_MAX_ACTIVE_LEVELS_LIMIT, - &__kmp_dflt_max_active_levels); + kmp_uint64 tmp_dflt = 0; + char const *msg = NULL; + if (!__kmp_dflt_max_active_levels_set) { + // Don't overwrite __kmp_dflt_max_active_levels if we get an invalid setting + __kmp_str_to_uint(value, &tmp_dflt, &msg); + if (msg != NULL) { // invalid setting; print warning and ignore + KMP_WARNING(ParseSizeIntWarn, name, value, msg); + } else if (tmp_dflt > KMP_MAX_ACTIVE_LEVELS_LIMIT) { + // invalid setting; print warning and ignore + msg = KMP_I18N_STR(ValueTooLarge); + KMP_WARNING(ParseSizeIntWarn, name, value, msg); + } else { // valid setting + __kmp_dflt_max_active_levels = tmp_dflt; + 
__kmp_dflt_max_active_levels_set = true; + } + } } // __kmp_stg_parse_max_active_levels static void __kmp_stg_print_max_active_levels(kmp_str_buf_t *buffer, @@ -1240,9 +1271,13 @@ static void __kmp_stg_print_target_offload(kmp_str_buf_t *buffer, value = "MANDATORY"; else if (__kmp_target_offload == tgt_disabled) value = "DISABLED"; - if (value) { - __kmp_str_buf_print(buffer, " %s=%s\n", name, value); + KMP_DEBUG_ASSERT(value); + if (__kmp_env_format) { + KMP_STR_BUF_PRINT_NAME; + } else { + __kmp_str_buf_print(buffer, " %s", name); } + __kmp_str_buf_print(buffer, "=%s\n", value); } // __kmp_stg_print_target_offload #endif @@ -3162,6 +3197,9 @@ static void __kmp_stg_parse_proc_bind(char const *name, char const *value, } __kmp_nested_proc_bind.used = nelem; + if (nelem > 1 && !__kmp_dflt_max_active_levels_set) + __kmp_dflt_max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; + // Save values in the nested proc_bind array int i = 0; for (;;) { @@ -5248,7 +5286,7 @@ static void __kmp_aux_env_initialize(kmp_env_blk_t *block) { /* OMP_NESTED */ value = __kmp_env_blk_var(block, "OMP_NESTED"); if (value) { - ompc_set_nested(__kmp_dflt_nested); + ompc_set_nested(__kmp_dflt_max_active_levels > 1); } /* OMP_DYNAMIC */