From 9830a5d0c19b8d5165735e39343b7174addd94a0 Mon Sep 17 00:00:00 2001
From: Terry Wilmarth
Date: Tue, 13 Dec 2022 12:54:24 -0600
Subject: [PATCH] [OpenMP] Fix for smaller team sizes inside teams construct.

When a team nested inside a teams construct is allocated, it is allocated to a
size specified by the teams thread_limit. In the case where any mechanism that
might not grant the full thread_limit is in use, we may get a smaller team. This
possibility was not reflected in the code when using the th_teams_size.nth value
stored on the master thread for the team. This value was never updated even when
t_nproc on the team itself was different. I added a line to update it shortly
before the team is forked.

Added a simple teams test that uses KMP_DYNAMIC_MODE=random to mimic allocating
teams with sizes <= thread_limit. Eventually, this will segfault without the fix
in this commit.

Differential Revision: https://reviews.llvm.org/D139960
---
 openmp/runtime/src/kmp_runtime.cpp       |  7 +++++
 openmp/runtime/test/teams/teams_resize.c | 47 ++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)
 create mode 100644 openmp/runtime/test/teams/teams_resize.c

diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 057f909..e70393a 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -1479,6 +1479,13 @@ __kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team,
   parent_team->t.t_level++;
   parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save
 
+  // If the threads allocated to the team are less than the thread limit, update
+  // the thread limit here. th_teams_size.nth is specific to this team nested
+  // in a teams construct, the team is fully created, and we're about to do
+  // the actual fork. Best to do this here so that the subsequent uses below
+  // and in the join have the correct value.
+  master_th->th.th_teams_size.nth = parent_team->t.t_nproc;
+
 #if OMPT_SUPPORT
   if (ompt_enabled.enabled) {
     ompt_lw_taskteam_t lw_taskteam;
diff --git a/openmp/runtime/test/teams/teams_resize.c b/openmp/runtime/test/teams/teams_resize.c
new file mode 100644
index 0000000..5859969
--- /dev/null
+++ b/openmp/runtime/test/teams/teams_resize.c
@@ -0,0 +1,47 @@
+// RUN: %libomp-compile && env OMP_DYNAMIC=true KMP_DYNAMIC_MODE=random %libomp-run
+// UNSUPPORTED: icc
+
+// This is a super simple unit test to see that teams behave properly when
+// parallel regions inside the teams construct cannot allocate teams of
+// thread_limit size.
+
+#include <omp.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define NUM_TIMES 10
+
+int main(int argc, char **argv) {
+  int num_procs = omp_get_max_threads();
+  int num_teams, thread_limit, i;
+  num_teams = 2;
+  thread_limit = num_procs / num_teams;
+  for (i = 0; i < NUM_TIMES; ++i) {
+#pragma omp target teams num_teams(num_teams) thread_limit(thread_limit)
+    {
+#pragma omp parallel num_threads(thread_limit)
+      {
+        int my_num_threads = omp_get_num_threads();
+        int my_num_teams = omp_get_num_teams();
+        int my_team_id = omp_get_team_num();
+        int my_thread_id = omp_get_thread_num();
+        if (my_num_teams < 0 || my_num_teams > num_teams) {
+          fprintf(stderr, "error: my_num_teams (%d) invalid\n", my_num_teams);
+          exit(1);
+        }
+        if (my_team_id < 0 || my_team_id >= my_num_teams) {
+          fprintf(stderr, "error: my_team_id (%d) invalid (nteams = %d)\n",
+                  my_team_id, my_num_teams);
+          exit(1);
+        }
+        if (my_thread_id < 0 || my_thread_id >= my_num_threads) {
+          fprintf(stderr,
+                  "error: my_thread_id (%d) invalid (my_num_threads = %d)\n",
+                  my_thread_id, my_num_threads);
+          exit(1);
+        }
+      }
+    }
+  }
+  return 0;
+}
-- 
2.7.4