KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d switching to "
"kmp_sch_dynamic_chunked\n",
gtid));
- if (pr->u.p.parm1 <= 0)
- pr->u.p.parm1 = KMP_DEFAULT_CHUNK;
+ goto dynamic_init;
break;
} // if
} // case
if ((2L * chunk + 1) * nproc >= tc) {
/* chunk size too large, switch to dynamic */
schedule = kmp_sch_dynamic_chunked;
+ goto dynamic_init;
} else {
// when remaining iters become less than parm2 - switch to dynamic
pr->u.p.parm2 = guided_int_param * nproc * (chunk + 1);
if ((2L * chunk + 1) * nproc >= tc) {
/* chunk size too large, switch to dynamic */
schedule = kmp_sch_dynamic_chunked;
+ goto dynamic_init;
} else {
/* commonly used term: (2 nproc - 1)/(2 nproc) */
DBL x;
break;
case kmp_sch_static_chunked:
case kmp_sch_dynamic_chunked:
+ dynamic_init:
if (pr->u.p.parm1 <= 0)
pr->u.p.parm1 = KMP_DEFAULT_CHUNK;
else if (pr->u.p.parm1 > tc)
pr->u.p.parm1 = tc;
+ // Store the total number of chunks to prevent integer overflow during
+ // bounds calculations in the get next chunk routine.
+ pr->u.p.parm2 = (tc / pr->u.p.parm1) + (tc % pr->u.p.parm1 ? 1 : 0);
KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d "
"kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n",
gtid));
break;
case kmp_sch_dynamic_chunked: {
- T chunk = pr->u.p.parm1;
+ UT chunk_number;
+ UT chunk_size = pr->u.p.parm1;
+ UT nchunks = pr->u.p.parm2;
KD_TRACE(
100,
("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n",
gtid));
- init = chunk * test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration);
- trip = pr->u.p.tc - 1;
-
- if ((status = (init <= trip)) == 0) {
+ chunk_number = test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration);
+ status = (chunk_number < nchunks);
+ if (!status) {
*p_lb = 0;
*p_ub = 0;
if (p_st != NULL)
*p_st = 0;
} else {
+ init = chunk_size * chunk_number;
+ trip = pr->u.p.tc - 1;
start = pr->u.p.lb;
- limit = chunk + init - 1;
incr = pr->u.p.st;
- if ((last = (limit >= trip)) != 0)
+ if ((last = (trip - init < (UT)chunk_size)))
limit = trip;
+ else
+ limit = chunk_size + init - 1;
if (p_st != NULL)
*p_st = incr;
--- /dev/null
+// RUN: %libomp-compile
+// RUN: env OMP_WAIT_POLICY=passive OMP_NUM_THREADS=32 %libomp-run 0 134217728 1 134217728
+//
+// This test makes sure that large chunks sizes are handled correctly
+// including internal runtime calculations which incorporate the chunk size
+// Only one thread should execute all iterations.
+#include <stdio.h>
+#include <stdlib.h>
+#include "omp_testsuite.h"
+
+typedef unsigned long long ull_t;
+
+int main(int argc, char **argv) {
+ int i, j, lb, ub, stride, nthreads, actual_nthreads, chunk;
+ ull_t num_iters = 0;
+ ull_t counted_iters = 0;
+ int errs = 0;
+ if (argc != 5) {
+ fprintf(stderr, "error: incorrect number of arguments\n");
+ fprintf(stderr, "usage: %s <lb> <ub> <stride> <chunk>\n", argv[0]);
+ exit(EXIT_FAILURE);
+ }
+ lb = atoi(argv[1]);
+ ub = atoi(argv[2]);
+ stride = atoi(argv[3]);
+ chunk = atoi(argv[4]);
+ nthreads = omp_get_max_threads();
+ if (lb >= ub) {
+ fprintf(stderr, "error: lb must be less than ub\n");
+ exit(EXIT_FAILURE);
+ }
+ if (stride <= 0) {
+ fprintf(stderr, "error: stride must be positive integer\n");
+ exit(EXIT_FAILURE);
+ }
+ if (chunk <= 0) {
+ fprintf(stderr, "error: chunk must be positive integer\n");
+ exit(EXIT_FAILURE);
+ }
+ for (i = lb; i < ub; i += stride)
+ num_iters++;
+
+ #pragma omp parallel num_threads(nthreads)
+ {
+ #pragma omp single
+ actual_nthreads = omp_get_num_threads();
+
+ if (actual_nthreads != nthreads) {
+ printf("did not create enough threads, skipping test.\n");
+ } else {
+ #pragma omp for schedule(dynamic, chunk)
+ for (i = lb; i < ub; i += stride) {
+ counted_iters++;
+ }
+ }
+ }
+
+ // Check that the number of iterations executed is correct
+ if (actual_nthreads == nthreads && counted_iters != num_iters) {
+ fprintf(stderr, "error: wrong number of final iterations counted! "
+ "num_iters=%llu, counted_iters=%llu\n",
+ num_iters, counted_iters);
+ exit(EXIT_FAILURE);
+ }
+
+ return EXIT_SUCCESS;
+}