# This flag is always set for POWER8. Don't modify the flag
# USE_OPENMP = 1
+# The OpenMP scheduler to use - by default this is "static" and you
+# will normally not want to change this unless you know that your main
+# workload will involve tasks that have highly unbalanced running times
+# for individual threads. Changing away from "static" may also adversely
+# affect memory access locality in NUMA systems. Setting to "runtime" will
+# allow you to select the scheduler from the environment variable OMP_SCHEDULE
+# CCOMMON_OPT += -DOMP_SCHED=dynamic
+
# You can define maximum number of threads. Basically it should be
# less than actual number of cores. If you don't specify one, it's
# automatically detected by the script.
# CONSISTENT_FPCSR = 1
# If any gemm argument m, n or k is less than or equal to this threshold, gemm will be executed
-# with single thread. You can use this flag to avoid the overhead of multi-threading
-# in small matrix sizes. The default value is 4.
+# with a single thread. (Actually in recent versions this is a factor proportional to the
+# number of floating point operations necessary for the given problem size, no longer
+# an individual dimension). You can use this setting to avoid the overhead of multi-
+# threading in small matrix sizes. The default value is 4, but values as high as 50 have
+# been reported to be optimal for certain workloads (50 is the recommended value for Julia).
# GEMM_MULTITHREAD_THRESHOLD = 4
# If you need a sanity check by comparing against reference BLAS. It'll be very
#else
+#ifndef OMP_SCHED
+#define OMP_SCHED static
+#endif
+
int blas_server_avail = 0;
static void * blas_thread_buffer[MAX_PARALLEL_NUMBER][MAX_CPU_NUMBER];
break;
}
-#pragma omp parallel for schedule(static)
+#pragma omp parallel for schedule(OMP_SCHED)
for (i = 0; i < num; i ++) {
#ifndef USE_SIMPLE_THREADED_LEVEL3