I used a smaller threshold on Windows, since the stack size there is only 1 MB.
#define YIELDING sched_yield()
#endif
+/***
+Thresholds that decide whether job_t is allocated on the heap or on the stack.
+Please see https://github.com/xianyi/OpenBLAS/issues/246
+***/
+#if defined(OS_WINDOWS)
+#define GETRF_MEM_ALLOC_THRESHOLD 32
+#define BLAS3_MEM_ALLOC_THRESHOLD 32
+#endif
+
+#ifndef GETRF_MEM_ALLOC_THRESHOLD
+#define GETRF_MEM_ALLOC_THRESHOLD 80
+#endif
+
+#ifndef BLAS3_MEM_ALLOC_THRESHOLD
+#define BLAS3_MEM_ALLOC_THRESHOLD 160
+#endif
+
#ifdef QUAD_PRECISION
#include "common_quad.h"
#endif
//The array of job_t may overflow the stack.
//Instead, use malloc to alloc job_t.
-#if MAX_CPU_NUMBER > 210
+#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
#define USE_ALLOC_HEAP
#endif
//The array of job_t may overflow the stack.
//Instead, use malloc to alloc job_t.
-#if MAX_CPU_NUMBER > 210
+#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
#define USE_ALLOC_HEAP
#endif
//The array of job_t may overflow the stack.
//Instead, use malloc to alloc job_t.
-#if MAX_CPU_NUMBER > 210
+#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
#define USE_ALLOC_HEAP
#endif
//In this case, the recursive getrf_parallel may overflow the stack.
//Instead, use malloc to alloc job_t.
-#if MAX_CPU_NUMBER > 90
+#if MAX_CPU_NUMBER > GETRF_MEM_ALLOC_THRESHOLD
#define USE_ALLOC_HEAP
#endif
//The array of job_t may overflow the stack.
//Instead, use malloc to alloc job_t.
-#if MAX_CPU_NUMBER > 210
+#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
#define USE_ALLOC_HEAP
#endif