# If you want to enable the experimental BFLOAT16 support
# BUILD_HALF = 1
-#
+
+
+# Set the thread count threshold beyond which the job array for the threaded level3 BLAS
+# will be allocated on the heap rather than the stack. (This array alone requires
+# NUM_THREADS*NUM_THREADS*128 bytes of memory, so it should not pose a problem at low CPU
+# counts, but obviously it is not the only item that ends up on the stack.)
+# The default value of 32 ensures that the overall requirement is compatible
+# with the default 1MB stacksize imposed by having the Java VM loaded without use
+# of its -Xss parameter.
+# The value of 160 formerly used from about version 0.2.7 until 0.3.10 is easily compatible
+# with the common Linux stacksize of 8MB but will cause crashes with unwary use of the Java
+# VM, e.g. in Octave or with the Java-based libhdfs in numpy or scipy code.
+# BLAS3_MEM_ALLOC_THRESHOLD = 160
+
+
+
# the below is not yet configurable, use cmake if you need to build only select types
BUILD_SINGLE = 1
BUILD_DOUBLE = 1
#endif
#ifndef BLAS3_MEM_ALLOC_THRESHOLD
-#define BLAS3_MEM_ALLOC_THRESHOLD 160
+#define BLAS3_MEM_ALLOC_THRESHOLD 32
#endif
#ifdef QUAD_PRECISION