--- /dev/null
+/*******************************************************************************
+Copyright (c) 2016, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*******************************************************************************/
+
+/*
+ * Stack-smashing canary used by STACK_ALLOC / STACK_FREE.
+ * When STACK_ALLOC_PROTECT is defined:
+ * - STACK_ALLOC_PROTECT_SET declares a volatile local `stack_check`
+ *   initialized to a known bit pattern;
+ * - STACK_ALLOC_PROTECT_CHECK asserts that the pattern is still intact.
+ * Both macros carry their own trailing semicolon because they are expanded
+ * without one. When protection is disabled they expand to nothing.
+ */
+#define STACK_ALLOC_PROTECT
+#ifdef STACK_ALLOC_PROTECT
+// Try to detect stack smashing
+#include <assert.h>
+// The canary is a 64-bit pattern: keep it in a type that is at least 64 bits
+// wide on every platform. Stored in a narrower integer it would be truncated
+// and the comparison against the full constant would fail unconditionally.
+#define STACK_ALLOC_PROTECT_MAGIC 0x7ff8010203040506ULL
+#define STACK_ALLOC_PROTECT_SET volatile unsigned long long stack_check = STACK_ALLOC_PROTECT_MAGIC;
+#define STACK_ALLOC_PROTECT_CHECK assert(stack_check == STACK_ALLOC_PROTECT_MAGIC);
+#else
+#define STACK_ALLOC_PROTECT_SET
+#define STACK_ALLOC_PROTECT_CHECK
+#endif
+
+#if defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0
+
+/*
+ * STACK_ALLOC(SIZE, TYPE, BUFFER)
+ *
+ * Point BUFFER at a scratch array of SIZE elements of TYPE.
+ * The array is allocated on the stack if SIZE does not exceed
+ * MAX_STACK_ALLOC / sizeof(TYPE); otherwise BUFFER is obtained from
+ * blas_memory_alloc(1).
+ * Stack allocation is much faster than blas_memory_alloc or malloc,
+ * particularly when OpenBLAS is used from a multi-threaded application.
+ *
+ * SIZE must be carefully chosen to be:
+ * - as small as possible, to maximize the number of stack allocations
+ * - large enough to support all architectures and kernels
+ * Choosing a SIZE that is too small will lead to stack smashing.
+ *
+ * The macro expands to declarations (stack_alloc_size, stack_buffer, plus
+ * whatever STACK_ALLOC_PROTECT_SET declares) in the caller's scope, so it
+ * can be used only once per scope and the matching STACK_FREE, which reads
+ * stack_alloc_size, must appear in that same scope.
+ */
+#define STACK_ALLOC(SIZE, TYPE, BUFFER) \
+ /* make it volatile because some kernels (e.g. dgemv_n.S) */ \
+ /* do not restore all registers */ \
+ volatile int stack_alloc_size = SIZE; \
+ /* request too large for the stack: 0 selects the heap fallback */ \
+ if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(TYPE)) \
+ stack_alloc_size = 0; \
+ STACK_ALLOC_PROTECT_SET \
+ /* a zero-length variable-length array is undefined behavior, */ \
+ /* so keep one dummy element when the heap fallback is selected */ \
+ TYPE stack_buffer[stack_alloc_size ? stack_alloc_size : 1]; \
+ BUFFER = stack_alloc_size ? stack_buffer : (TYPE *)blas_memory_alloc(1);
+#else
+ //Original OpenBLAS/GotoBLAS codes.
+ #define STACK_ALLOC(SIZE, TYPE, BUFFER) BUFFER = (TYPE *)blas_memory_alloc(1)
+#endif
+
+
+#if defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0
+#define STACK_FREE(BUFFER) \
+ STACK_ALLOC_PROTECT_CHECK \
+ if(!stack_alloc_size) \
+ blas_memory_free(BUFFER);
+#else
+#define STACK_FREE(BUFFER) blas_memory_free(BUFFER)
+#endif
+
/*********************************************************************/
#include <stdio.h>
-#include <assert.h>
#include "common.h"
#include "l1param.h"
#ifdef FUNCTION_PROFILE
FLOAT alpha = *ALPHA;
FLOAT beta = *BETA;
FLOAT *buffer;
+ int buffer_size;
#ifdef SMP
int nthreads;
int nthreads_max;
FLOAT *buffer;
blasint lenx, leny;
- int trans;
+ int trans, buffer_size;
blasint info, t;
#ifdef SMP
int nthreads;
if (incx < 0) x -= (lenx - 1) * incx;
if (incy < 0) y -= (leny - 1) * incy;
-#ifdef MAX_STACK_ALLOC
- // make it volatile because some gemv implementation (ex: dgemv_n.S)
- // do not restore all register
- volatile int stack_alloc_size = 0;
- //for gemv_n and gemv_t, try to allocate on stack
- stack_alloc_size = m + n;
-#ifdef ALIGNED_ACCESS
- stack_alloc_size += 3;
-#endif
-// if(stack_alloc_size < 128)
- //dgemv_n.S require a 128 bytes buffer
-// increasing instead of capping 128
-// ABI STACK for windows 288 bytes
- stack_alloc_size += 288 / sizeof(FLOAT) ;
-
- if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(FLOAT))
- stack_alloc_size = 0;
-
-// stack overflow check
- volatile double stack_check = 3.14159265358979323846;
- FLOAT stack_buffer[stack_alloc_size];
- buffer = stack_alloc_size ? stack_buffer : (FLOAT *)blas_memory_alloc(1);
- // printf("stack_alloc_size=%d\n", stack_alloc_size);
-#else
- //Original OpenBLAS/GotoBLAS codes.
- buffer = (FLOAT *)blas_memory_alloc(1);
+ buffer_size = m + n + 128 / sizeof(FLOAT);
+#ifdef WINDOWS_ABI
+ buffer_size += 160 / sizeof(FLOAT) ;
#endif
+ // for alignment
+ buffer_size = (buffer_size + 3) & ~3;
+ STACK_ALLOC(buffer_size, FLOAT, buffer);
#ifdef SMP
}
#endif
-#ifdef MAX_STACK_ALLOC
- // stack overflow check
- assert(stack_check==3.14159265358979323846);
-
- if(!stack_alloc_size){
- blas_memory_free(buffer);
- }
-#else
- blas_memory_free(buffer);
-#endif
-
+ STACK_FREE(buffer);
FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n);
IDEBUG_END;