OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-
#include "common.h"
-
-
#ifndef HAVE_ASM_KERNEL
#include <altivec.h>
+static const unsigned char __attribute__((aligned(16))) swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11};
+
static void caxpy_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT alpha_i)
{
register __vector float valpha_i = {alpha_i, alpha_i,alpha_i, alpha_i};
#endif
- __vector unsigned char swap_mask = { 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11};
+ __vector unsigned char swap_mask = *((__vector unsigned char*)swap_mask_arr);
register __vector float *vy = (__vector float *) y;
register __vector float *vx = (__vector float *) x;
BLASLONG i=0;
*****************************************************************************/
#include "common.h"
-
#ifndef HAVE_KERNEL_8
#include <altivec.h>
+static const unsigned char __attribute__((aligned(16))) swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11};
static void cdot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, float *dot)
{
- __vector unsigned char swap_mask = { 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11};
+ __vector unsigned char swap_mask = *((__vector unsigned char*)swap_mask_arr);
register __vector float *vy = (__vector float *) y;
register __vector float *vx = (__vector float *) x;
BLASLONG i = 0;
BLASLONG i = 0;
BLASLONG ix=0, iy=0;
OPENBLAS_COMPLEX_FLOAT result;
- FLOAT dot[4] __attribute__ ((aligned(16))) = {0.0, 0.0, 0.0, 0.0};
+ FLOAT dot[4] __attribute__((aligned(16))) = {0.0, 0.0, 0.0, 0.0};
if (n <= 0) {
CREAL(result) = 0.0;
#define NBMAX 1024\r
\r
\r
-static const unsigned char swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11};\r
+static const unsigned char __attribute__((aligned(16))) swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11};\r
\r
\r
static void cgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOAT *y) {\r
BLASLONG m2;\r
BLASLONG m3;\r
BLASLONG n2;\r
-\r
- FLOAT xbuffer[8], *ybuffer;\r
+ FLOAT xbuffer[8] __attribute__((aligned(16)));\r
+ FLOAT *ybuffer;\r
\r
if (m < 1) return (0);\r
if (n < 1) return (0);\r
\r
#define NBMAX 1024 \r
#include <altivec.h> \r
-static const unsigned char swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11};\r
+static const unsigned char __attribute__((aligned(16))) swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11};\r
\r
static void cgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT alpha_i) {\r
BLASLONG i;\r
BLASLONG m2;\r
BLASLONG m3;\r
BLASLONG n2;\r
-\r
- FLOAT ybuffer[8], *xbuffer;\r
+ FLOAT ybuffer[8] __attribute__((aligned(16)));\r
+ FLOAT *xbuffer;\r
\r
if (m < 1) return (0);\r
if (n < 1) return (0);\r
BLASLONG m3;
BLASLONG n2;
BLASLONG lda4 = lda << 2;
- FLOAT xbuffer[8] __attribute__ ((aligned (16)));;
+ FLOAT xbuffer[8] __attribute__ ((aligned (16)));
FLOAT *ybuffer;
if ( m < 1 ) return(0);
BLASLONG m1;
BLASLONG m2;
BLASLONG m3;
- BLASLONG n2;
-
- FLOAT ybuffer[8], *xbuffer;
+ BLASLONG n2;
+ FLOAT ybuffer[8] __attribute__((aligned(16)));
+ FLOAT *xbuffer;
if (m < 1) return (0);
if (n < 1) return (0);
BLASLONG n2;\r
BLASLONG lda4 = lda << 2;\r
BLASLONG lda8 = lda << 3;\r
- FLOAT xbuffer[8],*ybuffer;\r
+ FLOAT xbuffer[8] __attribute__((aligned(16)));\r
+ FLOAT *ybuffer;\r
\r
if ( m < 1 ) return(0);\r
if ( n < 1 ) return(0);\r
BLASLONG n2;\r
BLASLONG lda4 = lda << 2;\r
BLASLONG lda8 = lda << 3;\r
- FLOAT xbuffer[8],*ybuffer;\r
+ FLOAT xbuffer[8] __attribute__((aligned(16)));\r
+ FLOAT *ybuffer;\r
\r
if ( m < 1 ) return(0);\r
if ( n < 1 ) return(0);\r
BLASLONG m1;\r
BLASLONG m2;\r
BLASLONG m3;\r
- BLASLONG n2;\r
-\r
- FLOAT ybuffer[8], *xbuffer;\r
-\r
+ BLASLONG n2; \r
+ FLOAT ybuffer[8] __attribute__((aligned(16)));\r
+ FLOAT *xbuffer; \r
if (m < 1) return (0);\r
if (n < 1) return (0);\r
\r
BLASLONG m3;\r
BLASLONG n2;\r
\r
- FLOAT ybuffer[8], *xbuffer;\r
-\r
+ FLOAT ybuffer[8] __attribute__((aligned(16)));\r
+ FLOAT *xbuffer; \r
if (m < 1) return (0);\r
if (n < 1) return (0);\r
\r
BLASLONG m2;
BLASLONG m3;
BLASLONG n2;
-
- FLOAT xbuffer[8], *ybuffer;
+ FLOAT xbuffer[8] __attribute__((aligned(16)));
+ FLOAT *ybuffer;
if (m < 1) return (0);
if (n < 1) return (0);
BLASLONG m2;
BLASLONG m3;
BLASLONG n2;
-
- FLOAT ybuffer[8], *xbuffer;
+ FLOAT ybuffer[8] __attribute__((aligned(16)));
+ FLOAT *xbuffer;
if (m < 1) return (0);
if (n < 1) return (0);