From aa54fe064c1056e4f87266d1d64f10c51d9558a9 Mon Sep 17 00:00:00 2001 From: wernsaar Date: Thu, 7 Aug 2014 22:30:20 +0200 Subject: [PATCH] added zgemv_n c-function --- kernel/x86_64/zgemv_n.c | 302 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 302 insertions(+) create mode 100644 kernel/x86_64/zgemv_n.c diff --git a/kernel/x86_64/zgemv_n.c b/kernel/x86_64/zgemv_n.c new file mode 100644 index 0000000..be5b08d --- /dev/null +++ b/kernel/x86_64/zgemv_n.c @@ -0,0 +1,302 @@ +/*************************************************************************** +Copyright (c) 2014, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+
+#include "common.h"
+
+
+#define NBMAX 1024 /* NOTE(review): not referenced in the part of the file visible here */
+
+#ifndef HAVE_KERNEL_16x4
+/*
+ * zgemv_kernel_16x4: plain C version, compiled only when HAVE_KERNEL_16x4
+ * is not defined.
+ *
+ * For every even index i in [0, 2*n) the pair (v[i], v[i+1]) is handled as
+ * one complex value, real part first. Each of the four arrays ap[0]..ap[3]
+ * is combined with one fixed complex multiplier taken from x
+ * (ap[k] pairs with x[2*k] and x[2*k+1]) and the result is accumulated
+ * into y, column 0 first and column 3 last.
+ *
+ *   n  - number of complex elements processed (2*n FLOATs per array)
+ *   ap - four pointers, each read at indices 0 .. 2*n-1
+ *   x  - read at indices 0 .. 7 (four complex multipliers)
+ *   y  - 2*n FLOATs, updated in place; it is not cleared here
+ *
+ * With a = (ar, ai) read from ap[k] and x = (xr, xi), each column adds:
+ *   CONJ undefined, XCONJ undefined: re += ar*xr - ai*xi, im += ar*xi + ai*xr
+ *   CONJ undefined, XCONJ defined:   re += ar*xr + ai*xi, im += ar*xi - ai*xr
+ *   CONJ defined,   XCONJ undefined: re += ar*xr + ai*xi, im -= ar*xi - ai*xr
+ *   CONJ defined,   XCONJ defined:   re += ar*xr - ai*xi, im -= ar*xi + ai*xr
+ *
+ * NOTE(review): the 16 in the name is not reflected in this loop, which
+ * advances one complex element per iteration; presumably the name mirrors
+ * an optimized kernel that can replace this one - confirm.
+ */
+static void zgemv_kernel_16x4(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
+{
+	BLASLONG i;
+	FLOAT *a0,*a1,*a2,*a3;
+	a0 = ap[0];
+	a1 = ap[1];
+	a2 = ap[2];
+	a3 = ap[3];
+	/* i indexes FLOATs, so it advances by 2 per complex element */
+	for ( i=0; i< 2*n; i+=2 )
+	{
+#if !defined(CONJ)
+#if !defined(XCONJ)
+		y[i] += a0[i]*x[0] - a0[i+1] * x[1];
+		y[i+1] += a0[i]*x[1] + a0[i+1] * x[0];
+		y[i] += a1[i]*x[2] - a1[i+1] * x[3];
+		y[i+1] += a1[i]*x[3] + a1[i+1] * x[2];
+		y[i] += a2[i]*x[4] - a2[i+1] * x[5];
+		y[i+1] += a2[i]*x[5] + a2[i+1] * x[4];
+		y[i] += a3[i]*x[6] - a3[i+1] * x[7];
+		y[i+1] += a3[i]*x[7] + a3[i+1] * x[6];
+#else /* CONJ undefined, XCONJ defined */
+		y[i] += a0[i]*x[0] + a0[i+1] * x[1];
+		y[i+1] += a0[i]*x[1] - a0[i+1] * x[0];
+		y[i] += a1[i]*x[2] + a1[i+1] * x[3];
+		y[i+1] += a1[i]*x[3] - a1[i+1] * x[2];
+		y[i] += a2[i]*x[4] + a2[i+1] * x[5];
+		y[i+1] += a2[i]*x[5] - a2[i+1] * x[4];
+		y[i] += a3[i]*x[6] + a3[i+1] * x[7];
+		y[i+1] += a3[i]*x[7] - a3[i+1] * x[6];
+#endif /* XCONJ */
+#else /* CONJ defined */
+#if !defined(XCONJ)
+		y[i] += a0[i]*x[0] + a0[i+1] * x[1];
+		y[i+1] -= a0[i]*x[1] - a0[i+1] * x[0];
+		y[i] += a1[i]*x[2] + a1[i+1] * x[3];
+		y[i+1] -= a1[i]*x[3] - a1[i+1] * x[2];
+		y[i] += a2[i]*x[4] + a2[i+1] * x[5];
+		y[i+1] -= a2[i]*x[5] - a2[i+1] * x[4];
+		y[i] += a3[i]*x[6] + a3[i+1] * x[7];
+		y[i+1] -= a3[i]*x[7] - a3[i+1] * x[6];
+
+#else /* CONJ defined, XCONJ defined */
+		y[i] += a0[i]*x[0] - a0[i+1] * x[1];
+		y[i+1] -= a0[i]*x[1] + a0[i+1] * x[0];
+		y[i] += a1[i]*x[2] - a1[i+1] * x[3];
+		y[i+1] -= a1[i]*x[3] + a1[i+1] * x[2];
+		y[i] += a2[i]*x[4] - a2[i+1] * x[5];
+		y[i+1] -= a2[i]*x[5] + a2[i+1] * x[4];
+		y[i] += a3[i]*x[6] - a3[i+1] * x[7];
+		y[i+1] -= a3[i]*x[7] + a3[i+1] * x[6];
+
+#endif /* XCONJ */
+#endif /* CONJ */
+	}
+}
+
+#endif /* HAVE_KERNEL_16x4 */
+/*
+ * zgemv_kernel_16x1: single-column counterpart of zgemv_kernel_16x4.
+ *
+ * For every even index i in [0, 2*n) the pair (ap[i], ap[i+1]) is combined
+ * with the one complex multiplier (x[0], x[1]) and accumulated into
+ * (y[i], y[i+1]). The CONJ / XCONJ macros select the same four sign
+ * patterns that the four-column kernel applies to each of its columns.
+ *
+ *   n  - number of complex elements processed
+ *   ap - read at indices 0 .. 2*n-1
+ *   x  - read at indices 0 and 1
+ *   y  - 2*n FLOATs, updated in place; it is not cleared here
+ *
+ * Unlike the four-column kernel, this definition is not wrapped in a
+ * HAVE_KERNEL_* guard.
+ */
+static void zgemv_kernel_16x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y)
+{
+	BLASLONG i;
+	FLOAT *a0;
+	a0 = ap;
+	/* i indexes FLOATs, so it advances by 2 per complex element */
+	for ( i=0; i< 2*n; i+=2 )
+	{
+#if !defined(CONJ)
+#if !defined(XCONJ)
+		y[i] += a0[i]*x[0] - a0[i+1] * x[1];
+		y[i+1] += a0[i]*x[1] + a0[i+1] * x[0];
+#else /* CONJ undefined, XCONJ defined */
+		y[i] += a0[i]*x[0] + a0[i+1] * x[1];
+		y[i+1] += a0[i]*x[1] - a0[i+1] * x[0];
+#endif /* XCONJ */
+#else /* CONJ defined */
+#if !defined(XCONJ)
+		y[i] += a0[i]*x[0] + a0[i+1] * x[1];
+		y[i+1] -= a0[i]*x[1] - a0[i+1] * x[0];
+
+#else /* CONJ defined, XCONJ defined */
+		y[i] += a0[i]*x[0] - a0[i+1] * x[1];
+		y[i+1] -= a0[i]*x[1] + a0[i+1] * x[0];
+#endif /* XCONJ */
+#endif /* CONJ */
+
+	}
+}
+
+/*
+ * zero_y: stores 0.0 into the 2*n consecutive FLOATs starting at dest,
+ * i.e. two FLOATs for each of the n elements counted by the caller.
+ * Nothing is written when n is zero or negative.
+ */
+static void zero_y(BLASLONG n, FLOAT *dest)
+{
+	BLASLONG i;
+	for ( i=0; i<2*n; i++ )
+	{
+		*dest = 0.0;
+		dest++;
+	}
+}
+
+
+
+static void add_y(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_dest)
+{
+	BLASLONG i;
+	for ( i=0; i