From b3938fe371d1806233b06eff23eda4456d2f763a Mon Sep 17 00:00:00 2001 From: wernsaar Date: Sat, 19 Jul 2014 07:15:34 +0200 Subject: [PATCH] don't use this sgemv_n on Windows --- kernel/x86_64/KERNEL.BULLDOZER | 5 ++ kernel/x86_64/KERNEL.PILEDRIVER | 5 ++ kernel/x86_64/sgemv_n_avx.c | 6 ++- kernel/x86_64/sgemv_n_microk_bulldozer.c | 78 ++++++++++++++++---------------- 4 files changed, 53 insertions(+), 41 deletions(-) diff --git a/kernel/x86_64/KERNEL.BULLDOZER b/kernel/x86_64/KERNEL.BULLDOZER index 55932e6..fac8016 100644 --- a/kernel/x86_64/KERNEL.BULLDOZER +++ b/kernel/x86_64/KERNEL.BULLDOZER @@ -1,4 +1,9 @@ +ifdef OS_WINDOWS +SGEMVNKERNEL = ../arm/gemv_n.c +else SGEMVNKERNEL = sgemv_n_avx.c +endif + ZGEMVNKERNEL = zgemv_n_dup.S ZGEMVTKERNEL = zgemv_t.S diff --git a/kernel/x86_64/KERNEL.PILEDRIVER b/kernel/x86_64/KERNEL.PILEDRIVER index 145d9fb..555c805 100644 --- a/kernel/x86_64/KERNEL.PILEDRIVER +++ b/kernel/x86_64/KERNEL.PILEDRIVER @@ -1,4 +1,9 @@ +ifdef OS_WINDOWS +SGEMVNKERNEL = ../arm/gemv_n.c +else SGEMVNKERNEL = sgemv_n_avx.c +endif + ZGEMVNKERNEL = zgemv_n_dup.S ZGEMVTKERNEL = zgemv_t.S diff --git a/kernel/x86_64/sgemv_n_avx.c b/kernel/x86_64/sgemv_n_avx.c index dc8d015..91e3ee4 100644 --- a/kernel/x86_64/sgemv_n_avx.c +++ b/kernel/x86_64/sgemv_n_avx.c @@ -61,8 +61,10 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO FLOAT *a_ptr; FLOAT *x_ptr; FLOAT *y_ptr; - BLASLONG n1,n2; - BLASLONG m1,m2; + BLASLONG n1; + BLASLONG m1; + BLASLONG register m2; + BLASLONG register n2; FLOAT *xbuffer,*ybuffer; xbuffer = buffer; ybuffer = xbuffer + 2048 + 256; diff --git a/kernel/x86_64/sgemv_n_microk_bulldozer.c b/kernel/x86_64/sgemv_n_microk_bulldozer.c index 1cecd96..1b07f02 100644 --- a/kernel/x86_64/sgemv_n_microk_bulldozer.c +++ b/kernel/x86_64/sgemv_n_microk_bulldozer.c @@ -25,13 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *****************************************************************************/ -static void sgemv_kernel_64( long n, float alpha, float *a, long lda, float *x, float *y) +static void sgemv_kernel_64( long n, float alpha, float *a, long lda, float *x, float *y) { float *pre = a + lda*3; - __asm __volatile + __asm__ __volatile__ ( "movq %0, %%rax\n\t" // n -> rax "vbroadcastss %1, %%ymm1\n\t" // alpha -> ymm1 @@ -103,10 +103,10 @@ static void sgemv_kernel_64( long n, float alpha, float *a, long lda, float *x, "m" (x), // 4 "m" (y), // 5 "m" (pre) // 6 - : "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", - "xmm0" , "xmm1", - "xmm8", "xmm9", "xmm10", "xmm11", - "xmm12", "xmm13", "xmm14", "xmm15", + : "%rax", "%rcx", "%rdx", "%rsi", "%rdi", "%r8", + "%xmm0", "%xmm1", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", + "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); @@ -114,13 +114,13 @@ static void sgemv_kernel_64( long n, float alpha, float *a, long lda, float *x, -static void sgemv_kernel_32( long n, float alpha, float *a, long lda, float *x, float *y) +static void sgemv_kernel_32( long n, float alpha, float *a, long lda, float *x, float *y) { float *pre = a + lda*3; - __asm __volatile + __asm__ __volatile__ ( "movq %0, %%rax\n\t" // n -> rax "vbroadcastss %1, %%xmm1\n\t" // alpha -> xmm1 @@ -190,21 +190,16 @@ static void sgemv_kernel_32( long n, float alpha, float *a, long lda, float *x, "m" (x), // 4 "m" (y), // 5 "m" (pre) // 6 - : "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", - "xmm0" , "xmm1", - "xmm8", "xmm9", "xmm10", "xmm11", - "xmm12", "xmm13", "xmm14", "xmm15", - "memory" ); } -static void sgemv_kernel_16( long n, float alpha, float *a, long lda, float *x, float *y) +static void sgemv_kernel_16( long n, float alpha, float *a, long lda, float *x, float *y) { - float *pre = a + lda*1; + float *pre = a + lda*3; - __asm __volatile + __asm__ __volatile__ ( "movq %0, %%rax\n\t" // n -> rax "vbroadcastss %1, %%ymm1\n\t" // alpha -> ymm1 @@ -248,20 +243,21 @@ static void sgemv_kernel_16( long n, float alpha, float *a, long lda, float *x, "m" (x), // 4 "m" (y), // 5 "m" (pre) // 6 - : "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", - "xmm0" , "xmm1", - "xmm12", "xmm13", "xmm14", "xmm15", + : "%rax", "%rcx", "%rdx", "%rsi", "%rdi", "%r8", + "%xmm0", "%xmm1", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", + "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); } -static void sgemv_kernel_8( long n, float alpha, float *a, long lda, float *x, float *y) +static void sgemv_kernel_8( long n, float alpha, float *a, long lda, float *x, float *y) { - __asm __volatile + __asm__ __volatile__ ( "movq %0, %%rax\n\t" // n -> rax "vbroadcastss %1, %%ymm1\n\t" // alpha -> ymm1 @@ -295,20 +291,21 @@ static void sgemv_kernel_8( long n, float alpha, float *a, long lda, float *x, f "m" (lda), // 3 "m" (x), // 4 "m" (y) // 5 - : "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", - "xmm0" , "xmm1", - "xmm12", "xmm13", "xmm14", "xmm15", + : "%rax", "%rcx", "%rdx", "%rsi", "%rdi", "%r8", + "%xmm0", "%xmm1", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", + "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); } -static void sgemv_kernel_4( long n, float alpha, float *a, long lda, float *x, float *y) +static void sgemv_kernel_4( long n, float alpha, float *a, long lda, float *x, float *y) { - __asm __volatile + __asm__ __volatile__ ( "movq %0, %%rax\n\t" // n -> rax "vbroadcastss %1, %%xmm1\n\t" // alpha -> xmm1 @@ -342,19 +339,20 @@ static void sgemv_kernel_4( long n, float alpha, float *a, long lda, float *x, f "m" (lda), // 3 "m" (x), // 4 "m" (y) // 5 - : "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", - "xmm0" , "xmm1", - "xmm12", "xmm13", "xmm14", "xmm15", + : "%rax", "%rcx", "%rdx", "%rsi", "%rdi", "%r8", + "%xmm0", "%xmm1", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", + "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); } -static void sgemv_kernel_2( long n, float alpha, float *a, long lda, float *x, float *y) +static void sgemv_kernel_2( long n, float alpha, float *a, long lda, float *x, float *y) { - __asm __volatile + __asm__ __volatile__ ( "movq %0, %%rax\n\t" // n -> rax "vmovss %1, %%xmm1\n\t" // alpha -> xmm1 @@ -392,9 +390,10 @@ static void sgemv_kernel_2( long n, float alpha, float *a, long lda, float *x, f "m" (lda), // 3 "m" (x), // 4 "m" (y) // 5 - : "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", - "xmm0" , "xmm1", - "xmm12", "xmm13", "xmm14", "xmm15", + : "%rax", "%rcx", "%rdx", "%rsi", "%rdi", "%r8", + "%xmm0", "%xmm1", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", + "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); @@ -402,11 +401,11 @@ static void sgemv_kernel_2( long n, float alpha, float *a, long lda, float *x, f -static void sgemv_kernel_1( long n, float alpha, float *a, long lda, float *x, float *y) +static void sgemv_kernel_1( long n, float alpha, float *a, long lda, float *x, float *y) { - __asm __volatile + __asm__ __volatile__ ( "movq %0, %%rax\n\t" // n -> rax "vmovss %1, %%xmm1\n\t" // alpha -> xmm1 @@ -440,9 +439,10 @@ static void sgemv_kernel_1( long n, float alpha, float *a, long lda, float *x, f "m" (lda), // 3 "m" (x), // 4 "m" (y) // 5 - : "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", - "xmm0" , "xmm1", - "xmm12", "xmm13", "xmm14", "xmm15", + : "%rax", "%rcx", "%rdx", "%rsi", "%rdi", "%r8", + "%xmm0", "%xmm1", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", + "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); -- 2.7.4