From: Martin Kroeker Date: Thu, 17 Jan 2019 22:20:32 +0000 (+0100) Subject: Fix declaration of input arguments in the x86_64 microkernels for DOT and AXPY (... X-Git-Tag: v0.3.6^2~76 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d5e6940253b2ee638509de283b8b1d7695fefbbf;p=platform%2Fupstream%2Fopenblas.git Fix declaration of input arguments in the x86_64 microkernels for DOT and AXPY (#1965) * Tag operands 0 and 1 as both input and output For #1964 (basically a continuation of coding problems first seen in #1292) --- diff --git a/kernel/x86_64/caxpy_microk_bulldozer-2.c b/kernel/x86_64/caxpy_microk_bulldozer-2.c index 33bda09..ca22093 100644 --- a/kernel/x86_64/caxpy_microk_bulldozer-2.c +++ b/kernel/x86_64/caxpy_microk_bulldozer-2.c @@ -114,9 +114,9 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n), // 1 + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (alpha), // 4 @@ -180,10 +180,10 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "jnz 1b \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (alpha), // 4 diff --git a/kernel/x86_64/caxpy_microk_haswell-2.c b/kernel/x86_64/caxpy_microk_haswell-2.c index 00e2e6a..b605ea3 100644 --- a/kernel/x86_64/caxpy_microk_haswell-2.c +++ b/kernel/x86_64/caxpy_microk_haswell-2.c @@ -112,9 +112,9 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n), // 1 + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (alpha), // 4 diff --git a/kernel/x86_64/caxpy_microk_sandy-2.c b/kernel/x86_64/caxpy_microk_sandy-2.c index a798fd9..72d37af 100644 --- a/kernel/x86_64/caxpy_microk_sandy-2.c +++ b/kernel/x86_64/caxpy_microk_sandy-2.c @@ -95,10 +95,10 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "jnz 1b \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (alpha), // 4 diff --git a/kernel/x86_64/caxpy_microk_steamroller-2.c b/kernel/x86_64/caxpy_microk_steamroller-2.c index 87370b0..7ca7af0 100644 --- a/kernel/x86_64/caxpy_microk_steamroller-2.c +++ b/kernel/x86_64/caxpy_microk_steamroller-2.c @@ -113,10 +113,10 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "jnz 1b \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (alpha), // 4 @@ -181,9 +181,9 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n), // 1 + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (alpha), // 4 diff --git a/kernel/x86_64/cdot_microk_bulldozer-2.c b/kernel/x86_64/cdot_microk_bulldozer-2.c index f587aa0..1186559 100644 --- a/kernel/x86_64/cdot_microk_bulldozer-2.c +++ b/kernel/x86_64/cdot_microk_bulldozer-2.c @@ -97,9 +97,9 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n), // 1 + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4 @@ -175,10 +175,10 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "vmovups %%xmm4, 16(%4) \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4 diff --git a/kernel/x86_64/cdot_microk_haswell-2.c b/kernel/x86_64/cdot_microk_haswell-2.c index fe195a6..8b9d6d1 100644 --- a/kernel/x86_64/cdot_microk_haswell-2.c +++ b/kernel/x86_64/cdot_microk_haswell-2.c @@ -98,9 +98,9 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n), // 1 + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4 diff --git a/kernel/x86_64/cdot_microk_sandy-2.c b/kernel/x86_64/cdot_microk_sandy-2.c index 0181691..fe142c3 100644 --- a/kernel/x86_64/cdot_microk_sandy-2.c +++ b/kernel/x86_64/cdot_microk_sandy-2.c @@ -105,10 +105,10 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "vmovups %%xmm4, 16(%4) \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4 diff --git a/kernel/x86_64/cdot_microk_steamroller-2.c b/kernel/x86_64/cdot_microk_steamroller-2.c index 76a3aa0..7350b21 100644 --- a/kernel/x86_64/cdot_microk_steamroller-2.c +++ b/kernel/x86_64/cdot_microk_steamroller-2.c @@ -97,9 +97,9 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n), // 1 + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4 @@ -175,10 +175,10 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "vmovups %%xmm4, 16(%4) \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4 diff --git a/kernel/x86_64/daxpy_microk_bulldozer-2.c b/kernel/x86_64/daxpy_microk_bulldozer-2.c index 8c520dc..9c1305b 100644 --- a/kernel/x86_64/daxpy_microk_bulldozer-2.c +++ b/kernel/x86_64/daxpy_microk_bulldozer-2.c @@ -64,9 +64,9 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "jnz 1b \n\t" : - : - "r" (i), // 0 - "r" (n), // 1 + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (alpha) // 4 diff --git a/kernel/x86_64/daxpy_microk_haswell-2.c b/kernel/x86_64/daxpy_microk_haswell-2.c index bbe8b95..f3682e6 100644 --- a/kernel/x86_64/daxpy_microk_haswell-2.c +++ b/kernel/x86_64/daxpy_microk_haswell-2.c @@ -59,10 +59,10 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "jnz 1b \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (alpha) // 4 diff --git a/kernel/x86_64/daxpy_microk_nehalem-2.c b/kernel/x86_64/daxpy_microk_nehalem-2.c index 943d893..8feb9f2 100644 --- a/kernel/x86_64/daxpy_microk_nehalem-2.c +++ b/kernel/x86_64/daxpy_microk_nehalem-2.c @@ -73,9 +73,9 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "jnz 1b \n\t" : - : - "r" (i), // 0 - "r" (n), // 1 + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (alpha) // 4 diff --git a/kernel/x86_64/daxpy_microk_piledriver-2.c b/kernel/x86_64/daxpy_microk_piledriver-2.c index 95eb953..4b83124 100644 --- a/kernel/x86_64/daxpy_microk_piledriver-2.c +++ b/kernel/x86_64/daxpy_microk_piledriver-2.c @@ -78,10 +78,10 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "subq $16, %1 \n\t" "jnz 1b \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (alpha) // 4 @@ -140,10 +140,10 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "subq $16, %1 \n\t" "jnz 1b \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (alpha) // 4 diff --git a/kernel/x86_64/daxpy_microk_sandy-2.c b/kernel/x86_64/daxpy_microk_sandy-2.c index 85e038c..db9a45d 100644 --- a/kernel/x86_64/daxpy_microk_sandy-2.c +++ b/kernel/x86_64/daxpy_microk_sandy-2.c @@ -99,10 +99,10 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (alpha) // 4 diff --git a/kernel/x86_64/daxpy_microk_steamroller-2.c b/kernel/x86_64/daxpy_microk_steamroller-2.c index e400090..8e63fcc 100644 --- a/kernel/x86_64/daxpy_microk_steamroller-2.c +++ b/kernel/x86_64/daxpy_microk_steamroller-2.c @@ -78,10 +78,10 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "subq $16, %1 \n\t" "jnz 1b \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (alpha) // 4 @@ -140,10 +140,10 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "subq $16, %1 \n\t" "jnz 1b \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (alpha) // 4 diff --git a/kernel/x86_64/ddot_microk_bulldozer-2.c b/kernel/x86_64/ddot_microk_bulldozer-2.c index 9756ee4..5590c5b 100644 --- a/kernel/x86_64/ddot_microk_bulldozer-2.c +++ b/kernel/x86_64/ddot_microk_bulldozer-2.c @@ -65,10 +65,10 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "vmovsd %%xmm4, (%4) \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4 diff --git a/kernel/x86_64/ddot_microk_haswell-2.c b/kernel/x86_64/ddot_microk_haswell-2.c index 3657373..dbb5487 100644 --- a/kernel/x86_64/ddot_microk_haswell-2.c +++ b/kernel/x86_64/ddot_microk_haswell-2.c @@ -77,9 +77,9 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n), // 1 + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4 diff --git a/kernel/x86_64/ddot_microk_nehalem-2.c b/kernel/x86_64/ddot_microk_nehalem-2.c index fb5ec9b..e5e234e 100644 --- a/kernel/x86_64/ddot_microk_nehalem-2.c +++ b/kernel/x86_64/ddot_microk_nehalem-2.c @@ -75,10 +75,10 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "movsd %%xmm4, (%4) \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4 diff --git a/kernel/x86_64/ddot_microk_piledriver-2.c b/kernel/x86_64/ddot_microk_piledriver-2.c index ac95088..cc4bcd9 100644 --- a/kernel/x86_64/ddot_microk_piledriver-2.c +++ b/kernel/x86_64/ddot_microk_piledriver-2.c @@ -81,10 +81,10 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "vmovsd %%xmm4, (%4) \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4 @@ -145,10 +145,10 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "vmovsd %%xmm4, (%4) \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4 diff --git a/kernel/x86_64/ddot_microk_sandy-2.c b/kernel/x86_64/ddot_microk_sandy-2.c index 160f956..84493ec 100644 --- a/kernel/x86_64/ddot_microk_sandy-2.c +++ b/kernel/x86_64/ddot_microk_sandy-2.c @@ -81,10 +81,10 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "vmovsd %%xmm4, (%4) \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4 diff --git a/kernel/x86_64/ddot_microk_steamroller-2.c b/kernel/x86_64/ddot_microk_steamroller-2.c index 5ce20b5..27d5244 100644 --- a/kernel/x86_64/ddot_microk_steamroller-2.c +++ b/kernel/x86_64/ddot_microk_steamroller-2.c @@ -78,10 +78,10 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "vmovsd %%xmm4, (%4) \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4 diff --git a/kernel/x86_64/saxpy_microk_haswell-2.c b/kernel/x86_64/saxpy_microk_haswell-2.c index 3a743d6..7099ba4 100644 --- a/kernel/x86_64/saxpy_microk_haswell-2.c +++ b/kernel/x86_64/saxpy_microk_haswell-2.c @@ -59,10 +59,10 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "jnz 1b \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (alpha) // 4 diff --git a/kernel/x86_64/saxpy_microk_nehalem-2.c b/kernel/x86_64/saxpy_microk_nehalem-2.c index 68f68ea..88bbb69 100644 --- a/kernel/x86_64/saxpy_microk_nehalem-2.c +++ b/kernel/x86_64/saxpy_microk_nehalem-2.c @@ -73,9 +73,9 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "jnz 1b \n\t" : - : - "r" (i), // 0 - "r" (n), // 1 + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (alpha) // 4 diff --git a/kernel/x86_64/saxpy_microk_piledriver-2.c b/kernel/x86_64/saxpy_microk_piledriver-2.c index 204cf8b..5feea7f 100644 --- a/kernel/x86_64/saxpy_microk_piledriver-2.c +++ b/kernel/x86_64/saxpy_microk_piledriver-2.c @@ -78,10 +78,10 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "jnz 1b \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (alpha) // 4 @@ -139,10 +139,10 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "jnz 1b \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (alpha) // 4 diff --git a/kernel/x86_64/saxpy_microk_sandy-2.c b/kernel/x86_64/saxpy_microk_sandy-2.c index 0a6bef0..0d448d5 100644 --- a/kernel/x86_64/saxpy_microk_sandy-2.c +++ b/kernel/x86_64/saxpy_microk_sandy-2.c @@ -99,10 +99,10 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (alpha) // 4 diff --git a/kernel/x86_64/sdot_microk_bulldozer-2.c b/kernel/x86_64/sdot_microk_bulldozer-2.c index 36e61b0..8958a33 100644 --- a/kernel/x86_64/sdot_microk_bulldozer-2.c +++ b/kernel/x86_64/sdot_microk_bulldozer-2.c @@ -66,10 +66,10 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "vmovss %%xmm4, (%4) \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4 diff --git a/kernel/x86_64/sdot_microk_haswell-2.c b/kernel/x86_64/sdot_microk_haswell-2.c index df367b6..91dc928 100644 --- a/kernel/x86_64/sdot_microk_haswell-2.c +++ b/kernel/x86_64/sdot_microk_haswell-2.c @@ -79,10 +79,10 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "vmovss %%xmm4, (%4) \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4 diff --git a/kernel/x86_64/sdot_microk_nehalem-2.c b/kernel/x86_64/sdot_microk_nehalem-2.c index 1a27177..5a715d0 100644 --- a/kernel/x86_64/sdot_microk_nehalem-2.c +++ b/kernel/x86_64/sdot_microk_nehalem-2.c @@ -75,10 +75,10 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "movss %%xmm4, (%4) \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4 diff --git a/kernel/x86_64/sdot_microk_sandy-2.c b/kernel/x86_64/sdot_microk_sandy-2.c index ca13536..ae25d5a 100644 --- a/kernel/x86_64/sdot_microk_sandy-2.c +++ b/kernel/x86_64/sdot_microk_sandy-2.c @@ -82,10 +82,10 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "vmovss %%xmm4, (%4) \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4 diff --git a/kernel/x86_64/sdot_microk_steamroller-2.c b/kernel/x86_64/sdot_microk_steamroller-2.c index 6b8b256..bf6a5f2 100644 --- a/kernel/x86_64/sdot_microk_steamroller-2.c +++ b/kernel/x86_64/sdot_microk_steamroller-2.c @@ -80,10 +80,10 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "vmovss %%xmm4, (%4) \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4 @@ -143,10 +143,10 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "vmovss %%xmm4, (%4) \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4 diff --git a/kernel/x86_64/zaxpy_microk_bulldozer-2.c b/kernel/x86_64/zaxpy_microk_bulldozer-2.c index 0e15761..15d3679 100644 --- a/kernel/x86_64/zaxpy_microk_bulldozer-2.c +++ b/kernel/x86_64/zaxpy_microk_bulldozer-2.c @@ -113,10 +113,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "jnz 1b \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (alpha), // 4 @@ -180,10 +180,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "jnz 1b \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (alpha), // 4 diff --git a/kernel/x86_64/zaxpy_microk_haswell-2.c b/kernel/x86_64/zaxpy_microk_haswell-2.c index 30e8b19..89d23da 100644 --- a/kernel/x86_64/zaxpy_microk_haswell-2.c +++ b/kernel/x86_64/zaxpy_microk_haswell-2.c @@ -111,10 +111,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "jnz 1b \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (alpha), // 4 diff --git a/kernel/x86_64/zaxpy_microk_sandy-2.c b/kernel/x86_64/zaxpy_microk_sandy-2.c index 233af14..17b8b24 100644 --- a/kernel/x86_64/zaxpy_microk_sandy-2.c +++ b/kernel/x86_64/zaxpy_microk_sandy-2.c @@ -99,10 +99,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "jnz 1b \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (alpha), // 4 @@ -176,10 +176,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "jnz 1b \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (alpha), // 4 diff --git a/kernel/x86_64/zaxpy_microk_steamroller-2.c b/kernel/x86_64/zaxpy_microk_steamroller-2.c index 728d092..907b1ae 100644 --- a/kernel/x86_64/zaxpy_microk_steamroller-2.c +++ b/kernel/x86_64/zaxpy_microk_steamroller-2.c @@ -113,10 +113,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "jnz 1b \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (alpha), // 4 @@ -180,10 +180,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "jnz 1b \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (alpha), // 4 diff --git a/kernel/x86_64/zdot_microk_bulldozer-2.c b/kernel/x86_64/zdot_microk_bulldozer-2.c index 30a9552..db9a48c 100644 --- a/kernel/x86_64/zdot_microk_bulldozer-2.c +++ b/kernel/x86_64/zdot_microk_bulldozer-2.c @@ -96,10 +96,10 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "vmovups %%xmm4, 16(%4) \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4 @@ -175,10 +175,10 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "vmovups %%xmm4, 16(%4) \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4 diff --git a/kernel/x86_64/zdot_microk_haswell-2.c b/kernel/x86_64/zdot_microk_haswell-2.c index 11056a3..9f2fc2c 100644 --- a/kernel/x86_64/zdot_microk_haswell-2.c +++ b/kernel/x86_64/zdot_microk_haswell-2.c @@ -101,10 +101,10 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "vmovups %%xmm4, 16(%4) \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4 @@ -186,10 +186,10 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "vmovups %%xmm4, 16(%4) \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4 diff --git a/kernel/x86_64/zdot_microk_sandy-2.c b/kernel/x86_64/zdot_microk_sandy-2.c index 87c5b03..33415e2 100644 --- a/kernel/x86_64/zdot_microk_sandy-2.c +++ b/kernel/x86_64/zdot_microk_sandy-2.c @@ -107,10 +107,10 @@ if ( n < 1280 ) "vmovups %%xmm4, 16(%4) \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4 @@ -199,10 +199,10 @@ if ( n < 1280 ) "vmovups %%xmm4, 16(%4) \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4 diff --git a/kernel/x86_64/zdot_microk_steamroller-2.c b/kernel/x86_64/zdot_microk_steamroller-2.c index 325f74a..87138fe 100644 --- a/kernel/x86_64/zdot_microk_steamroller-2.c +++ b/kernel/x86_64/zdot_microk_steamroller-2.c @@ -95,10 +95,10 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "vmovups %%xmm4, 16(%4) \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4 @@ -172,10 +172,10 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "vmovups %%xmm4, 16(%4) \n\t" "vzeroupper \n\t" - : - : - "r" (i), // 0 - "r" (n), // 1 + : + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (dot) // 4