USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-static void sgemv_kernel_64( long n, float alpha, float *a, long lda, float *x, float *y)
+static void sgemv_kernel_64( long n, float alpha, float *a, long lda, float *x, float *y)
{
float *pre = a + lda*3;
- __asm __volatile
+ __asm__ __volatile__
(
"movq %0, %%rax\n\t" // n -> rax
"vbroadcastss %1, %%ymm1\n\t" // alpha -> ymm1
"m" (x), // 4
"m" (y), // 5
"m" (pre) // 6
- : "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11",
- "xmm0" , "xmm1",
- "xmm8", "xmm9", "xmm10", "xmm11",
- "xmm12", "xmm13", "xmm14", "xmm15",
+ : "%rax", "%rcx", "%rdx", "%rsi", "%rdi", "%r8",
+ "%xmm0", "%xmm1",
+ "%xmm8", "%xmm9", "%xmm10", "%xmm11",
+ "%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);
-static void sgemv_kernel_32( long n, float alpha, float *a, long lda, float *x, float *y)
+static void sgemv_kernel_32( long n, float alpha, float *a, long lda, float *x, float *y)
{
float *pre = a + lda*3;
- __asm __volatile
+ __asm__ __volatile__
(
"movq %0, %%rax\n\t" // n -> rax
"vbroadcastss %1, %%xmm1\n\t" // alpha -> xmm1
"m" (x), // 4
"m" (y), // 5
"m" (pre) // 6
- : "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11",
- "xmm0" , "xmm1",
- "xmm8", "xmm9", "xmm10", "xmm11",
- "xmm12", "xmm13", "xmm14", "xmm15",
- "memory"
);
}
-static void sgemv_kernel_16( long n, float alpha, float *a, long lda, float *x, float *y)
+static void sgemv_kernel_16( long n, float alpha, float *a, long lda, float *x, float *y)
{
- float *pre = a + lda*1;
+ float *pre = a + lda*3;
- __asm __volatile
+ __asm__ __volatile__
(
"movq %0, %%rax\n\t" // n -> rax
"vbroadcastss %1, %%ymm1\n\t" // alpha -> ymm1
"m" (x), // 4
"m" (y), // 5
"m" (pre) // 6
- : "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11",
- "xmm0" , "xmm1",
- "xmm12", "xmm13", "xmm14", "xmm15",
+ : "%rax", "%rcx", "%rdx", "%rsi", "%rdi", "%r8",
+ "%xmm0", "%xmm1",
+ "%xmm8", "%xmm9", "%xmm10", "%xmm11",
+ "%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);
}
-static void sgemv_kernel_8( long n, float alpha, float *a, long lda, float *x, float *y)
+static void sgemv_kernel_8( long n, float alpha, float *a, long lda, float *x, float *y)
{
- __asm __volatile
+ __asm__ __volatile__
(
"movq %0, %%rax\n\t" // n -> rax
"vbroadcastss %1, %%ymm1\n\t" // alpha -> ymm1
"m" (lda), // 3
"m" (x), // 4
"m" (y) // 5
- : "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11",
- "xmm0" , "xmm1",
- "xmm12", "xmm13", "xmm14", "xmm15",
+ : "%rax", "%rcx", "%rdx", "%rsi", "%rdi", "%r8",
+ "%xmm0", "%xmm1",
+ "%xmm8", "%xmm9", "%xmm10", "%xmm11",
+ "%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);
}
-static void sgemv_kernel_4( long n, float alpha, float *a, long lda, float *x, float *y)
+static void sgemv_kernel_4( long n, float alpha, float *a, long lda, float *x, float *y)
{
- __asm __volatile
+ __asm__ __volatile__
(
"movq %0, %%rax\n\t" // n -> rax
"vbroadcastss %1, %%xmm1\n\t" // alpha -> xmm1
"m" (lda), // 3
"m" (x), // 4
"m" (y) // 5
- : "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11",
- "xmm0" , "xmm1",
- "xmm12", "xmm13", "xmm14", "xmm15",
+ : "%rax", "%rcx", "%rdx", "%rsi", "%rdi", "%r8",
+ "%xmm0", "%xmm1",
+ "%xmm8", "%xmm9", "%xmm10", "%xmm11",
+ "%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);
}
-static void sgemv_kernel_2( long n, float alpha, float *a, long lda, float *x, float *y)
+static void sgemv_kernel_2( long n, float alpha, float *a, long lda, float *x, float *y)
{
- __asm __volatile
+ __asm__ __volatile__
(
"movq %0, %%rax\n\t" // n -> rax
"vmovss %1, %%xmm1\n\t" // alpha -> xmm1
"m" (lda), // 3
"m" (x), // 4
"m" (y) // 5
- : "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11",
- "xmm0" , "xmm1",
- "xmm12", "xmm13", "xmm14", "xmm15",
+ : "%rax", "%rcx", "%rdx", "%rsi", "%rdi", "%r8",
+ "%xmm0", "%xmm1",
+ "%xmm8", "%xmm9", "%xmm10", "%xmm11",
+ "%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);
-static void sgemv_kernel_1( long n, float alpha, float *a, long lda, float *x, float *y)
+static void sgemv_kernel_1( long n, float alpha, float *a, long lda, float *x, float *y)
{
- __asm __volatile
+ __asm__ __volatile__
(
"movq %0, %%rax\n\t" // n -> rax
"vmovss %1, %%xmm1\n\t" // alpha -> xmm1
"m" (lda), // 3
"m" (x), // 4
"m" (y) // 5
- : "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11",
- "xmm0" , "xmm1",
- "xmm12", "xmm13", "xmm14", "xmm15",
+ : "%rax", "%rcx", "%rdx", "%rsi", "%rdi", "%r8",
+ "%xmm0", "%xmm1",
+ "%xmm8", "%xmm9", "%xmm10", "%xmm11",
+ "%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);