Avoid performance penalty in sparc optimized memcpy/memset.

author David S. Miller <davem@davemloft.net>

Thu, 31 May 2012 21:19:30 +0000 (14:19 -0700)

committer David S. Miller <davem@davemloft.net>

Thu, 31 May 2012 21:19:30 +0000 (14:19 -0700)
author David S. Miller <davem@davemloft.net>
Thu, 31 May 2012 21:19:30 +0000 (14:19 -0700)
committer David S. Miller <davem@davemloft.net>
Thu, 31 May 2012 21:19:30 +0000 (14:19 -0700)
diff --git a/ChangeLog b/ChangeLog

index 4d71f26..b0a4689 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2012-05-31  David S. Miller  <davem@davemloft.net>
+
+       * sysdeps/sparc/sparc64/memcpy.S: Use fsrc2 to move 64-bit
+       values between float registers.
+       * sysdeps/sparc/sparc64/memset.S: Likewise.
+       * sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S: Likewise.
+
  2012-05-31  Mike Frysinger  <vapier@gentoo.org>
  
         * debug/Makefile (CFLAGS-tst-longjmp_chk.c): Delete
diff --git a/sysdeps/sparc/sparc64/memcpy.S b/sysdeps/sparc/sparc64/memcpy.S

index 668ebec..8371088 100644 (file)
--- a/sysdeps/sparc/sparc64/memcpy.S
+++ b/sysdeps/sparc/sparc64/memcpy.S
@@ -79,7 +79,7 @@
  #define UNEVEN_VISCHUNK(dest, f0, f1, left)                    \
         subcc           %left, 8, %left;                        \
         bl,pn           %xcc, 205f;                             \
-        fsrc1          %f0, %f1;                               \
+        fsrc2          %f0, %f1;                               \
         ba,a,pt         %xcc, 204f;
  
         /* Macros for non-VIS memcpy code. */
@@ -162,7 +162,7 @@ ENTRY(__memcpy_large)
  3:     andcc           %o0, 0x38, %g5                  /* IEU1         Group           */
  201:   be,pt           %icc, 202f                      /* CTI                          */
          mov            64, %g1                         /* IEU0                         */
-       fmovd           %f0, %f2                        /* FPU                          */
+       fsrc2           %f0, %f2                        /* FPU                          */
         sub             %g1, %g5, %g5                   /* IEU0         Group           */
         alignaddr       %o1, %g0, %g1                   /* GRU          Group           */
         ldd             [%g1], %f4                      /* Load         Group           */
@@ -193,7 +193,7 @@ ENTRY(__memcpy_large)
         andn            %o1, (0x40 - 1), %o1            /* IEU1                         */
         and             %g2, 7, %g2                     /* IEU0         Group           */
         andncc          %g3, 0x7, %g3                   /* IEU1                         */
-       fmovd           %f0, %f2                        /* FPU                          */
+       fsrc2           %f0, %f2                        /* FPU                          */
         sub             %g3, 0x10, %g3                  /* IEU0         Group           */
         sub             %o2, %g6, %o2                   /* IEU1                         */
         alignaddr       %g1, %g0, %g0                   /* GRU          Group           */
@@ -541,7 +541,7 @@ ENTRY(memcpy)
          stb            %g5, [%o0 - 1]                  /* Store                        */
  2:     andn            %o2, 7, %g5                     /* IEU0         Group           */
         and             %o2, 7, %o2                     /* IEU1                         */
-       fmovd           %f0, %f2                        /* FPU                          */
+       fsrc2           %f0, %f2                        /* FPU                          */
         alignaddr       %o1, %g0, %g1                   /* GRU          Group           */
         ldd             [%g1], %f4                      /* Load         Group           */
  1:     ldd             [%g1 + 0x8], %f6                /* Load         Group           */
diff --git a/sysdeps/sparc/sparc64/memset.S b/sysdeps/sparc/sparc64/memset.S

index b9c52aa..5e92936 100644 (file)
--- a/sysdeps/sparc/sparc64/memset.S
+++ b/sysdeps/sparc/sparc64/memset.S
@@ -109,16 +109,16 @@ ENTRY(memset)
         membar          #StoreStore | #LoadStore
         andcc           %o3, 0xc0, %g5
         and             %o2, 0x3f, %o2
-       fmovd           %f0, %f2
-       fmovd           %f0, %f4
+       fsrc2           %f0, %f2
+       fsrc2           %f0, %f4
         andn            %o3, 0xff, %o3
-       fmovd           %f0, %f6
+       fsrc2           %f0, %f6
         cmp             %g5, 64
-       fmovd           %f0, %f8
-       fmovd           %f0, %f10
-       fmovd           %f0, %f12
+       fsrc2           %f0, %f8
+       fsrc2           %f0, %f10
+       fsrc2           %f0, %f12
         brz,pn          %g5, 10f
-        fmovd          %f0, %f14
+        fsrc2          %f0, %f14
         be,pn           %icc, 2f
          stda           %f0, [%o0 + 0x00] %asi
         cmp             %g5, 128
diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S

index 0e9442d..fb815e5 100644 (file)
--- a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S
+++ b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S
@@ -58,49 +58,49 @@
         faligndata      %x7, %x8, %f14;
  
  #define FREG_MOVE_1(x0) \
-       fmovd           %x0, %f0;
+       fsrc2           %x0, %f0;
  #define FREG_MOVE_2(x0, x1) \
-       fmovd           %x0, %f0; \
-       fmovd           %x1, %f2;
+       fsrc2           %x0, %f0; \
+       fsrc2           %x1, %f2;
  #define FREG_MOVE_3(x0, x1, x2) \
-       fmovd           %x0, %f0; \
-       fmovd           %x1, %f2; \
-       fmovd           %x2, %f4;
+       fsrc2           %x0, %f0; \
+       fsrc2           %x1, %f2; \
+       fsrc2           %x2, %f4;
  #define FREG_MOVE_4(x0, x1, x2, x3) \
-       fmovd           %x0, %f0; \
-       fmovd           %x1, %f2; \
-       fmovd           %x2, %f4; \
-       fmovd           %x3, %f6;
+       fsrc2           %x0, %f0; \
+       fsrc2           %x1, %f2; \
+       fsrc2           %x2, %f4; \
+       fsrc2           %x3, %f6;
  #define FREG_MOVE_5(x0, x1, x2, x3, x4) \
-       fmovd           %x0, %f0; \
-       fmovd           %x1, %f2; \
-       fmovd           %x2, %f4; \
-       fmovd           %x3, %f6; \
-       fmovd           %x4, %f8;
+       fsrc2           %x0, %f0; \
+       fsrc2           %x1, %f2; \
+       fsrc2           %x2, %f4; \
+       fsrc2           %x3, %f6; \
+       fsrc2           %x4, %f8;
  #define FREG_MOVE_6(x0, x1, x2, x3, x4, x5) \
-       fmovd           %x0, %f0; \
-       fmovd           %x1, %f2; \
-       fmovd           %x2, %f4; \
-       fmovd           %x3, %f6; \
-       fmovd           %x4, %f8; \
-       fmovd           %x5, %f10;
+       fsrc2           %x0, %f0; \
+       fsrc2           %x1, %f2; \
+       fsrc2           %x2, %f4; \
+       fsrc2           %x3, %f6; \
+       fsrc2           %x4, %f8; \
+       fsrc2           %x5, %f10;
  #define FREG_MOVE_7(x0, x1, x2, x3, x4, x5, x6) \
-       fmovd           %x0, %f0; \
-       fmovd           %x1, %f2; \
-       fmovd           %x2, %f4; \
-       fmovd           %x3, %f6; \
-       fmovd           %x4, %f8; \
-       fmovd           %x5, %f10; \
-       fmovd           %x6, %f12;
+       fsrc2           %x0, %f0; \
+       fsrc2           %x1, %f2; \
+       fsrc2           %x2, %f4; \
+       fsrc2           %x3, %f6; \
+       fsrc2           %x4, %f8; \
+       fsrc2           %x5, %f10; \
+       fsrc2           %x6, %f12;
  #define FREG_MOVE_8(x0, x1, x2, x3, x4, x5, x6, x7) \
-       fmovd           %x0, %f0; \
-       fmovd           %x1, %f2; \
-       fmovd           %x2, %f4; \
-       fmovd           %x3, %f6; \
-       fmovd           %x4, %f8; \
-       fmovd           %x5, %f10; \
-       fmovd           %x6, %f12; \
-       fmovd           %x7, %f14;
+       fsrc2           %x0, %f0; \
+       fsrc2           %x1, %f2; \
+       fsrc2           %x2, %f4; \
+       fsrc2           %x3, %f6; \
+       fsrc2           %x4, %f8; \
+       fsrc2           %x5, %f10; \
+       fsrc2           %x6, %f12; \
+       fsrc2           %x7, %f14;
  #define FREG_LOAD_1(base, x0) \
         LOAD(ldd, base + 0x00, %x0)
  #define FREG_LOAD_2(base, x0, x1) \
author	David S. Miller <davem@davemloft.net>
	Thu, 31 May 2012 21:19:30 +0000 (14:19 -0700)
committer	David S. Miller <davem@davemloft.net>
	Thu, 31 May 2012 21:19:30 +0000 (14:19 -0700)
ChangeLog		patch | blob | history
sysdeps/sparc/sparc64/memcpy.S		patch | blob | history
sysdeps/sparc/sparc64/memset.S		patch | blob | history
sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S		patch | blob | history