PowerPC LE setjmp/longjmp

author Anton Blanchard <anton@au1.ibm.com>

Sat, 17 Aug 2013 09:04:40 +0000 (18:34 +0930)

committer Alan Modra <amodra@gmail.com>

Fri, 4 Oct 2013 01:07:59 +0000 (10:37 +0930)
author Anton Blanchard <anton@au1.ibm.com>
Sat, 17 Aug 2013 09:04:40 +0000 (18:34 +0930)
committer Alan Modra <amodra@gmail.com>
Fri, 4 Oct 2013 01:07:59 +0000 (10:37 +0930)
diff --git a/ChangeLog b/ChangeLog

index 89f5105..c106857 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2013-10-04  Anton Blanchard <anton@au1.ibm.com>
+           Alistair Popple <alistair@ozlabs.au.ibm.com>
+           Alan Modra <amodra@gmail.com>
+
+       [BZ #15723]
+       * sysdeps/powerpc/jmpbuf-offsets.h: Comment fix.
+       * sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S: Correct
+       _dl_hwcap access for little-endian.
+       * sysdeps/powerpc/powerpc32/fpu/setjmp-common.S: Likewise.  Don't
+       destroy vmx regs when saving unaligned.
+       * sysdeps/powerpc/powerpc64/__longjmp-common.S: Correct CR load.
+       * sysdeps/powerpc/powerpc64/setjmp-common.S: Likewise CR save.  Don't
+       destroy vmx regs when saving unaligned.
+
  2013-10-04  Alan Modra  <amodra@gmail.com>
  
         * sysdeps/powerpc/powerpc32/power4/hp-timing.h (HP_TIMING_NOW):
diff --git a/sysdeps/powerpc/jmpbuf-offsets.h b/sysdeps/powerpc/jmpbuf-offsets.h

index 64c658a..f2116bd 100644 (file)
--- a/sysdeps/powerpc/jmpbuf-offsets.h
+++ b/sysdeps/powerpc/jmpbuf-offsets.h
@@ -21,12 +21,10 @@
  #define JB_LR     2  /* The address we will return to */
  #if __WORDSIZE == 64
  # define JB_GPRS   3  /* GPRs 14 through 31 are saved, 18*2 words total.  */
-# define JB_CR     21 /* Condition code registers with the VRSAVE at */
-                       /* offset 172 (low half of the double word.  */
+# define JB_CR     21 /* Shared dword with VRSAVE.  CR word at offset 172.  */
  # define JB_FPRS   22 /* FPRs 14 through 31 are saved, 18*2 words total.  */
  # define JB_SIZE   (64 * 8) /* As per PPC64-VMX ABI.  */
-# define JB_VRSAVE 21 /* VRSAVE shares a double word with the CR at offset */
-                       /* 168 (high half of the double word).  */
+# define JB_VRSAVE 21 /* Shared dword with CR.  VRSAVE word at offset 168.  */
  # define JB_VRS    40 /* VRs 20 through 31 are saved, 12*4 words total.  */
  #else
  # define JB_GPRS   3  /* GPRs 14 through 31 are saved, 18 in total.  */
diff --git a/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S b/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S

index 9d34cd9..d02aa57 100644 (file)
--- a/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
+++ b/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
@@ -43,16 +43,16 @@ ENTRY (__longjmp)
  #   endif
         mtlr    r6
         cfi_same_value (lr)
-       lwz     r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r5)
+       lwz     r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r5)
  #  else
         lwz     r5,_dl_hwcap@got(r5)
         mtlr    r6
         cfi_same_value (lr)
-       lwz     r5,4(r5)
+       lwz     r5,LOWORD(r5)
  #  endif
  # else
-       lis     r5,(_dl_hwcap+4)@ha
-       lwz     r5,(_dl_hwcap+4)@l(r5)
+       lis     r5,(_dl_hwcap+LOWORD)@ha
+       lwz     r5,(_dl_hwcap+LOWORD)@l(r5)
  # endif
         andis.  r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16)
         beq     L(no_vmx)
diff --git a/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S b/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S

index 46ea2b0..f324406 100644 (file)
--- a/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
+++ b/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
@@ -94,14 +94,14 @@ ENTRY (__sigsetjmp)
  #   else
         lwz     r5,_rtld_global_ro@got(r5)
  #   endif
-       lwz     r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r5)
+       lwz     r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r5)
  #  else
         lwz     r5,_dl_hwcap@got(r5)
-       lwz     r5,4(r5)
+       lwz     r5,LOWORD(r5)
  #  endif
  # else
-       lis     r6,(_dl_hwcap+4)@ha
-       lwz     r5,(_dl_hwcap+4)@l(r6)
+       lis     r6,(_dl_hwcap+LOWORD)@ha
+       lwz     r5,(_dl_hwcap+LOWORD)@l(r6)
  # endif
         andis.  r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16)
         beq     L(no_vmx)
@@ -111,44 +111,43 @@ ENTRY (__sigsetjmp)
         stw     r0,((JB_VRSAVE)*4)(3)
         addi    r6,r5,16
         beq+    L(aligned_save_vmx)
-       lvsr    v0,0,r5
-       vspltisb v1,-1         /* set v1 to all 1's */
-       vspltisb v2,0          /* set v2 to all 0's */
-       vperm   v3,v2,v1,v0   /* v3 contains shift mask with num all 1 bytes on left = misalignment  */
  
+       lvsr    v0,0,r5
+       lvsl    v1,0,r5
+       addi    r6,r5,-16
  
-       /* Special case for v20 we need to preserve what is in save area below v20 before obliterating it */
-       lvx     v5,0,r5
-       vperm   v20,v20,v20,v0
-       vsel    v5,v5,v20,v3
-       vsel    v20,v20,v2,v3
-       stvx    v5,0,r5
+# define save_misaligned_vmx(savevr,prevvr,shiftvr,tmpvr,savegpr,addgpr) \
+       addi    addgpr,addgpr,32;                                        \
+       vperm   tmpvr,prevvr,savevr,shiftvr;                             \
+       stvx    tmpvr,0,savegpr
  
-#define save_2vmx_partial(savevr,prev_savevr,hivr,shiftvr,maskvr,savegpr,addgpr) \
-       addi    addgpr,addgpr,32; \
-       vperm   savevr,savevr,savevr,shiftvr; \
-       vsel    hivr,prev_savevr,savevr,maskvr; \
-       stvx    hivr,0,savegpr;
+       /*
+        * We have to be careful not to corrupt the data below v20 and
+        * above v31. To keep things simple we just rotate both ends in
+        * the opposite direction to our main permute so we can use
+        * the common macro.
+        */
  
-       save_2vmx_partial(v21,v20,v5,v0,v3,r6,r5)
-       save_2vmx_partial(v22,v21,v5,v0,v3,r5,r6)
-       save_2vmx_partial(v23,v22,v5,v0,v3,r6,r5)
-       save_2vmx_partial(v24,v23,v5,v0,v3,r5,r6)
-       save_2vmx_partial(v25,v24,v5,v0,v3,r6,r5)
-       save_2vmx_partial(v26,v25,v5,v0,v3,r5,r6)
-       save_2vmx_partial(v27,v26,v5,v0,v3,r6,r5)
-       save_2vmx_partial(v28,v27,v5,v0,v3,r5,r6)
-       save_2vmx_partial(v29,v28,v5,v0,v3,r6,r5)
-       save_2vmx_partial(v30,v29,v5,v0,v3,r5,r6)
+       /* load and rotate data below v20 */
+       lvx     v2,0,r5
+       vperm   v2,v2,v2,v1
+       save_misaligned_vmx(v20,v2,v0,v3,r5,r6)
+       save_misaligned_vmx(v21,v20,v0,v3,r6,r5)
+       save_misaligned_vmx(v22,v21,v0,v3,r5,r6)
+       save_misaligned_vmx(v23,v22,v0,v3,r6,r5)
+       save_misaligned_vmx(v24,v23,v0,v3,r5,r6)
+       save_misaligned_vmx(v25,v24,v0,v3,r6,r5)
+       save_misaligned_vmx(v26,v25,v0,v3,r5,r6)
+       save_misaligned_vmx(v27,v26,v0,v3,r6,r5)
+       save_misaligned_vmx(v28,v27,v0,v3,r5,r6)
+       save_misaligned_vmx(v29,v28,v0,v3,r6,r5)
+       save_misaligned_vmx(v30,v29,v0,v3,r5,r6)
+       save_misaligned_vmx(v31,v30,v0,v3,r6,r5)
+       /* load and rotate data above v31 */
+       lvx     v2,0,r6
+       vperm   v2,v2,v2,v1
+       save_misaligned_vmx(v2,v31,v0,v3,r5,r6)
  
-       /* Special case for r31 we need to preserve what is in save area above v31 before obliterating it */
-       addi    r5,r5,32
-       vperm   v31,v31,v31,v0
-       lvx     v4,0,r5
-       vsel    v5,v30,v31,v3
-       stvx    v5,0,r6
-       vsel    v4,v31,v4,v3
-       stvx    v4,0,r5
         b       L(no_vmx)
  
  L(aligned_save_vmx):
diff --git a/sysdeps/powerpc/powerpc64/__longjmp-common.S b/sysdeps/powerpc/powerpc64/__longjmp-common.S

index 70c3704..4f1e3c8 100644 (file)
--- a/sysdeps/powerpc/powerpc64/__longjmp-common.S
+++ b/sysdeps/powerpc/powerpc64/__longjmp-common.S
@@ -57,7 +57,7 @@ ENTRY (__longjmp)
         beq     L(no_vmx)
         la      r5,((JB_VRS)*8)(3)
         andi.   r6,r5,0xf
-       lwz     r0,((JB_VRSAVE)*8)(3)
+       lwz     r0,((JB_VRSAVE)*8)(3)   /* 32-bit VRSAVE.  */
         mtspr   VRSAVE,r0
         beq+    L(aligned_restore_vmx)
         addi    r6,r5,16
@@ -153,7 +153,7 @@ L(no_vmx):
         lfd fp21,((JB_FPRS+7)*8)(r3)
         ld r22,((JB_GPRS+8)*8)(r3)
         lfd fp22,((JB_FPRS+8)*8)(r3)
-       ld r0,(JB_CR*8)(r3)
+       lwz r0,((JB_CR*8)+4)(r3)        /* 32-bit CR.  */
         ld r23,((JB_GPRS+9)*8)(r3)
         lfd fp23,((JB_FPRS+9)*8)(r3)
         ld r24,((JB_GPRS+10)*8)(r3)
diff --git a/sysdeps/powerpc/powerpc64/setjmp-common.S b/sysdeps/powerpc/powerpc64/setjmp-common.S

index 58ec610..1829b9a 100644 (file)
--- a/sysdeps/powerpc/powerpc64/setjmp-common.S
+++ b/sysdeps/powerpc/powerpc64/setjmp-common.S
@@ -95,7 +95,7 @@ JUMPTARGET(GLUE(__sigsetjmp,_ent)):
         mfcr r0
         std  r16,((JB_GPRS+2)*8)(3)
         stfd fp16,((JB_FPRS+2)*8)(3)
-       std  r0,(JB_CR*8)(3)
+       stw  r0,((JB_CR*8)+4)(3)        /* 32-bit CR.  */
         std  r17,((JB_GPRS+3)*8)(3)
         stfd fp17,((JB_FPRS+3)*8)(3)
         std  r18,((JB_GPRS+4)*8)(3)
@@ -139,50 +139,46 @@ JUMPTARGET(GLUE(__sigsetjmp,_ent)):
         la      r5,((JB_VRS)*8)(3)
         andi.   r6,r5,0xf
         mfspr   r0,VRSAVE
-       stw     r0,((JB_VRSAVE)*8)(3)
+       stw     r0,((JB_VRSAVE)*8)(3)   /* 32-bit VRSAVE.  */
         addi    r6,r5,16
         beq+    L(aligned_save_vmx)
-       lvsr    v0,0,r5
-       vspltisb v1,-1         /* set v1 to all 1's */
-       vspltisb v2,0          /* set v2 to all 0's */
-       vperm   v3,v2,v1,v0   /* v3 contains shift mask with num all 1 bytes
-                                on left = misalignment  */
  
+       lvsr    v0,0,r5
+       lvsl    v1,0,r5
+       addi    r6,r5,-16
  
-       /* Special case for v20 we need to preserve what is in save area
-          below v20 before obliterating it */
-       lvx     v5,0,r5
-       vperm   v20,v20,v20,v0
-       vsel    v5,v5,v20,v3
-       vsel    v20,v20,v2,v3
-       stvx    v5,0,r5
+# define save_misaligned_vmx(savevr,prevvr,shiftvr,tmpvr,savegpr,addgpr) \
+       addi    addgpr,addgpr,32;                                        \
+       vperm   tmpvr,prevvr,savevr,shiftvr;                             \
+       stvx    tmpvr,0,savegpr
  
-# define save_2vmx_partial(savevr,prev_savevr,hivr,shiftvr,maskvr,savegpr,addgpr) \
-       addi    addgpr,addgpr,32; \
-       vperm   savevr,savevr,savevr,shiftvr; \
-       vsel    hivr,prev_savevr,savevr,maskvr; \
-       stvx    hivr,0,savegpr;
+       /*
+        * We have to be careful not to corrupt the data below v20 and
+        * above v31. To keep things simple we just rotate both ends in
+        * the opposite direction to our main permute so we can use
+        * the common macro.
+        */
  
-       save_2vmx_partial(v21,v20,v5,v0,v3,r6,r5)
-       save_2vmx_partial(v22,v21,v5,v0,v3,r5,r6)
-       save_2vmx_partial(v23,v22,v5,v0,v3,r6,r5)
-       save_2vmx_partial(v24,v23,v5,v0,v3,r5,r6)
-       save_2vmx_partial(v25,v24,v5,v0,v3,r6,r5)
-       save_2vmx_partial(v26,v25,v5,v0,v3,r5,r6)
-       save_2vmx_partial(v27,v26,v5,v0,v3,r6,r5)
-       save_2vmx_partial(v28,v27,v5,v0,v3,r5,r6)
-       save_2vmx_partial(v29,v28,v5,v0,v3,r6,r5)
-       save_2vmx_partial(v30,v29,v5,v0,v3,r5,r6)
+       /* load and rotate data below v20 */
+       lvx     v2,0,r5
+       vperm   v2,v2,v2,v1
+       save_misaligned_vmx(v20,v2,v0,v3,r5,r6)
+       save_misaligned_vmx(v21,v20,v0,v3,r6,r5)
+       save_misaligned_vmx(v22,v21,v0,v3,r5,r6)
+       save_misaligned_vmx(v23,v22,v0,v3,r6,r5)
+       save_misaligned_vmx(v24,v23,v0,v3,r5,r6)
+       save_misaligned_vmx(v25,v24,v0,v3,r6,r5)
+       save_misaligned_vmx(v26,v25,v0,v3,r5,r6)
+       save_misaligned_vmx(v27,v26,v0,v3,r6,r5)
+       save_misaligned_vmx(v28,v27,v0,v3,r5,r6)
+       save_misaligned_vmx(v29,v28,v0,v3,r6,r5)
+       save_misaligned_vmx(v30,v29,v0,v3,r5,r6)
+       save_misaligned_vmx(v31,v30,v0,v3,r6,r5)
+       /* load and rotate data above v31 */
+       lvx     v2,0,r6
+       vperm   v2,v2,v2,v1
+       save_misaligned_vmx(v2,v31,v0,v3,r5,r6)
  
-       /* Special case for r31 we need to preserve what is in save area
-          above v31 before obliterating it */
-       addi    r5,r5,32
-       vperm   v31,v31,v31,v0
-       lvx     v4,0,r5
-       vsel    v5,v30,v31,v3
-       stvx    v5,0,r6
-       vsel    v4,v31,v4,v3
-       stvx    v4,0,r5
         b       L(no_vmx)
  
  L(aligned_save_vmx):
author	Anton Blanchard <anton@au1.ibm.com>
	Sat, 17 Aug 2013 09:04:40 +0000 (18:34 +0930)
committer	Alan Modra <amodra@gmail.com>
	Fri, 4 Oct 2013 01:07:59 +0000 (10:37 +0930)
ChangeLog		patch \| blob \| history
sysdeps/powerpc/jmpbuf-offsets.h		patch \| blob \| history
sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S		patch \| blob \| history
sysdeps/powerpc/powerpc32/fpu/setjmp-common.S		patch \| blob \| history
sysdeps/powerpc/powerpc64/__longjmp-common.S		patch \| blob \| history
sysdeps/powerpc/powerpc64/setjmp-common.S		patch \| blob \| history