+2004-06-13 Kaz Kojima <kkojima@rr.iij4u.or.jp>
+
+ * sysdeps/unix/sysv/linux/sh/bits/pthreadtypes.h (pthread_cond_t):
+ Add __data.__futex field, reshuffle __data.__clock.
+ * sysdeps/unix/sysv/linux/sh/pthread_cond_signal.S
+ (__pthread_cond_signal): Increment __futex at the same time as
+ __wakeup_seq or __total_seq. Pass address of __futex instead of
+ address of low 32-bits of __wakeup_seq to futex syscall.
+ * sysdeps/unix/sysv/linux/sh/pthread_cond_wait.S
+ (__pthread_cond_wait): Likewise. Pass __futex value from before
+ releasing internal lock to FUTEX_WAIT.
+ * sysdeps/unix/sysv/linux/sh/pthread_cond_timedwait.S
+ (__pthread_cond_timedwait): Likewise.
+ * sysdeps/unix/sysv/linux/sh/pthread_cond_broadcast.S
+ (FUTEX_CMP_REQUEUE): Define.
+ (__pthread_cond_broadcast): Set __futex to 2 * __total_seq.
+ Use FUTEX_CMP_REQUEUE operation instead of FUTEX_REQUEUE.
+ Pass __futex value from before the unlock and __futex address instead
+ of address of low 32-bits of __wakeup_seq to futex syscall.
+ Fall back to FUTEX_WAKE of all waiters on any error.
+
2004-06-08 Jakub Jelinek <jakub@redhat.com>
* pthread_mutexattr_getpshared.c (pthread_mutex_getpshared): Fix
struct
{
int __lock;
- int __clock;
+ unsigned int __futex;
unsigned long long int __total_seq;
unsigned long long int __wakeup_seq;
unsigned long long int __woken_seq;
void *__mutex;
+ int __clock;
unsigned int __broadcast_seq;
} __data;
char __size[__SIZEOF_PTHREAD_COND_T];
#define FUTEX_WAIT 0
#define FUTEX_WAKE 1
#define FUTEX_REQUEUE 3
+#define FUTEX_CMP_REQUEUE 4
#define EINVAL 22
.type __pthread_cond_broadcast, @function
.align 5
__pthread_cond_broadcast:
+ mov.l r10, @-r15
mov.l r9, @-r15
mov.l r8, @-r15
sts.l pr, @-r15
mov.l @(broadcast_seq,r8), r2
add #1, r2
mov.l r2, @(broadcast_seq,r8)
+ add r1, r1
+ mov r1, r10
+ mov.l r10, @(cond_futex,r8)
/* Get the address of the mutex used. */
mov.l @(dep_mutex,r8), r9
cmp/eq r0, r9
mov r8, r4
bt/s 9f
- add #wakeup_seq, r4
+ add #cond_futex, r4
/* Wake up all threads. */
- mov #FUTEX_REQUEUE, r5
+ mov #FUTEX_CMP_REQUEUE, r5
mov #1, r6
mov #-1, r7
shlr r7 /* r7 = 0x7fffffff */
# if MUTEX_FUTEX != 0
add #MUTEX_FUTEX, r0
# endif
+ mov r10, r1
mov #SYS_futex, r3
extu.b r3, r3
- trapa #0x15
+ trapa #0x16
SYSCALL_INST_PAD
+ /* For any kind of error, which mainly is EAGAIN, we try again
+ with WAKE. The general test also covers running on old
+ kernels. */
+ mov r0, r1
+ mov #-12, r2
+ shad r2, r1
+ not r1, r1
+ tst r1, r1
+ mov r8, r4
+ bt/s 9f
+ add #cond_futex, r4
+
10:
mov #0, r0
lds.l @r15+, pr
mov.l @r15+, r8
+ mov.l @r15+, r9
rts
- mov.l @r15+, r9
+ mov.l @r15+, r10
4:
/* Unlock. */
mov #0, r0
lds.l @r15+, pr
mov.l @r15+, r8
+ mov.l @r15+, r9
rts
- mov.l @r15+, r9
+ mov.l @r15+, r10
1:
/* Initial locking failed. */
addc r3, r1
mov.l r0,@(wakeup_seq,r8)
mov.l r1,@(wakeup_seq+4,r8)
+ mov.l @(cond_futex,r8),r0
+ add r2, r0
+ mov.l r0,@(cond_futex,r8)
/* Wake up one thread. */
mov r8, r4
- add #wakeup_seq, r4
+ add #cond_futex, r4
mov #FUTEX_WAKE, r5
mov #1, r6
mov #0, r7
addc r3, r1
mov.l r0,@(total_seq,r8)
mov.l r1,@(total_seq+4,r8)
+ mov.l @(cond_futex,r8), r0
+ add r2, r0
+ mov.l r0, @(cond_futex,r8)
+
/* Get and store current wakeup_seq value. */
mov.l @(wakeup_seq,r8), r10
/* Store relative timeout. */
mov.l r2, @(16,r15)
mov.l r3, @(20,r15)
+ mov.l @(cond_futex,r8), r1
+ mov.l r1, @(8,r15)
/* Unlock. */
#if cond_lock != 0
mov r15, r7
add #16, r7
mov #FUTEX_WAIT, r5
- mov r10, r6
+ mov.l @(8,r15), r6
mov r8, r4
- add #wakeup_seq, r4
+ add #cond_futex, r4
mov #SYS_futex, r3
extu.b r3, r3
trapa #0x14
addc r3, r1
mov.l r0,@(wakeup_seq,r8)
mov.l r1,@(wakeup_seq+4,r8)
+ mov.l @(cond_futex,r8),r0
+ add r2, r0
+ mov.l r0,@(cond_futex,r8)
mov #ETIMEDOUT, r0
bra 14f
mov.l r0, @(24,r15)
addc r3, r1
mov.l r0,@(wakeup_seq,r8)
mov.l r1,@(wakeup_seq+4,r8)
+ mov.l @(cond_futex,r8),r0
+ add r2, r0
+ mov.l r0,@(cond_futex,r8)
clrt
mov.l @(woken_seq,r8),r0
2:
/* Wake up all waiters to make sure no signal gets lost. */
mov r8, r4
- add #wakeup_seq, r4
+ add #cond_futex, r4
mov #FUTEX_WAKE, r5
mov #-1, r6
shlr r6 /* r6 = 0x7fffffff */
addc r3, r1
mov.l r0,@(total_seq,r8)
mov.l r1,@(total_seq+4,r8)
+ mov.l @(cond_futex,r8),r0
+ add r2, r0
+ mov.l r0,@(cond_futex,r8)
/* Get and store current wakeup_seq value. */
mov.l @(wakeup_seq,r8), r10
mov.l r0, @(4,r15)
8:
+ mov.l @(cond_futex,r8),r0
+ mov.l r0, @(8,r15)
+
/* Unlock. */
#if cond_lock != 0
DEC (@(cond_lock,r8), r2)
mov #0, r7
mov #FUTEX_WAIT, r5
- mov r10, r6
+ mov.l @(8,r15), r6
mov r8, r4
- add #wakeup_seq, r4
+ add #cond_futex, r4
mov #SYS_futex, r3
extu.b r3, r3
trapa #0x14
addc r3, r1
mov.l r0,@(wakeup_seq,r8)
mov.l r1,@(wakeup_seq+4,r8)
+ mov.l @(cond_futex,r8),r0
+ add r2, r0
+ mov.l r0,@(cond_futex,r8)
clrt
mov.l @(woken_seq,r8),r0
2:
/* Wake up all waiters to make sure no signal gets lost. */
mov r8, r4
- add #wakeup_seq, r4
+ add #cond_futex, r4
mov #FUTEX_WAKE, r5
mov #-1, r6
shlr r6 /* r6 = 0x7fffffff */
#include "math_private.h"
/*********************************************************************/
-/* An ultimate aqrt routine. Given an IEEE double machine number x */
+/* An ultimate sqrt routine. Given an IEEE double machine number x */
/* it computes the correctly rounded (to nearest) value of square */
/* root of x. */
/*********************************************************************/
rt1 = 4.99999999495955425917856814202739E-01,
rt2 = 3.75017500867345182581453026130850E-01,
rt3 = 3.12523626554518656309172508769531E-01;
- static const double big = 134217728.0, big1 = 134217729.0;
+ static const double big = 134217728.0;
double y,t,del,res,res1,hy,z,zz,p,hx,tx,ty,s;
mynumber a,c={{0,0}};
int4 k;
}
}
else {
- if (k>0x7ff00000) /* x -> infinity */
- return (big1-big1)/(big-big);
- if (k<0x00100000) { /* x -> -infinity */
- if (x==0) return x;
- if (k<0) return (big1-big1)/(big-big);
- else return tm256.x*__ieee754_sqrt(x*t512.x);
- }
- else return (a.i[LOW_HALF]==0)?x:(big1-big1)/(big-big);
+ if ((k & 0x7ff00000) == 0x7ff00000)
+ return x*x+x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
+ if (x==0) return x; /* sqrt(+0)=+0, sqrt(-0)=-0 */
+ if (k<0) return (x-x)/(x-x); /* sqrt(-ve)=sNaN */
+ return tm256.x*__ieee754_sqrt(x*t512.x);
}
}