From 42e69bcf1137fccfd7a95645a9d316c6490b9ff9 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper <drepper@redhat.com> Date: Sun, 19 Jul 2009 20:56:40 -0700 Subject: [PATCH] Support requeueing for condvars using PI mutex. x86-64 only. Add support for the new FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI options of futex. --- nptl/ChangeLog | 9 +++ nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h | 2 + .../sysv/linux/x86_64/pthread_cond_broadcast.S | 26 ++++++-- .../unix/sysv/linux/x86_64/pthread_cond_signal.S | 47 +++++++++++---- .../sysv/linux/x86_64/pthread_cond_timedwait.S | 69 ++++++++++++++++++---- .../unix/sysv/linux/x86_64/pthread_cond_wait.S | 60 ++++++++++++++++--- 6 files changed, 177 insertions(+), 36 deletions(-) diff --git a/nptl/ChangeLog b/nptl/ChangeLog index 785100d..c747be4 100644 --- a/nptl/ChangeLog +++ b/nptl/ChangeLog @@ -1,5 +1,14 @@ 2009-07-19 Ulrich Drepper <drepper@redhat.com> + * sysdeps/unix/sysv/linux/x86_64/lowlevellock.h: Define + FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI. + * sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S: If mutex + is a PI mutex, then use FUTEX_CMP_REQUEUE_PI. + * sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S: Likewise. + * sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S: If mutex + is a PI mutex, then use FUTEX_WAIT_REQUEUE_PI. + * sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S: Likewise. + * sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S (__pthread_cond_timedwait): Make more robust. 
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h b/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h index 0b7e3bb..9b15bfb 100644 --- a/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h +++ b/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h @@ -54,6 +54,8 @@ #define FUTEX_TRYLOCK_PI 8 #define FUTEX_WAIT_BITSET 9 #define FUTEX_WAKE_BITSET 10 +#define FUTEX_WAIT_REQUEUE_PI 11 +#define FUTEX_CMP_REQUEUE_PI 12 #define FUTEX_PRIVATE_FLAG 128 #define FUTEX_CLOCK_REALTIME 256 diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S index 6155255..0f10ec9 100644 --- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S +++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 +/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2009 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 2002. @@ -70,12 +70,14 @@ __pthread_cond_broadcast: 8: cmpq $-1, %r8 je 9f - /* XXX: The kernel so far doesn't support requeue to PI futex. */ - /* XXX: The kernel only supports FUTEX_CMP_REQUEUE to the same - type of futex (private resp. shared). */ - testl $(PI_BIT | PS_BIT), MUTEX_KIND(%r8) + /* Do not use requeue for pshared condvars. */ + testl $PS_BIT, MUTEX_KIND(%r8) jne 9f + /* Requeue to a PI mutex if the PI bit is set. */ + testl $PI_BIT, MUTEX_KIND(%r8) + jne 81f + /* Wake up all threads. */ #ifdef __ASSUME_PRIVATE_FUTEX movl $(FUTEX_CMP_REQUEUE|FUTEX_PRIVATE_FLAG), %esi @@ -97,6 +99,20 @@ __pthread_cond_broadcast: 10: xorl %eax, %eax retq + /* Wake up all threads. */ +81: movl $(FUTEX_CMP_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %esi + movl $SYS_futex, %eax + movl $1, %edx + movl $0x7fffffff, %r10d + syscall + + /* For any kind of error, which mainly is EAGAIN, we try again + with WAKE. The general test also covers running on old + kernels. 
*/ + cmpq $-4095, %rax + jb 10b + jmp 9f + .align 16 /* Unlock. */ 4: LOCK diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S index 8f65f2c..f1050fe 100644 --- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S +++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc. +/* Copyright (C) 2002-2005, 2007, 2009 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 2002. @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -56,19 +57,23 @@ __pthread_cond_signal: /* Wake up one thread. */ cmpq $-1, dep_mutex(%r8) + movl $FUTEX_WAKE_OP, %esi movl $1, %edx + movl $SYS_futex, %eax + je 8f + + /* Get the address of the mutex used. */ + movq dep_mutex(%r8), %rcx + testl $PI_BIT, MUTEX_KIND(%rcx) + jne 9f + #ifdef __ASSUME_PRIVATE_FUTEX - movl $FUTEX_WAKE_OP, %eax movl $(FUTEX_WAKE_OP|FUTEX_PRIVATE_FLAG), %esi - cmove %eax, %esi #else - movl $0, %eax - movl %fs:PRIVATE_FUTEX, %esi - cmove %eax, %esi - orl $FUTEX_WAKE_OP, %esi + orl %fs:PRIVATE_FUTEX, %esi #endif - movl $1, %r10d - movl $SYS_futex, %eax + +8: movl $1, %r10d #if cond_lock != 0 addq $cond_lock, %r8 #endif @@ -85,9 +90,27 @@ __pthread_cond_signal: xorl %eax, %eax retq -7: /* %esi should be either FUTEX_WAKE_OP or - FUTEX_WAKE_OP|FUTEX_PRIVATE_FLAG from the previous syscall. */ - xorl $(FUTEX_WAKE ^ FUTEX_WAKE_OP), %esi + /* Wake up one thread and requeue none in the PI Mutex case. */ +9: movl $(FUTEX_CMP_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %esi + movq %rcx, %r8 + xorq %r10, %r10 + movl (%rdi), %r9d // XXX Can this be right? + syscall + + leaq -cond_futex(%rdi), %r8 + + /* For any kind of error, we try again with WAKE. + The general test also covers running on old kernels. 
*/ + cmpq $-4095, %rax + jb 4f + +7: +#ifdef __ASSUME_PRIVATE_FUTEX + andl $FUTEX_PRIVATE_FLAG, %esi +#else + andl %fs:PRIVATE_FUTEX, %esi +#endif + orl $FUTEX_WAKE, %esi movl $SYS_futex, %eax /* %rdx should be 1 already from $FUTEX_WAKE_OP syscall. movl $1, %edx */ diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S index 1b19fdb..f81466e 100644 --- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S +++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -58,6 +59,9 @@ __pthread_cond_timedwait: pushq %r14 cfi_adjust_cfa_offset(8) cfi_rel_offset(%r14, 0) + pushq %r15 + cfi_adjust_cfa_offset(8) + cfi_rel_offset(%r15, 0) #ifdef __ASSUME_FUTEX_CLOCK_REALTIME # define FRAME_SIZE 32 #else @@ -160,9 +164,41 @@ __pthread_cond_timedwait: movl $FUTEX_WAIT_BITSET, %eax movl $(FUTEX_WAIT_BITSET|FUTEX_PRIVATE_FLAG), %esi cmove %eax, %esi + je 60f + + movq dep_mutex(%rdi), %r8 + /* Requeue to a PI mutex if the PI bit is set. */ + testl $PI_BIT, MUTEX_KIND(%r8) + je 60f + + movl $(FUTEX_WAIT_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %esi + xorl %eax, %eax /* The following only works like this because we only support two clocks, represented using a single bit. */ + testl $1, cond_nwaiters(%rdi) + movl $FUTEX_CLOCK_REALTIME, %edx + cmove %edx, %eax + orl %eax, %esi + movq %r12, %rdx + addq $cond_futex, %rdi + movl $SYS_futex, %eax + syscall + + movl $1, %r15d +#ifdef __ASSUME_REQUEUE_PI + jmp 62f +#else + cmpq $-4095, %rax + jnae 62f + + movl $(FUTEX_WAIT_BITSET|FUTEX_PRIVATE_FLAG), %esi + subq $cond_futex, %rdi +#endif + +60: xorl %r15d, %r15d xorl %eax, %eax + /* The following only works like this because we only support + two clocks, represented using a single bit. 
*/ testl $1, cond_nwaiters(%rdi) movl $FUTEX_CLOCK_REALTIME, %edx movl $0xffffffff, %r9d @@ -172,7 +208,7 @@ __pthread_cond_timedwait: addq $cond_futex, %rdi movl $SYS_futex, %eax syscall - movq %rax, %r14 +62: movq %rax, %r14 movl (%rsp), %edi callq __pthread_disable_asynccancel @@ -253,14 +289,23 @@ __pthread_cond_timedwait: #endif jne 40f -41: movq 16(%rsp), %rdi + /* If requeue_pi is used the kernel performs the locking of the + mutex. */ +41: xorl %eax, %eax + testl %r15d, %r15d + jnz 63f + + movq 16(%rsp), %rdi callq __pthread_mutex_cond_lock - testq %rax, %rax +63: testq %rax, %rax cmoveq %r14, %rax 48: addq $FRAME_SIZE, %rsp cfi_adjust_cfa_offset(-FRAME_SIZE) + popq %r15 + cfi_adjust_cfa_offset(-8) + cfi_restore(%r15) popq %r14 cfi_adjust_cfa_offset(-8) cfi_restore(%r14) @@ -274,10 +319,11 @@ __pthread_cond_timedwait: retq /* Initial locking failed. */ -31: cfi_adjust_cfa_offset(3 * 8 + FRAME_SIZE) - cfi_rel_offset(%r12, FRAME_SIZE + 16) - cfi_rel_offset(%r13, FRAME_SIZE + 8) - cfi_rel_offset(%r14, FRAME_SIZE) +31: cfi_adjust_cfa_offset(4 * 8 + FRAME_SIZE) + cfi_rel_offset(%r12, FRAME_SIZE + 24) + cfi_rel_offset(%r13, FRAME_SIZE + 16) + cfi_rel_offset(%r14, FRAME_SIZE + 8) + cfi_rel_offset(%r15, FRAME_SIZE) #if cond_lock != 0 addq $cond_lock, %rdi #endif @@ -353,6 +399,8 @@ __pthread_cond_timedwait: #ifndef __ASSUME_FUTEX_CLOCK_REALTIME .Lreltmo: + xorl %r15d, %r15d + /* Get internal lock. 
*/ movl $1, %esi xorl %eax, %eax @@ -716,9 +764,10 @@ __condvar_cleanup2: callq __pthread_mutex_cond_lock movq 24(%rsp), %rdi - movq FRAME_SIZE(%rsp), %r14 - movq FRAME_SIZE+8(%rsp), %r13 - movq FRAME_SIZE+16(%rsp), %r12 + movq FRAME_SIZE(%rsp), %r15 + movq FRAME_SIZE+8(%rsp), %r14 + movq FRAME_SIZE+16(%rsp), %r13 + movq FRAME_SIZE+24(%rsp), %r12 .LcallUR: call _Unwind_Resume@PLT hlt diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S index c3c879c..e6323ea 100644 --- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S +++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -47,6 +48,9 @@ __pthread_cond_wait: pushq %r12 cfi_adjust_cfa_offset(8) cfi_rel_offset(%r12, 0) + pushq %r13 + cfi_adjust_cfa_offset(8) + cfi_rel_offset(%r13, 0) #define FRAME_SIZE 32 subq $FRAME_SIZE, %rsp cfi_adjust_cfa_offset(FRAME_SIZE) @@ -124,24 +128,48 @@ __pthread_cond_wait: movq 8(%rsp), %rdi xorq %r10, %r10 movq %r12, %rdx - addq $cond_futex-cond_lock, %rdi + // XXX reverse + lea + addq $cond_futex, %rdi cmpq $-1, dep_mutex-cond_futex(%rdi) #ifdef __ASSUME_PRIVATE_FUTEX movl $FUTEX_WAIT, %eax movl $(FUTEX_WAIT|FUTEX_PRIVATE_FLAG), %esi cmove %eax, %esi #else - movl $FUTEX_WAIT, %eax + movl $0, %eax movl %fs:PRIVATE_FUTEX, %esi cmove %eax, %esi # if FUTEX_WAIT != 0 +# error "cc destroyed by following orl" orl $FUTEX_WAIT, %esi # endif #endif + je 60f + + movq dep_mutex-cond_futex(%rdi), %r8 + /* Requeue to a PI mutex if the PI bit is set. 
*/ + testl $PI_BIT, MUTEX_KIND(%r8) + je 60f + + movl $(FUTEX_WAIT_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %esi + movl $SYS_futex, %eax + syscall + + movl $1, %r13d +#ifdef __ASSUME_REQUEUE_PI + jmp 62f +#else + cmpq $-4095, %rax + jnae 62f + + movl $(FUTEX_WAIT|FUTEX_PRIVATE_FLAG), %esi +#endif + +60: xorl %r13d, %r13d movl $SYS_futex, %eax syscall - movl (%rsp), %edi +62: movl (%rsp), %edi callq __pthread_disable_asynccancel .LcleanupEND: @@ -209,11 +237,21 @@ __pthread_cond_wait: #endif jne 10f -11: movq 16(%rsp), %rdi + /* If requeue_pi is used the kernel performs the locking of the + mutex. */ +11: xorl %eax, %eax + testl %r13d, %r13d + jnz 14f + + movq 16(%rsp), %rdi callq __pthread_mutex_cond_lock + 14: addq $FRAME_SIZE, %rsp cfi_adjust_cfa_offset(-FRAME_SIZE) + popq %r13 + cfi_adjust_cfa_offset(-8) + cfi_restore(%r13) popq %r12 cfi_adjust_cfa_offset(-8) cfi_restore(%r12) @@ -223,8 +261,9 @@ __pthread_cond_wait: /* Initial locking failed. */ 1: - cfi_adjust_cfa_offset(8 + FRAME_SIZE) - cfi_rel_offset(%r12, FRAME_SIZE) + cfi_adjust_cfa_offset(16 + FRAME_SIZE) + cfi_rel_offset(%r12, FRAME_SIZE + 8) + cfi_rel_offset(%r13, FRAME_SIZE) #if cond_lock != 0 addq $cond_lock, %rdi #endif @@ -308,9 +347,11 @@ versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait, __condvar_cleanup1: /* Stack frame: - rsp + 40 + rsp + 48 + +--------------------------+ + rsp + 40 | %r12 | +--------------------------+ - rsp + 32 | %r12 | + rsp + 32 | %r13 | +--------------------------+ rsp + 24 | unused | +--------------------------+ @@ -431,7 +472,8 @@ __condvar_cleanup1: callq __pthread_mutex_cond_lock movq 24(%rsp), %rdi - movq 32(%rsp), %r12 + movq 40(%rsp), %r12 + movq 32(%rsp), %r13 .LcallUR: call _Unwind_Resume@PLT hlt -- 2.7.4