From 30b1954abb02aa7ba8136fe728820cd769052efb Mon Sep 17 00:00:00 2001
From: Ulrich Drepper
Date: Sat, 18 Jul 2009 08:09:39 -0700
Subject: [PATCH] Optimize x86-64 pthread_cond_wait.

Instead of actively registering an unwind buffer we now use the
exception handling functionality of the gcc runtime.
---
 nptl/ChangeLog                                     |    6 +
 .../sysv/linux/x86_64/pthread_cond_timedwait.S     |  125 ++++++++
 .../unix/sysv/linux/x86_64/pthread_cond_wait.S     |  328 ++++++++++++---------
 3 files changed, 315 insertions(+), 144 deletions(-)

diff --git a/nptl/ChangeLog b/nptl/ChangeLog
index 98d3a4d..33fc8a1 100644
--- a/nptl/ChangeLog
+++ b/nptl/ChangeLog
@@ -1,3 +1,9 @@
+2009-07-18  Ulrich Drepper
+
+	* sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
+	(__pthread_cond_wait): Convert to using exception handler instead of
+	registered unwind buffer.
+
 2009-07-17  Ulrich Drepper
 
 	* sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S (sem_timedwait):
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
index ddcf106..2b53591 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
@@ -31,6 +31,131 @@
 
 	.text
 
+
+	.align	16
+	.type	__condvar_cleanup, @function
+	.globl	__condvar_cleanup
+	.hidden	__condvar_cleanup
+__condvar_cleanup:
+	cfi_startproc
+	pushq	%r12
+	cfi_adjust_cfa_offset(8)
+	cfi_rel_offset(%r12, 0)
+
+	/* Get internal lock.  */
+	movq	%rdi, %r8
+	movq	8(%rdi), %rdi
+	movl	$1, %esi
+	xorl	%eax, %eax
+	LOCK
+#if cond_lock == 0
+	cmpxchgl %esi, (%rdi)
+#else
+	cmpxchgl %esi, cond_lock(%rdi)
+#endif
+	jz	1f
+
+#if cond_lock != 0
+	addq	$cond_lock, %rdi
+#endif
+	cmpq	$-1, dep_mutex-cond_lock(%rdi)
+	movl	$LLL_PRIVATE, %eax
+	movl	$LLL_SHARED, %esi
+	cmovne	%eax, %esi
+	callq	__lll_lock_wait
+#if cond_lock != 0
+	subq	$cond_lock, %rdi
+#endif
+
+1:	movl	broadcast_seq(%rdi), %edx
+	cmpl	4(%r8), %edx
+	jne	3f
+
+	/* We increment the wakeup_seq counter only if it is lower than
+	   total_seq.  If this is not the case the thread was woken and
+	   then canceled.  In this case we ignore the signal.  */
+	movq	total_seq(%rdi), %rax
+	cmpq	wakeup_seq(%rdi), %rax
+	jbe	6f
+	incq	wakeup_seq(%rdi)
+	incl	cond_futex(%rdi)
+6:	incq	woken_seq(%rdi)
+
+3:	subl	$(1 << nwaiters_shift), cond_nwaiters(%rdi)
+
+	/* Wake up a thread which wants to destroy the condvar object.  */
+	xorq	%r12, %r12
+	cmpq	$0xffffffffffffffff, total_seq(%rdi)
+	jne	4f
+	movl	cond_nwaiters(%rdi), %eax
+	andl	$~((1 << nwaiters_shift) - 1), %eax
+	jne	4f
+
+	addq	$cond_nwaiters, %rdi
+	cmpq	$-1, dep_mutex-cond_nwaiters(%rdi)
+	movl	$1, %edx
+#ifdef __ASSUME_PRIVATE_FUTEX
+	movl	$FUTEX_WAKE, %eax
+	movl	$(FUTEX_WAKE|FUTEX_PRIVATE_FLAG), %esi
+	cmove	%eax, %esi
+#else
+	movl	$0, %eax
+	movl	%fs:PRIVATE_FUTEX, %esi
+	cmove	%eax, %esi
+	orl	$FUTEX_WAKE, %esi
+#endif
+	movl	$SYS_futex, %eax
+	syscall
+	subq	$cond_nwaiters, %rdi
+	movl	$1, %r12d
+
+4:	LOCK
+#if cond_lock == 0
+	decl	(%rdi)
+#else
+	decl	cond_lock(%rdi)
+#endif
+	je	2f
+#if cond_lock != 0
+	addq	$cond_lock, %rdi
+#endif
+	cmpq	$-1, dep_mutex-cond_lock(%rdi)
+	movl	$LLL_PRIVATE, %eax
+	movl	$LLL_SHARED, %esi
+	cmovne	%eax, %esi
+	callq	__lll_unlock_wake
+
+	/* Wake up all waiters to make sure no signal gets lost.  */
+2:	testq	%r12, %r12
+	jnz	5f
+	addq	$cond_futex, %rdi
+	cmpq	$-1, dep_mutex-cond_futex(%rdi)
+	movl	$0x7fffffff, %edx
+#ifdef __ASSUME_PRIVATE_FUTEX
+	movl	$FUTEX_WAKE, %eax
+	movl	$(FUTEX_WAKE|FUTEX_PRIVATE_FLAG), %esi
+	cmove	%eax, %esi
+#else
+	movl	$0, %eax
+	movl	%fs:PRIVATE_FUTEX, %esi
+	cmove	%eax, %esi
+	orl	$FUTEX_WAKE, %esi
+#endif
+	movl	$SYS_futex, %eax
+	syscall
+
+5:	movq	16(%r8), %rdi
+	callq	__pthread_mutex_cond_lock
+
+	popq	%r12
+	cfi_adjust_cfa_offset(-8)
+	cfi_restore(%r12)
+
+	retq
+	cfi_endproc
+	.size	__condvar_cleanup, .-__condvar_cleanup
+
+
 /* int pthread_cond_timedwait (pthread_cond_t *cond, pthread_mutex_t *mutex,
 			       const struct timespec *abstime)  */
 	.globl	__pthread_cond_timedwait
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
index 146a414..c3c879c 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
@@ -28,148 +28,32 @@
 
 	.text
 
-	.align	16
-	.type	__condvar_cleanup, @function
-	.globl	__condvar_cleanup
-	.hidden	__condvar_cleanup
-__condvar_cleanup:
-	cfi_startproc
-	pushq	%r12
-	cfi_adjust_cfa_offset(8)
-	cfi_rel_offset(%r12, 0)
-
-	/* Get internal lock.  */
-	movq	%rdi, %r8
-	movq	8(%rdi), %rdi
-	movl	$1, %esi
-	xorl	%eax, %eax
-	LOCK
-#if cond_lock == 0
-	cmpxchgl %esi, (%rdi)
-#else
-	cmpxchgl %esi, cond_lock(%rdi)
-#endif
-	jz	1f
-
-#if cond_lock != 0
-	addq	$cond_lock, %rdi
-#endif
-	cmpq	$-1, dep_mutex-cond_lock(%rdi)
-	movl	$LLL_PRIVATE, %eax
-	movl	$LLL_SHARED, %esi
-	cmovne	%eax, %esi
-	callq	__lll_lock_wait
-#if cond_lock != 0
-	subq	$cond_lock, %rdi
-#endif
-
-1:	movl	broadcast_seq(%rdi), %edx
-	cmpl	4(%r8), %edx
-	jne	3f
-
-	/* We increment the wakeup_seq counter only if it is lower than
-	   total_seq.  If this is not the case the thread was woken and
-	   then canceled.  In this case we ignore the signal.  */
-	movq	total_seq(%rdi), %rax
-	cmpq	wakeup_seq(%rdi), %rax
-	jbe	6f
-	incq	wakeup_seq(%rdi)
-	incl	cond_futex(%rdi)
-6:	incq	woken_seq(%rdi)
-
-3:	subl	$(1 << nwaiters_shift), cond_nwaiters(%rdi)
-
-	/* Wake up a thread which wants to destroy the condvar object.  */
-	xorq	%r12, %r12
-	cmpq	$0xffffffffffffffff, total_seq(%rdi)
-	jne	4f
-	movl	cond_nwaiters(%rdi), %eax
-	andl	$~((1 << nwaiters_shift) - 1), %eax
-	jne	4f
-
-	addq	$cond_nwaiters, %rdi
-	cmpq	$-1, dep_mutex-cond_nwaiters(%rdi)
-	movl	$1, %edx
-#ifdef __ASSUME_PRIVATE_FUTEX
-	movl	$FUTEX_WAKE, %eax
-	movl	$(FUTEX_WAKE|FUTEX_PRIVATE_FLAG), %esi
-	cmove	%eax, %esi
-#else
-	movl	$0, %eax
-	movl	%fs:PRIVATE_FUTEX, %esi
-	cmove	%eax, %esi
-	orl	$FUTEX_WAKE, %esi
-#endif
-	movl	$SYS_futex, %eax
-	syscall
-	subq	$cond_nwaiters, %rdi
-	movl	$1, %r12d
-
-4:	LOCK
-#if cond_lock == 0
-	decl	(%rdi)
-#else
-	decl	cond_lock(%rdi)
-#endif
-	je	2f
-#if cond_lock != 0
-	addq	$cond_lock, %rdi
-#endif
-	cmpq	$-1, dep_mutex-cond_lock(%rdi)
-	movl	$LLL_PRIVATE, %eax
-	movl	$LLL_SHARED, %esi
-	cmovne	%eax, %esi
-	callq	__lll_unlock_wake
-
-	/* Wake up all waiters to make sure no signal gets lost.  */
-2:	testq	%r12, %r12
-	jnz	5f
-	addq	$cond_futex, %rdi
-	cmpq	$-1, dep_mutex-cond_futex(%rdi)
-	movl	$0x7fffffff, %edx
-#ifdef __ASSUME_PRIVATE_FUTEX
-	movl	$FUTEX_WAKE, %eax
-	movl	$(FUTEX_WAKE|FUTEX_PRIVATE_FLAG), %esi
-	cmove	%eax, %esi
-#else
-	movl	$0, %eax
-	movl	%fs:PRIVATE_FUTEX, %esi
-	cmove	%eax, %esi
-	orl	$FUTEX_WAKE, %esi
-#endif
-	movl	$SYS_futex, %eax
-	syscall
-
-5:	movq	16(%r8), %rdi
-	callq	__pthread_mutex_cond_lock
-
-	popq	%r12
-	cfi_adjust_cfa_offset(-8)
-	cfi_restore(%r12)
-
-	retq
-	cfi_endproc
-	.size	__condvar_cleanup, .-__condvar_cleanup
-
-
 /* int pthread_cond_wait (pthread_cond_t *cond, pthread_mutex_t *mutex)  */
 	.globl	__pthread_cond_wait
 	.type	__pthread_cond_wait, @function
 	.align	16
 __pthread_cond_wait:
+.LSTARTCODE:
 	cfi_startproc
+#ifdef SHARED
+	cfi_personality(DW_EH_PE_pcrel | DW_EH_PE_sdata4 | DW_EH_PE_indirect,
+			DW.ref.__gcc_personality_v0)
+	cfi_lsda(DW_EH_PE_pcrel | DW_EH_PE_sdata4, .LexceptSTART)
+#else
+	cfi_personality(DW_EH_PE_udata4, __gcc_personality_v0)
+	cfi_lsda(DW_EH_PE_udata4, .LexceptSTART)
+#endif
+
 	pushq	%r12
 	cfi_adjust_cfa_offset(8)
 	cfi_rel_offset(%r12, 0)
-#define FRAME_SIZE 64
+#define FRAME_SIZE 32
 	subq	$FRAME_SIZE, %rsp
 	cfi_adjust_cfa_offset(FRAME_SIZE)
 
 	/* Stack frame:
 
-	    rsp + 64
-		     +--------------------------+
-	    rsp + 32 | cleanup buffer           |
+	    rsp + 32
 		     +--------------------------+
 	    rsp + 24 | old wake_seq value       |
 		     +--------------------------+
@@ -216,16 +100,6 @@ __pthread_cond_wait:
 	incl	cond_futex(%rdi)
 	addl	$(1 << nwaiters_shift), cond_nwaiters(%rdi)
 
-	/* Install cancellation handler.  */
-#ifdef PIC
-	leaq	__condvar_cleanup(%rip), %rsi
-#else
-	leaq	__condvar_cleanup, %rsi
-#endif
-	leaq	32(%rsp), %rdi
-	movq	%rsp, %rdx
-	callq	__pthread_cleanup_push
-
 	/* Get and store current wakeup_seq value.  */
 	movq	8(%rsp), %rdi
 	movq	wakeup_seq(%rdi), %r9
@@ -243,6 +117,7 @@
 #endif
 	jne	3f
 
+.LcleanupSTART:
 4:	callq	__pthread_enable_asynccancel
 	movl	%eax, (%rsp)
 
@@ -268,6 +143,7 @@
 
 	movl	(%rsp), %edi
 	callq	__pthread_disable_asynccancel
+.LcleanupEND:
 
 	/* Lock.  */
 	movq	8(%rsp), %rdi
@@ -333,11 +209,7 @@
 #endif
 	jne	10f
 
-	/* Remove cancellation handler.  */
-11:	movq	32+CLEANUP_PREV(%rsp), %rdx
-	movq	%rdx, %fs:CLEANUP
-
-	movq	16(%rsp), %rdi
+11:	movq	16(%rsp), %rdi
 	callq	__pthread_mutex_cond_lock
 14:	addq	$FRAME_SIZE, %rsp
 	cfi_adjust_cfa_offset(-FRAME_SIZE)
@@ -424,7 +296,175 @@
 13:	movq	%r10, %rax
 	jmp	14b
 
-	cfi_endproc
 	.size	__pthread_cond_wait, .-__pthread_cond_wait
 versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait,
 		  GLIBC_2_3_2)
+
+
+	.align	16
+	.type	__condvar_cleanup1, @function
+	.globl	__condvar_cleanup1
+	.hidden	__condvar_cleanup1
+__condvar_cleanup1:
+	/* Stack frame:
+
+	    rsp + 40
+		     +--------------------------+
+	    rsp + 32 | %r12                     |
+		     +--------------------------+
+	    rsp + 24 | unused                   |
+		     +--------------------------+
+	    rsp + 16 | mutex pointer            |
+		     +--------------------------+
+	    rsp + 8  | condvar pointer          |
+		     +--------------------------+
+	    rsp + 4  | old broadcast_seq value  |
+		     +--------------------------+
+	    rsp + 0  | old cancellation mode    |
+		     +--------------------------+
+	*/
+
+	movq	%rax, 24(%rsp)
+
+	/* Get internal lock.  */
+	movq	8(%rsp), %rdi
+	movl	$1, %esi
+	xorl	%eax, %eax
+	LOCK
+#if cond_lock == 0
+	cmpxchgl %esi, (%rdi)
+#else
+	cmpxchgl %esi, cond_lock(%rdi)
+#endif
+	jz	1f
+
+#if cond_lock != 0
+	addq	$cond_lock, %rdi
+#endif
+	cmpq	$-1, dep_mutex-cond_lock(%rdi)
+	movl	$LLL_PRIVATE, %eax
+	movl	$LLL_SHARED, %esi
+	cmovne	%eax, %esi
+	callq	__lll_lock_wait
+#if cond_lock != 0
+	subq	$cond_lock, %rdi
+#endif
+
+1:	movl	broadcast_seq(%rdi), %edx
+	cmpl	4(%rsp), %edx
+	jne	3f
+
+	/* We increment the wakeup_seq counter only if it is lower than
+	   total_seq.  If this is not the case the thread was woken and
+	   then canceled.  In this case we ignore the signal.  */
+	movq	total_seq(%rdi), %rax
+	cmpq	wakeup_seq(%rdi), %rax
+	jbe	6f
+	incq	wakeup_seq(%rdi)
+	incl	cond_futex(%rdi)
+6:	incq	woken_seq(%rdi)
+
+3:	subl	$(1 << nwaiters_shift), cond_nwaiters(%rdi)
+
+	/* Wake up a thread which wants to destroy the condvar object.  */
+	xorq	%r12, %r12
+	cmpq	$0xffffffffffffffff, total_seq(%rdi)
+	jne	4f
+	movl	cond_nwaiters(%rdi), %eax
+	andl	$~((1 << nwaiters_shift) - 1), %eax
+	jne	4f
+
+	cmpq	$-1, dep_mutex(%rdi)
+	leaq	cond_nwaiters(%rdi), %rdi
+	movl	$1, %edx
+#ifdef __ASSUME_PRIVATE_FUTEX
+	movl	$FUTEX_WAKE, %eax
+	movl	$(FUTEX_WAKE|FUTEX_PRIVATE_FLAG), %esi
+	cmove	%eax, %esi
+#else
+	movl	$0, %eax
+	movl	%fs:PRIVATE_FUTEX, %esi
+	cmove	%eax, %esi
+	orl	$FUTEX_WAKE, %esi
+#endif
+	movl	$SYS_futex, %eax
+	syscall
+	subq	$cond_nwaiters, %rdi
+	movl	$1, %r12d
+
+4:	LOCK
+#if cond_lock == 0
+	decl	(%rdi)
+#else
+	decl	cond_lock(%rdi)
+#endif
+	je	2f
+#if cond_lock != 0
+	addq	$cond_lock, %rdi
+#endif
+	cmpq	$-1, dep_mutex-cond_lock(%rdi)
+	movl	$LLL_PRIVATE, %eax
+	movl	$LLL_SHARED, %esi
+	cmovne	%eax, %esi
+	callq	__lll_unlock_wake
+
+	/* Wake up all waiters to make sure no signal gets lost.  */
+2:	testq	%r12, %r12
+	jnz	5f
+	addq	$cond_futex, %rdi
+	cmpq	$-1, dep_mutex-cond_futex(%rdi)
+	movl	$0x7fffffff, %edx
+#ifdef __ASSUME_PRIVATE_FUTEX
+	movl	$FUTEX_WAKE, %eax
+	movl	$(FUTEX_WAKE|FUTEX_PRIVATE_FLAG), %esi
+	cmove	%eax, %esi
+#else
+	movl	$0, %eax
+	movl	%fs:PRIVATE_FUTEX, %esi
+	cmove	%eax, %esi
+	orl	$FUTEX_WAKE, %esi
+#endif
+	movl	$SYS_futex, %eax
+	syscall
+
+5:	movq	16(%rsp), %rdi
+	callq	__pthread_mutex_cond_lock
+
+	movq	24(%rsp), %rdi
+	movq	32(%rsp), %r12
+.LcallUR:
+	call	_Unwind_Resume@PLT
+	hlt
+.LENDCODE:
+	cfi_endproc
+	.size	__condvar_cleanup1, .-__condvar_cleanup1
+
+
+	.section .gcc_except_table,"a",@progbits
+.LexceptSTART:
+	.byte	DW_EH_PE_omit			# @LPStart format
+	.byte	DW_EH_PE_omit			# @TType format
+	.byte	DW_EH_PE_uleb128		# call-site format
+	.uleb128 .Lcstend-.Lcstbegin
+.Lcstbegin:
+	.uleb128 .LcleanupSTART-.LSTARTCODE
+	.uleb128 .LcleanupEND-.LcleanupSTART
+	.uleb128 __condvar_cleanup1-.LSTARTCODE
+	.uleb128 0
+	.uleb128 .LcallUR-.LSTARTCODE
+	.uleb128 .LENDCODE-.LcallUR
+	.uleb128 0
+	.uleb128 0
+.Lcstend:
+
+
+#ifdef SHARED
+	.hidden	DW.ref.__gcc_personality_v0
+	.weak	DW.ref.__gcc_personality_v0
+	.section .gnu.linkonce.d.DW.ref.__gcc_personality_v0,"aw",@progbits
+	.align	8
+	.type	DW.ref.__gcc_personality_v0, @object
+	.size	DW.ref.__gcc_personality_v0, 8
DW.ref.__gcc_personality_v0:
+	.quad	__gcc_personality_v0
+#endif
-- 
2.7.4
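
Note (not part of the commit): the job done by the removed __pthread_cleanup_push call and by the new __condvar_cleanup1 routine is the cancellation cleanup that user code normally expresses with pthread_cleanup_push/pthread_cleanup_pop around a condition wait.  The C sketch below only illustrates that user-visible contract; it is not glibc code, and the names lock, cond, ready, waiter and unlock_on_cancel are invented for the example.

#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int ready;

/* Cancellation cleanup handler: pthread_cond_wait re-acquires the mutex
   before cleanup handlers run, so the handler only has to release it.  */
static void
unlock_on_cancel (void *arg)
{
  pthread_mutex_unlock ((pthread_mutex_t *) arg);
}

void *
waiter (void *arg)
{
  pthread_mutex_lock (&lock);
  pthread_cleanup_push (unlock_on_cancel, &lock);
  while (!ready)
    /* Cancellation point; if the thread is cancelled here, the handler
       registered above runs during unwinding.  */
    pthread_cond_wait (&cond, &lock);
  pthread_cleanup_pop (0);
  pthread_mutex_unlock (&lock);
  return arg;
}

POSIX requires that a thread cancelled while blocked in pthread_cond_wait re-acquire the mutex before its cleanup handlers run; that is what the callq __pthread_mutex_cond_lock at the end of __condvar_cleanup1 provides, and it is why a user-level handler like the one above only has to unlock.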
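
Note (not part of the commit): the "exception handling functionality of the gcc runtime" mentioned in the commit message is the same machinery that plain C code reaches with -fexceptions on a DWARF-EH target such as x86-64: gcc then emits a .gcc_except_table entry and a reference to __gcc_personality_v0, so cleanups run during the forced unwind started by pthread_cancel with no run-time registration.  A rough sketch only, under that assumption; drop_lock and wait_for_flag are invented names, and the file must be compiled with -fexceptions for the cleanup to run on cancellation.

#include <pthread.h>

/* Helper for the cleanup attribute; it receives a pointer to the
   annotated variable.  */
static void
drop_lock (pthread_mutex_t **m)
{
  pthread_mutex_unlock (*m);
}

void
wait_for_flag (pthread_cond_t *cond, pthread_mutex_t *lock, int *flag)
{
  pthread_mutex_lock (lock);
  /* The cleanup runs on normal return and, with -fexceptions, also
     during cancellation unwinding.  */
  pthread_mutex_t *guard __attribute__ ((cleanup (drop_lock))) = lock;
  while (!*flag)
    pthread_cond_wait (cond, lock);
}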