/*
 * Copyright (c) 2012 The Native Client Authors. All rights reserved.
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

/*
 * Native Client threads library
 */

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/errno.h>
#include <sys/unistd.h>

#include "native_client/src/include/nacl_base.h"

#include "native_client/src/untrusted/irt/irt_interfaces.h"
#include "native_client/src/untrusted/nacl/nacl_irt.h"
#include "native_client/src/untrusted/nacl/tls.h"
#include "native_client/src/untrusted/nacl/tls_params.h"
#include "native_client/src/untrusted/pthread/pthread.h"
#include "native_client/src/untrusted/pthread/pthread_internal.h"
#include "native_client/src/untrusted/pthread/pthread_types.h"

#include "native_client/src/untrusted/valgrind/dynamic_annotations.h"

#if defined(NACL_IN_IRT)
# include "native_client/src/untrusted/irt/irt_private.h"
#endif

#define FUN_TO_VOID_PTR(a) ((void *) (uintptr_t) (a))

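/*
 * The intermediate uintptr_t cast avoids a direct conversion between a
 * function pointer and an object pointer, which ISO C does not define
 * and which compilers typically warn about.
 */
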
/*
 * ABI tables for underlying NaCl thread interfaces.
 */
static struct nacl_irt_thread irt_thread;

/*
 * These days, the thread_create() syscall/IRT call will align the
 * stack for us, but for compatibility with older, released x86
 * versions of NaCl where thread_create() does not align the stack, we
 * align the stack ourselves.
 */
#if defined(__i386__)
static const uint32_t kStackAlignment = 32;
static const uint32_t kStackPadBelowAlign = 4;  /* Return address size */
#elif defined(__x86_64__)
static const uint32_t kStackAlignment = 32;
static const uint32_t kStackPadBelowAlign = 8;  /* Return address size */
#else
static const uint32_t kStackAlignment = 1;
static const uint32_t kStackPadBelowAlign = 0;
#endif

typedef struct nc_thread_cleanup_handler {
  struct nc_thread_cleanup_handler *previous;
  void (*handler_function)(void *arg);
  void *handler_arg;
} nc_thread_cleanup_handler;

static __thread nc_thread_cleanup_handler *__nc_cleanup_handlers = NULL;

#define TDB_SIZE (sizeof(struct nc_combined_tdb))

static inline char *align(uint32_t offset, uint32_t alignment) {
  return (char *) ((offset + alignment - 1) & ~(alignment - 1));
}

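/*
 * For example, with the power-of-two alignment 32, offsets round up to
 * the next 32-byte boundary: align(0x1001, 32) == (char *) 0x1020, while
 * the already-aligned align(0x1020, 32) == (char *) 0x1020.
 */
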
/* Thread management global variables. */
const int __nc_kMaxCachedMemoryBlocks = 50;

int __nc_thread_initialized;

/* Mutex used to synchronize thread management code. */
pthread_mutex_t __nc_thread_management_lock;

/*
 * Condition variable that gets signaled when all the threads
 * except the main thread have terminated.
 */
static pthread_cond_t __nc_last_thread_cond;
pthread_t __nc_initial_thread_id;

/* Number of threads currently running in this NaCl module. */
int __nc_running_threads_counter = 1;

/* We have two queues of memory blocks - one for each type. */
STAILQ_HEAD(tailhead, entry) __nc_thread_memory_blocks[2];
/* We need a counter for each queue to keep track of number of blocks. */
int __nc_memory_block_counter[2];

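/*
 * The queues are indexed by nc_thread_memory_block_type_t:
 * __nc_thread_memory_blocks[THREAD_STACK_MEMORY] caches stack blocks and
 * __nc_thread_memory_blocks[TLS_AND_TDB_MEMORY] caches TLS+TDB blocks.
 */
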
/* Internal functions */

static inline void nc_abort(void) {
  while (1) *(volatile int *) 0 = 0;  /* Crash. */
}

static inline nc_thread_descriptor_t *nc_get_tdb(void) {
  /*
   * Fetch the thread-specific data pointer. This is usually just
   * a wrapper around __libnacl_irt_tls.tls_get() but we don't use
   * that here so that the IRT build can override the definition.
   */
  return (void *) ((char *) __nacl_read_tp_inline()
                   + __nacl_tp_tdb_offset(TDB_SIZE));
}

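/*
 * __nacl_tp_tdb_offset() encapsulates the per-architecture TLS layout:
 * on some targets the TDB sits directly at the thread pointer (offset 0),
 * on others just below it (a negative offset). See tls_params.h.
 */
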
static void nc_thread_starter(void) {
  nc_thread_descriptor_t *tdb = nc_get_tdb();
  __newlib_thread_init();
#if defined(NACL_IN_IRT)
  g_is_irt_internal_thread = 1;
#endif
  void *retval = tdb->start_func(tdb->state);

  /*
   * Free the handler list to prevent a memory leak in case the function
   * returns without calling pthread_cleanup_pop(), although doing that
   * is unspecified behavior.
   */
  while (NULL != __nc_cleanup_handlers) {
    pthread_cleanup_pop(0);
  }

  /* If the function returns, terminate the thread. */
  pthread_exit(retval);
  /* NOTREACHED */
  /* TODO(gregoryd) - add assert */
}

static nc_thread_memory_block_t *nc_allocate_memory_block_mu(
    nc_thread_memory_block_type_t type,
    int required_size) {
  struct tailhead *head;
  nc_thread_memory_block_t *node;
  /* Assume the lock is held!!! */
  if (type >= MAX_MEMORY_TYPE)
    return NULL;
  head = &__nc_thread_memory_blocks[type];

  /* We need to know the size even if we find a free node - to memset it to 0 */
  switch (type) {
    case THREAD_STACK_MEMORY:
      required_size = required_size + kStackAlignment - 1;
      break;
    case TLS_AND_TDB_MEMORY:
      break;
    case MAX_MEMORY_TYPE:
    default:
      return NULL;
  }

  if (!STAILQ_EMPTY(head)) {
    /* Try to get one from queue. */
    nc_thread_memory_block_t *node = STAILQ_FIRST(head);

    /*
     * On average the memory blocks will be marked as not used in the same
     * order as they are added to the queue, therefore there is no need to
     * check the next queue entries if the first one is still in use.
     */
    if (0 == node->is_used && node->size >= required_size) {
      /*
       * This will only re-use the first node possibly, and could be
       * improved to provide the stack with a best-fit algorithm if needed.
       * TODO: we should scan all nodes to see if there is one that fits
       * before allocating another.
       * http://code.google.com/p/nativeclient/issues/detail?id=1569
       */
      int size = node->size;
      STAILQ_REMOVE_HEAD(head, entries);
      --__nc_memory_block_counter[type];

      memset(node, 0, sizeof(*node));
      node->size = size;
      node->is_used = 1;
      return node;
    }

    while (__nc_memory_block_counter[type] > __nc_kMaxCachedMemoryBlocks) {
      /*
       * We have too many blocks in the queue - try to release some.
       * The maximum number of memory blocks to keep in the queue
       * is almost arbitrary and can be tuned.
       * The main limitation is that if we keep too many
       * blocks in the queue, the NaCl app will run out of memory,
       * since the default thread stack size is 512K.
       * TODO(gregoryd): we might give up reusing stack entries once we
       * support variable stack size.
       */
      nc_thread_memory_block_t *tmp = STAILQ_FIRST(head);
      if (0 == tmp->is_used) {
        STAILQ_REMOVE_HEAD(head, entries);
        --__nc_memory_block_counter[type];
        free(tmp);
      } else {
        /*
         * Stop once we find a block that is still in use,
         * since probably there is no point to continue.
         */
        break;
      }
    }
  }

  /* No available blocks of the required type/size - allocate one. */
  node = malloc(MEMORY_BLOCK_ALLOCATION_SIZE(required_size));
  if (NULL != node) {
    memset(node, 0, sizeof(*node));
    node->size = required_size;
    node->is_used = 1;
  }
  return node;
}

static void nc_free_memory_block_mu(nc_thread_memory_block_type_t type,
                                    nc_thread_memory_block_t *node) {
  /* Assume the lock is held!!! */
  struct tailhead *head = &__nc_thread_memory_blocks[type];
  STAILQ_INSERT_TAIL(head, node, entries);
  ++__nc_memory_block_counter[type];
}

static void nc_release_basic_data_mu(nc_basic_thread_data_t *basic_data) {
  /* join_condvar can be initialized only if tls_node exists. */
  pthread_cond_destroy(&basic_data->join_condvar);
  free(basic_data);
}

static void nc_release_tls_node(nc_thread_memory_block_t *block,
                                nc_thread_descriptor_t *tdb) {
  if (NULL != block) {
    if (NULL != tdb->basic_data) {
      tdb->basic_data->tdb = NULL;
    }
    block->is_used = 0;
    nc_free_memory_block_mu(TLS_AND_TDB_MEMORY, block);
  }
}

/* Initialize a newly allocated TDB to some default values. */
static void nc_tdb_init(nc_thread_descriptor_t *tdb,
                        nc_basic_thread_data_t *basic_data) {
  tdb->joinable = PTHREAD_CREATE_JOINABLE;
  tdb->join_waiting = 0;
  tdb->stack_node = NULL;
  tdb->tls_node = NULL;
  tdb->start_func = NULL;
  tdb->state = NULL;
  tdb->irt_thread_data = NULL;
  tdb->basic_data = basic_data;

  basic_data->retval = NULL;
  basic_data->status = THREAD_RUNNING;
  if (pthread_cond_init(&basic_data->join_condvar, NULL) != 0)
    nc_abort();
  basic_data->tdb = tdb;
}

/* Initializes all globals except for the initial thread structure. */
void __nc_initialize_globals(void) {
  /*
   * Fetch the ABI tables from the IRT. If we don't have these, all is lost.
   */
  __nc_initialize_interfaces(&irt_thread);

  if (pthread_mutex_init(&__nc_thread_management_lock, NULL) != 0)
    nc_abort();

  /*
   * Tell ThreadSanitizer to not generate happens-before arcs between uses of
   * this mutex. Otherwise we miss too many real races.
   * When not running under ThreadSanitizer, this is just a call to an empty
   * function.
   */
  ANNOTATE_NOT_HAPPENS_BEFORE_MUTEX(&__nc_thread_management_lock);

  if (pthread_cond_init(&__nc_last_thread_cond, NULL) != 0)
    nc_abort();
  STAILQ_INIT(&__nc_thread_memory_blocks[0]);
  STAILQ_INIT(&__nc_thread_memory_blocks[1]);

  __nc_thread_initialized = 1;
}

#if defined(NACL_IN_IRT)

/*
 * This is used by the IRT for user threads. We initialize all fields
 * so that we get predictable behaviour in case some IRT code does an
 * unsupported pthread operation on a user thread.
 */
void __nc_initialize_unjoinable_thread(struct nc_combined_tdb *tdb) {
  nc_tdb_init(&tdb->tdb, &tdb->basic_data);
  tdb->tdb.joinable = 0;
}

#endif

/*
 * Will be called from the library startup code,
 * which always happens on the application's main thread.
 */
void __pthread_initialize(void) {
  __pthread_initialize_minimal(TDB_SIZE);

  struct nc_combined_tdb *tdb = (struct nc_combined_tdb *) nc_get_tdb();
  nc_tdb_init(&tdb->tdb, &tdb->basic_data);
  __nc_initial_thread_id = &tdb->basic_data;

  __nc_initialize_globals();
}


/* pthread functions */

int pthread_create(pthread_t *thread_id,
                   const pthread_attr_t *attr,
                   void *(*start_routine)(void *),
                   void *arg) {
  int retval = EAGAIN;
  void *esp;
  void *new_tp;
  /* Declare the variables outside of the while scope. */
  nc_thread_memory_block_t *stack_node = NULL;
  char *thread_stack = NULL;
  nc_thread_descriptor_t *new_tdb = NULL;
  nc_basic_thread_data_t *new_basic_data = NULL;
  nc_thread_memory_block_t *tls_node = NULL;
  size_t stacksize = PTHREAD_STACK_DEFAULT;

  /* TODO(gregoryd) - right now a single lock is used, try to optimize? */
  pthread_mutex_lock(&__nc_thread_management_lock);

  do {
    /* Allocate the combined TLS + TDB block---see tls.h for explanation. */
    tls_node = nc_allocate_memory_block_mu(TLS_AND_TDB_MEMORY,
                                           __nacl_tls_combined_size(TDB_SIZE));
    if (NULL == tls_node)
      break;

    new_tp = __nacl_tls_initialize_memory(nc_memory_block_to_payload(tls_node),
                                          TDB_SIZE);

    new_tdb = (nc_thread_descriptor_t *)
        ((char *) new_tp + __nacl_tp_tdb_offset(TDB_SIZE));

    /*
     * TODO(gregoryd): consider creating a pool of basic_data structs,
     * similar to stack and TLS+TDB (probably when adding the support for
     * variable stack size).
     */
    new_basic_data = malloc(sizeof(*new_basic_data));
    if (NULL == new_basic_data) {
      /*
       * The tdb should be zero initialized.
       * This just re-emphasizes this requirement.
       */
      new_tdb->basic_data = NULL;
      break;
    }

    nc_tdb_init(new_tdb, new_basic_data);
    new_tdb->tls_node = tls_node;

    /*
     * All the required members of the tdb must be initialized before
     * the thread is started and actually before the global lock is released,
     * since another thread can call pthread_join() or pthread_detach().
     */
    new_tdb->start_func = start_routine;
    new_tdb->state = arg;
    if (attr != NULL) {
      new_tdb->joinable = attr->joinable;
      stacksize = attr->stacksize;
    }

    /* Allocate the stack for the thread. */
    stack_node = nc_allocate_memory_block_mu(THREAD_STACK_MEMORY, stacksize);
    if (NULL == stack_node)
      break;
    thread_stack = align((uint32_t) nc_memory_block_to_payload(stack_node),
                         kStackAlignment);
    new_tdb->stack_node = stack_node;
    retval = 0;
  } while (0);

  if (0 != retval) {
    pthread_mutex_unlock(&__nc_thread_management_lock);
    goto ret;  /* error */
  }

  /*
   * Speculatively increase the thread count. If thread creation
   * fails, we will decrease it back. This way the thread count will
   * never be lower than the actual number of threads, but can briefly
   * be higher than that.
   */
  ++__nc_running_threads_counter;

  /*
   * Save the new thread id. This cannot be done after the syscall,
   * because the child thread could have already finished by that
   * time. If thread creation fails, it will be overridden with -1.
   */
  *thread_id = new_basic_data;

  pthread_mutex_unlock(&__nc_thread_management_lock);

  /*
   * Calculate the top-of-stack location. The very first location is a
   * zero address of architecture-dependent width, needed to satisfy the
   * normal ABI alignment requirements for the stack. (On some machines
   * this is the dummy return address of the thread-start function.)
   *
   * Both thread_stack and stacksize are multiples of 16.
   */
  esp = (void *) (thread_stack + stacksize - kStackPadBelowAlign);
  memset(esp, 0, kStackPadBelowAlign);
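  /*
   * Resulting layout (illustrative; the pad is kStackPadBelowAlign bytes):
   *
   *   thread_stack                    esp      thread_stack + stacksize
   *       |<-------- usable stack ----->|<----- zeroed pad ----->|
   *
   * The new thread starts with its stack pointer at esp and grows the
   * stack downward.
   */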

  /* Start the thread. */
  retval = irt_thread.thread_create(
      FUN_TO_VOID_PTR(nc_thread_starter), esp, new_tp);
  if (0 != retval) {
    pthread_mutex_lock(&__nc_thread_management_lock);
    /* TODO(gregoryd) : replace with atomic decrement? */
    --__nc_running_threads_counter;
    pthread_mutex_unlock(&__nc_thread_management_lock);
    goto ret;
  }

  return 0;

ret:
  /* Failed to create a thread. */
  pthread_mutex_lock(&__nc_thread_management_lock);

  nc_release_tls_node(tls_node, new_tdb);
  if (new_basic_data) {
    nc_release_basic_data_mu(new_basic_data);
  }
  if (NULL != stack_node) {
    stack_node->is_used = 0;
    nc_free_memory_block_mu(THREAD_STACK_MEMORY, stack_node);
  }

  pthread_mutex_unlock(&__nc_thread_management_lock);
  *thread_id = NACL_PTHREAD_ILLEGAL_THREAD_ID;
  return retval;
}

static int wait_for_threads(void) {
  pthread_mutex_lock(&__nc_thread_management_lock);

  while (1 != __nc_running_threads_counter) {
    pthread_cond_wait(&__nc_last_thread_cond, &__nc_thread_management_lock);
  }
  ANNOTATE_CONDVAR_LOCK_WAIT(&__nc_last_thread_cond,
                             &__nc_thread_management_lock);

  pthread_mutex_unlock(&__nc_thread_management_lock);
  return 0;
}

void pthread_cleanup_push(void (*routine)(void *), void *arg) {
  nc_thread_cleanup_handler *handler =
      (nc_thread_cleanup_handler *) malloc(sizeof(*handler));
  handler->handler_function = routine;
  handler->handler_arg = arg;
  handler->previous = __nc_cleanup_handlers;
  __nc_cleanup_handlers = handler;
}

void pthread_cleanup_pop(int execute) {
  if (NULL != __nc_cleanup_handlers) {
    nc_thread_cleanup_handler *handler = __nc_cleanup_handlers;
    __nc_cleanup_handlers = handler->previous;
    if (execute)
      handler->handler_function(handler->handler_arg);
    free(handler);
  }
}

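/*
 * Illustrative usage (a hypothetical caller, not code from this file);
 * handlers run in LIFO order, either via pthread_cleanup_pop(1) or when
 * the thread exits with handlers still pushed:
 *
 *   pthread_cleanup_push(free, buf);
 *   ... code that may call pthread_exit() ...
 *   pthread_cleanup_pop(1);  (pops the handler and runs free(buf))
 */
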
void pthread_exit(void *retval) {
  /* Get all we need from the tdb before releasing it. */
  nc_thread_descriptor_t *tdb = nc_get_tdb();
  nc_thread_memory_block_t *stack_node = tdb->stack_node;
  int32_t *is_used = &stack_node->is_used;
  nc_basic_thread_data_t *basic_data = tdb->basic_data;
  int joinable = tdb->joinable;

  /* Call cleanup handlers. */
  while (NULL != __nc_cleanup_handlers) {
    pthread_cleanup_pop(1);
  }

  /* Call the destruction functions for TSD. */
  __nc_tsd_exit();

  __newlib_thread_exit();

  if (__nc_initial_thread_id != basic_data) {
    pthread_mutex_lock(&__nc_thread_management_lock);
    --__nc_running_threads_counter;
    pthread_mutex_unlock(&__nc_thread_management_lock);
  } else {
    /* This is the main thread - wait for other threads to complete. */
    wait_for_threads();
    exit(0);
  }

  pthread_mutex_lock(&__nc_thread_management_lock);

  basic_data->retval = retval;

  if (joinable) {
    /* If somebody is waiting for this thread, signal. */
    basic_data->status = THREAD_TERMINATED;
    pthread_cond_signal(&basic_data->join_condvar);
  }

  /*
   * We can release TLS+TDB - thread id and its return value are still
   * kept in basic_data.
   */
  nc_release_tls_node(tdb->tls_node, tdb);

  if (!joinable) {
    nc_release_basic_data_mu(basic_data);
  }

  /* Now add the stack to the list but keep it marked as used. */
  nc_free_memory_block_mu(THREAD_STACK_MEMORY, stack_node);

  if (1 == __nc_running_threads_counter) {
    pthread_cond_signal(&__nc_last_thread_cond);
  }

  pthread_mutex_unlock(&__nc_thread_management_lock);
  irt_thread.thread_exit(is_used);
  nc_abort();
}

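/*
 * A note on the is_used handoff above: thread_exit() receives the address
 * of the stack block's is_used flag, and the runtime clears that flag once
 * the exiting thread's stack can no longer be in use. That is why the
 * stack block is cached while still marked as used.
 */
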
int pthread_join(pthread_t thread_id, void **thread_return) {
  int retval = 0;
  nc_basic_thread_data_t *basic_data = thread_id;
  if (pthread_self() == thread_id) {
    return EDEADLK;
  }

  pthread_mutex_lock(&__nc_thread_management_lock);

  if (basic_data->tdb != NULL) {
    /* The thread is still running. */
    nc_thread_descriptor_t *joined_tdb = basic_data->tdb;
    if (!joined_tdb->joinable || joined_tdb->join_waiting) {
      /* The thread is detached or another thread is waiting to join. */
      retval = EINVAL;
      goto ret;
    }
    joined_tdb->join_waiting = 1;
    /* Wait till the thread terminates. */
    while (THREAD_TERMINATED != basic_data->status) {
      pthread_cond_wait(&basic_data->join_condvar,
                        &__nc_thread_management_lock);
    }
  }
  ANNOTATE_CONDVAR_LOCK_WAIT(&basic_data->join_condvar,
                             &__nc_thread_management_lock);
  /* The thread has already terminated. */
  /* Save the return value. */
  if (thread_return != NULL) {
    *thread_return = basic_data->retval;
  }

  /* Release the resources. */
  nc_release_basic_data_mu(basic_data);

ret:
  pthread_mutex_unlock(&__nc_thread_management_lock);
  return retval;
}

int pthread_detach(pthread_t thread_id) {
  int retval = 0;
  nc_basic_thread_data_t *basic_data = thread_id;
  nc_thread_descriptor_t *detached_tdb;
  /*
   * TODO(gregoryd) - can be optimized using InterlockedExchange
   * once it's available.
   */
  pthread_mutex_lock(&__nc_thread_management_lock);
  detached_tdb = basic_data->tdb;

  if (NULL == detached_tdb) {
    /* The thread has already terminated. */
    nc_release_basic_data_mu(basic_data);
  } else {
    if (!detached_tdb->join_waiting) {
      if (detached_tdb->joinable) {
        detached_tdb->joinable = 0;
      } else {
        /* Already detached. */
        retval = EINVAL;
      }
    } else {
      /* Another thread is already waiting to join - do nothing. */
    }
  }
  pthread_mutex_unlock(&__nc_thread_management_lock);
  return retval;
}

int pthread_kill(pthread_t thread_id,
                 int sig) {
  /* This function is currently unimplemented. */
  return ENOSYS;
}

pthread_t pthread_self(void) {
  /* Get the tdb pointer from gs and use it to return the thread handle. */
  nc_thread_descriptor_t *tdb = nc_get_tdb();
  return tdb->basic_data;
}

int pthread_equal(pthread_t thread1, pthread_t thread2) {
  return (thread1 == thread2);
}

int pthread_setschedprio(pthread_t thread_id, int prio) {
  if (thread_id != pthread_self()) {
    /*
     * We can only support changing our own priority.
     */
    return EPERM;
  }
  return irt_thread.thread_nice(prio);
}

int pthread_attr_init(pthread_attr_t *attr) {
  if (NULL == attr) {
    return EINVAL;
  }
  attr->joinable = PTHREAD_CREATE_JOINABLE;
  attr->stacksize = PTHREAD_STACK_DEFAULT;
  return 0;
}

int pthread_attr_destroy(pthread_attr_t *attr) {
  if (NULL == attr) {
    return EINVAL;
  }
  /* Nothing to destroy. */
  return 0;
}

int pthread_attr_setdetachstate(pthread_attr_t *attr,
                                int detachstate) {
  if (NULL == attr) {
    return EINVAL;
  }
  attr->joinable = detachstate;
  return 0;
}

int pthread_attr_getdetachstate(pthread_attr_t *attr,
                                int *detachstate) {
  if (NULL == attr) {
    return EINVAL;
  }
  *detachstate = attr->joinable;
  return 0;
}

int pthread_attr_setstacksize(pthread_attr_t *attr,
                              size_t stacksize) {
  if (NULL == attr) {
    return EINVAL;
  }
  if (PTHREAD_STACK_MIN < stacksize) {
    attr->stacksize = stacksize;
  } else {
    attr->stacksize = PTHREAD_STACK_MIN;
  }
  return 0;
}

int pthread_attr_getstacksize(pthread_attr_t *attr,
                              size_t *stacksize) {
  if (NULL == attr) {
    return EINVAL;
  }
  *stacksize = attr->stacksize;
  return 0;
}

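/*
 * The __local_lock_* functions below implement newlib's <sys/lock.h>
 * hooks in terms of pthread mutexes, so that newlib's internal locks
 * (e.g. the ones guarding malloc and stdio) become thread-safe once this
 * library is initialized.
 */
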
void __local_lock_init(_LOCK_T *lock);
void __local_lock_init_recursive(_LOCK_T *lock);
void __local_lock_close(_LOCK_T *lock);
void __local_lock_close_recursive(_LOCK_T *lock);
void __local_lock_acquire(_LOCK_T *lock);
void __local_lock_acquire_recursive(_LOCK_T *lock);
int __local_lock_try_acquire(_LOCK_T *lock);
int __local_lock_try_acquire_recursive(_LOCK_T *lock);
void __local_lock_release(_LOCK_T *lock);
void __local_lock_release_recursive(_LOCK_T *lock);

void __local_lock_init(_LOCK_T *lock) {
  if (lock != NULL) {
    pthread_mutexattr_t attr;
    pthread_mutexattr_init(&attr);
    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_FAST_NP);
    pthread_mutex_init((pthread_mutex_t *) lock, &attr);
  }
}

void __local_lock_init_recursive(_LOCK_T *lock) {
  if (lock != NULL) {
    pthread_mutexattr_t attr;
    pthread_mutexattr_init(&attr);
    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE_NP);
    pthread_mutex_init((pthread_mutex_t *) lock, &attr);
  }
}

void __local_lock_close(_LOCK_T *lock) {
  if (lock != NULL) {
    pthread_mutex_destroy((pthread_mutex_t *) lock);
  }
}

void __local_lock_close_recursive(_LOCK_T *lock) {
  __local_lock_close(lock);
}

void __local_lock_acquire(_LOCK_T *lock) {
  if (!__nc_thread_initialized) {
    /*
     * The pthread library is not initialized yet - there is only one
     * thread. Calling pthread_mutex_lock would cause an access violation
     * because it would attempt to access the TDB, which is not
     * initialized yet.
     */
    return;
  }
  if (lock != NULL) {
    pthread_mutex_lock((pthread_mutex_t *) lock);
  }
}

void __local_lock_acquire_recursive(_LOCK_T *lock) {
  __local_lock_acquire(lock);
}

int __local_lock_try_acquire(_LOCK_T *lock) {
  if (!__nc_thread_initialized) {
    /*
     * The pthread library is not initialized yet - there is only one
     * thread. Calling pthread_mutex_trylock would cause an access
     * violation because it would attempt to access the TDB, which is not
     * initialized yet.
     */
    return 0;
  }
  if (lock != NULL) {
    return pthread_mutex_trylock((pthread_mutex_t *) lock);
  }
  return 0;
}

int __local_lock_try_acquire_recursive(_LOCK_T *lock) {
  return __local_lock_try_acquire(lock);
}

void __local_lock_release(_LOCK_T *lock) {
  if (!__nc_thread_initialized) {
    /*
     * The pthread library is not initialized yet - there is only one
     * thread. Calling pthread_mutex_unlock would cause an access
     * violation because it would attempt to access the TDB, which is not
     * initialized yet.
     * NOTE: there is no race condition here because the value of the
     * counter cannot change while the lock is held - the startup process
     * is single-threaded.
     */
    return;
  }
  if (lock != NULL) {
    pthread_mutex_unlock((pthread_mutex_t *) lock);
  }
}

void __local_lock_release_recursive(_LOCK_T *lock) {
  __local_lock_release(lock);
}

/*
 * We include this directly in this file rather than compiling it
 * separately because there is some code (e.g. libstdc++) that uses weak
 * references to all pthread functions, but conditionalizes its calls only
 * on one symbol. So if these functions are in another file in a library
 * archive, they might not be linked in by static linking.
 */
#include "native_client/src/untrusted/pthread/nc_tsd.c"