2 * Copyright (c) 2012 The Native Client Authors. All rights reserved.
3 * Use of this source code is governed by a BSD-style license that can be
4 * found in the LICENSE file.
8 * Native Client threads library
16 #include <sys/errno.h>
17 #include <sys/unistd.h>
19 #include "native_client/src/include/nacl_base.h"
21 #include "native_client/src/untrusted/irt/irt_interfaces.h"
22 #include "native_client/src/untrusted/nacl/nacl_irt.h"
23 #include "native_client/src/untrusted/nacl/tls.h"
24 #include "native_client/src/untrusted/nacl/tls_params.h"
25 #include "native_client/src/untrusted/pthread/pthread.h"
26 #include "native_client/src/untrusted/pthread/pthread_internal.h"
27 #include "native_client/src/untrusted/pthread/pthread_types.h"
29 #include "native_client/src/untrusted/valgrind/dynamic_annotations.h"
31 #if defined(NACL_IN_IRT)
32 # include "native_client/src/untrusted/irt/irt_private.h"
36 * ABI tables for underlying NaCl thread interfaces.
38 static struct nacl_irt_thread irt_thread;
41 * These days, the thread_create() syscall/IRT call will align the
42 * stack for us, but for compatibility with older, released x86
43 * versions of NaCl where thread_create() does not align the stack, we
44 * align the stack ourselves.
/*
 * Stack alignment, and the padding reserved below the aligned top of stack,
 * for each supported architecture.  Exactly one branch is compiled, so each
 * constant is defined once.
 * NOTE(review): the opening condition was not visible in this listing;
 * __i386__ is inferred from the 4-byte return address of the first branch.
 */
#if defined(__i386__)
static const uint32_t kStackAlignment = 32;
static const uint32_t kStackPadBelowAlign = 4;  /* Return address size */
#elif defined(__x86_64__)
static const uint32_t kStackAlignment = 32;
static const uint32_t kStackPadBelowAlign = 8;  /* Return address size */
#else
static const uint32_t kStackAlignment = 1;
static const uint32_t kStackPadBelowAlign = 0;
#endif
/*
 * One entry in a thread's list of cleanup handlers.  pthread_cleanup_push()
 * links a new entry in front of the current head; pthread_cleanup_pop()
 * unlinks the head again.
 */
typedef struct nc_thread_cleanup_handler {
  /* Entry that was the list head when this one was pushed, or NULL. */
  struct nc_thread_cleanup_handler *previous;
  /* Routine registered by pthread_cleanup_push(). */
  void (*handler_function)(void *arg);
  /* Argument handed to handler_function when the handler is run. */
  void *handler_arg;
} nc_thread_cleanup_handler;

/* Per-thread head of the cleanup-handler list; NULL when the list is empty. */
static __thread nc_thread_cleanup_handler *__nc_cleanup_handlers = NULL;

/* Size of the combined thread descriptor block kept for each thread. */
#define TDB_SIZE (sizeof(struct nc_combined_tdb))
/*
 * Rounds |offset| up to the next multiple of |alignment| and returns the
 * result as a byte pointer.  |alignment| must be a non-zero power of two;
 * the mask arithmetic below gives meaningless results otherwise.
 */
static inline char *align(uint32_t offset, uint32_t alignment) {
  const uint32_t mask = alignment - 1;
  /*
   * Go through uintptr_t so the integer-to-pointer conversion happens at
   * pointer width even where pointers are wider than 32 bits.
   */
  return (char *) (uintptr_t) ((offset + mask) & ~mask);
}
/* Global state shared by the thread-management routines in this file. */

/* Threshold on the number of blocks kept cached in each per-type queue. */
const int __nc_kMaxCachedMemoryBlocks = 50;

/* Set to 1 at the end of __nc_initialize_globals(). */
int __nc_thread_initialized;

/* Serializes the thread-management code. */
pthread_mutex_t __nc_thread_management_lock;

/* Signaled once every thread other than the main thread has terminated. */
static pthread_cond_t __nc_last_thread_cond;
pthread_t __nc_initial_thread_id;

/* How many threads are currently running in this NaCl module. */
int __nc_running_threads_counter = 1;

/* Two queues of memory blocks, one per block type ... */
STAILQ_HEAD(tailhead, entry) __nc_thread_memory_blocks[2];
/* ... and, for each queue, a count of the blocks it currently holds. */
int __nc_memory_block_counter[2];
94 /* Internal functions */
96 static inline nc_thread_descriptor_t *nc_get_tdb(void) {
98 * Fetch the thread-specific data pointer. This is usually just
99 * a wrapper around __libnacl_irt_tls.tls_get() but we don't use
100 * that here so that the IRT build can override the definition.
102 return (void *) ((char *) __nacl_read_tp_inline()
103 + __nacl_tp_tdb_offset(TDB_SIZE));
106 static void nc_thread_starter(void) {
107 nc_thread_descriptor_t *tdb = nc_get_tdb();
108 __newlib_thread_init();
109 #if defined(NACL_IN_IRT)
110 g_is_irt_internal_thread = 1;
112 void *retval = tdb->start_func(tdb->state);
115 * Free handler list to prevent memory leak in case function returns
116 * without calling pthread_cleanup_pop(), although doing that is unspecified
119 while (NULL != __nc_cleanup_handlers) {
120 pthread_cleanup_pop(0);
123 /* If the function returns, terminate the thread. */
124 pthread_exit(retval);
126 /* TODO(gregoryd) - add assert */
/*
 * Hands out a memory block of the requested type: the block at the head of
 * that type's queue is reused when it is unused and large enough, otherwise
 * a new block is obtained with malloc().  For THREAD_STACK_MEMORY the
 * requested size is first grown by kStackAlignment - 1.  While more than
 * __nc_kMaxCachedMemoryBlocks blocks are queued, unused blocks at the head
 * are removed.  The caller is expected to hold the thread-management lock.
 * NOTE(review): several lines of this function (the tail of the parameter
 * list, the switch header, return statements and closing braces) are not
 * visible in this listing; confirm against the full source.
 */
129 static nc_thread_memory_block_t *nc_allocate_memory_block_mu(
130 nc_thread_memory_block_type_t type,
132 struct tailhead *head;
133 nc_thread_memory_block_t *node;
134 /* Assume the lock is held!!! */
135 if (type >= MAX_MEMORY_TYPE)
137 head = &__nc_thread_memory_blocks[type];
139 /* We need to know the size even if we find a free node - to memset it to 0 */
141 case THREAD_STACK_MEMORY:
142 required_size = required_size + kStackAlignment - 1;
144 case TLS_AND_TDB_MEMORY:
146 case MAX_MEMORY_TYPE:
151 if (!STAILQ_EMPTY(head)) {
152 /* Try to get one from queue. */
153 nc_thread_memory_block_t *node = STAILQ_FIRST(head);
156 * On average the memory blocks will be marked as not used in the same order
157 * as they are added to the queue, therefore there is no need to check the
158 * next queue entries if the first one is still in use.
160 if (0 == node->is_used && node->size >= required_size) {
162 * This will only re-use the first node possibly, and could be
163 * improved to provide the stack with a best-fit algorithm if needed.
164 * TODO: we should scan all nodes to see if there is one that fits
165 * before allocating another.
166 * http://code.google.com/p/nativeclient/issues/detail?id=1569
168 int size = node->size;
169 STAILQ_REMOVE_HEAD(head, entries);
170 --__nc_memory_block_counter[type];
172 memset(node, 0,sizeof(*node));
178 while (__nc_memory_block_counter[type] > __nc_kMaxCachedMemoryBlocks) {
180 * We have too many blocks in the queue - try to release some.
181 * The maximum number of memory blocks to keep in the queue
182 * is almost arbitrary and can be tuned.
183 * The main limitation is that if we keep too many
184 * blocks in the queue, the NaCl app will run out of memory,
185 * since the default thread stack size is 512K.
186 * TODO(gregoryd): we might give up reusing stack entries once we
187 * support variable stack size.
189 nc_thread_memory_block_t *tmp = STAILQ_FIRST(head);
190 if (0 == tmp->is_used) {
191 STAILQ_REMOVE_HEAD(head, entries);
192 --__nc_memory_block_counter[type];
196 * Stop once we find a block that is still in use,
197 * since probably there is no point to continue.
204 /* No available blocks of the required type/size - allocate one. */
205 node = malloc(MEMORY_BLOCK_ALLOCATION_SIZE(required_size));
207 memset(node, 0, sizeof(*node));
208 node->size = required_size;
214 static void nc_free_memory_block_mu(nc_thread_memory_block_type_t type,
215 nc_thread_memory_block_t *node) {
216 /* Assume the lock is held!!! */
217 struct tailhead *head = &__nc_thread_memory_blocks[type];
218 STAILQ_INSERT_TAIL(head, node, entries);
219 ++__nc_memory_block_counter[type];
/*
 * Destroys the join condition variable embedded in |basic_data|.  Every
 * visible caller in this file invokes it with the thread-management lock
 * held.
 * NOTE(review): whatever follows pthread_cond_destroy() is not visible in
 * this listing; confirm against the full source.
 */
222 static void nc_release_basic_data_mu(nc_basic_thread_data_t *basic_data) {
223 /* join_condvar can be initialized only if tls_node exists. */
224 pthread_cond_destroy(&basic_data->join_condvar);
/*
 * Clears the back-pointer from |tdb|'s basic_data (when there is one) and
 * returns |block| to the TLS_AND_TDB_MEMORY queue.
 * NOTE(review): some lines of this function are not visible in this
 * listing; confirm against the full source.
 */
228 static void nc_release_tls_node(nc_thread_memory_block_t *block,
229 nc_thread_descriptor_t *tdb) {
231 if (NULL != tdb->basic_data) {
232 tdb->basic_data->tdb = NULL;
235 nc_free_memory_block_mu(TLS_AND_TDB_MEMORY, block);
/*
 * Resets the visible descriptor fields to their defaults (joinable, nobody
 * joining, no stack/TLS node, no start function), links |tdb| and
 * |basic_data| to each other, marks the thread THREAD_RUNNING and
 * initializes its join condition variable.
 * NOTE(review): the statement guarded by the pthread_cond_init() failure
 * check is not visible in this listing.
 */
239 /* Initialize a newly allocated TDB to some default values. */
240 static void nc_tdb_init(nc_thread_descriptor_t *tdb,
241 nc_basic_thread_data_t *basic_data) {
243 tdb->joinable = PTHREAD_CREATE_JOINABLE;
244 tdb->join_waiting = 0;
245 tdb->stack_node = NULL;
246 tdb->tls_node = NULL;
247 tdb->start_func = NULL;
249 tdb->irt_thread_data = NULL;
250 tdb->basic_data = basic_data;
252 basic_data->retval = NULL;
253 basic_data->status = THREAD_RUNNING;
254 if (pthread_cond_init(&basic_data->join_condvar, NULL) != 0)
256 basic_data->tdb = tdb;
/*
 * Fills irt_thread through __nc_initialize_interfaces(), initializes the
 * thread-management mutex and the last-thread condition variable, empties
 * both memory-block queues and finally sets __nc_thread_initialized to 1.
 * NOTE(review): the statements guarded by the two init-failure checks are
 * not visible in this listing.
 */
259 /* Initializes all globals except for the initial thread structure. */
260 void __nc_initialize_globals(void) {
262 * Fetch the ABI tables from the IRT. If we don't have these, all is lost.
264 __nc_initialize_interfaces(&irt_thread);
266 if (pthread_mutex_init(&__nc_thread_management_lock, NULL) != 0)
270 * Tell ThreadSanitizer to not generate happens-before arcs between uses of
271 * this mutex. Otherwise we miss to many real races.
272 * When not running under ThreadSanitizer, this is just a call to an empty
275 ANNOTATE_NOT_HAPPENS_BEFORE_MUTEX(&__nc_thread_management_lock);
277 if (pthread_cond_init(&__nc_last_thread_cond, NULL) != 0)
279 STAILQ_INIT(&__nc_thread_memory_blocks[0]);
280 STAILQ_INIT(&__nc_thread_memory_blocks[1]);
282 __nc_thread_initialized = 1;
285 #if defined(NACL_IN_IRT)
288 * This is used by the IRT for user threads. We initialize all fields
289 * so that we get predictable behaviour in case some IRT code does an
290 * unsupported pthread operation on a user thread.
292 void __nc_initialize_unjoinable_thread(struct nc_combined_tdb *tdb) {
293 nc_tdb_init(&tdb->tdb, &tdb->basic_data);
294 tdb->tdb.joinable = 0;
300 * Will be called from the library startup code,
301 * which always happens on the application's main thread.
303 void __pthread_initialize(void) {
304 __pthread_initialize_minimal(TDB_SIZE);
306 struct nc_combined_tdb *tdb = (struct nc_combined_tdb *) nc_get_tdb();
307 nc_tdb_init(&tdb->tdb, &tdb->basic_data);
308 __nc_initial_thread_id = &tdb->basic_data;
310 __nc_initialize_globals();
316 /* pthread functions */
/*
 * Creates a thread that runs start_routine.  Visible steps: with the
 * thread-management lock held, allocate and initialize a TLS+TDB block, a
 * basic_data record and a stack block, copy joinable/stacksize from |attr|,
 * increment the running-thread counter and publish the new id through
 * |thread_id|; then, with the lock released, zero kStackPadBelowAlign bytes
 * at the top of the stack and call irt_thread.thread_create() with
 * nc_thread_starter as the entry point.  On the failure path the counter is
 * decremented again, the allocated resources are handed back and
 * *thread_id is set to NACL_PTHREAD_ILLEGAL_THREAD_ID.
 * NOTE(review): many lines of this function (the last parameter, several
 * declarations, branch bodies, labels and return statements) are not
 * visible in this listing; confirm against the full source.
 */
318 int pthread_create(pthread_t *thread_id,
319 const pthread_attr_t *attr,
320 void *(*start_routine)(void *),
324 /* Declare the variables outside of the while scope. */
325 nc_thread_memory_block_t *stack_node = NULL;
326 char *thread_stack = NULL;
327 nc_thread_descriptor_t *new_tdb = NULL;
328 nc_basic_thread_data_t *new_basic_data = NULL;
329 nc_thread_memory_block_t *tls_node = NULL;
330 size_t stacksize = PTHREAD_STACK_DEFAULT;
333 /* TODO(gregoryd) - right now a single lock is used, try to optimize? */
334 pthread_mutex_lock(&__nc_thread_management_lock);
337 /* Allocate the combined TLS + TDB block---see tls.h for explanation. */
339 tls_node = nc_allocate_memory_block_mu(TLS_AND_TDB_MEMORY,
340 __nacl_tls_combined_size(TDB_SIZE));
341 if (NULL == tls_node)
344 new_tp = __nacl_tls_initialize_memory(nc_memory_block_to_payload(tls_node),
347 new_tdb = (nc_thread_descriptor_t *)
348 ((char *) new_tp + __nacl_tp_tdb_offset(TDB_SIZE));
351 * TODO(gregoryd): consider creating a pool of basic_data structs,
352 * similar to stack and TLS+TDB (probably when adding the support for
353 * variable stack size).
355 new_basic_data = malloc(sizeof(*new_basic_data));
356 if (NULL == new_basic_data) {
358 * The tdb should be zero intialized.
359 * This just re-emphasizes this requirement.
361 new_tdb->basic_data = NULL;
365 nc_tdb_init(new_tdb, new_basic_data);
366 new_tdb->tls_node = tls_node;
369 * All the required members of the tdb must be initialized before
370 * the thread is started and actually before the global lock is released,
371 * since another thread can call pthread_join() or pthread_detach().
373 new_tdb->start_func = start_routine;
374 new_tdb->state = arg;
376 new_tdb->joinable = attr->joinable;
377 stacksize = attr->stacksize;
380 /* Allocate the stack for the thread. */
381 stack_node = nc_allocate_memory_block_mu(THREAD_STACK_MEMORY, stacksize);
382 if (NULL == stack_node) {
386 thread_stack = align((uint32_t) nc_memory_block_to_payload(stack_node),
388 new_tdb->stack_node = stack_node;
394 pthread_mutex_unlock(&__nc_thread_management_lock);
395 goto ret; /* error */
399 * Speculatively increase the thread count. If thread creation
400 * fails, we will decrease it back. This way the thread count will
401 * never be lower than the actual number of threads, but can briefly
402 * be higher than that.
404 ++__nc_running_threads_counter;
407 * Save the new thread id. This can not be done after the syscall,
408 * because the child thread could have already finished by that
409 * time. If thread creation fails, it will be overriden with -1
412 *thread_id = new_basic_data;
414 pthread_mutex_unlock(&__nc_thread_management_lock);
417 * Calculate the top-of-stack location. The very first location is a
418 * zero address of architecture-dependent width, needed to satisfy the
419 * normal ABI alignment requirements for the stack. (On some machines
420 * this is the dummy return address of the thread-start function.)
422 * Both thread_stack and stacksize are multiples of 16.
424 esp = (void *) (thread_stack + stacksize - kStackPadBelowAlign);
425 memset(esp, 0, kStackPadBelowAlign);
427 /* Start the thread. */
428 retval = irt_thread.thread_create(nc_thread_starter, esp, new_tp);
430 pthread_mutex_lock(&__nc_thread_management_lock);
431 /* TODO(gregoryd) : replace with atomic decrement? */
432 --__nc_running_threads_counter;
433 pthread_mutex_unlock(&__nc_thread_management_lock);
441 /* Failed to create a thread. */
442 pthread_mutex_lock(&__nc_thread_management_lock);
444 nc_release_tls_node(tls_node, new_tdb);
445 if (new_basic_data) {
446 nc_release_basic_data_mu(new_basic_data);
449 stack_node->is_used = 0;
450 nc_free_memory_block_mu(THREAD_STACK_MEMORY, stack_node);
453 pthread_mutex_unlock(&__nc_thread_management_lock);
454 *thread_id = NACL_PTHREAD_ILLEGAL_THREAD_ID;
/*
 * Takes the thread-management lock and waits on __nc_last_thread_cond until
 * __nc_running_threads_counter equals 1, then releases the lock.
 * NOTE(review): the return statement is not visible in this listing.
 */
460 static int wait_for_threads(void) {
461 pthread_mutex_lock(&__nc_thread_management_lock);
463 while (1 != __nc_running_threads_counter) {
464 pthread_cond_wait(&__nc_last_thread_cond, &__nc_thread_management_lock);
466 ANNOTATE_CONDVAR_LOCK_WAIT(&__nc_last_thread_cond,
467 &__nc_thread_management_lock);
469 pthread_mutex_unlock(&__nc_thread_management_lock);
473 void pthread_cleanup_push(void (*routine)(void *), void *arg) {
474 nc_thread_cleanup_handler *handler =
475 (nc_thread_cleanup_handler *)malloc(sizeof(*handler));
476 handler->handler_function = routine;
477 handler->handler_arg = arg;
478 handler->previous = __nc_cleanup_handlers;
479 __nc_cleanup_handlers = handler;
/*
 * Unlinks the most recently pushed cleanup handler of the calling thread,
 * if there is one; does nothing when the list is empty.
 * NOTE(review): lines around the handler call are not visible in this
 * listing (any use of |execute| and any release of the popped entry);
 * confirm against the full source.
 */
482 void pthread_cleanup_pop(int execute) {
483 if (NULL != __nc_cleanup_handlers) {
484 nc_thread_cleanup_handler *handler = __nc_cleanup_handlers;
485 __nc_cleanup_handlers = handler->previous;
487 handler->handler_function(handler->handler_arg);
/*
 * Terminates the calling thread.  Visible steps: run the pending cleanup
 * handlers, call __newlib_thread_exit(), decrement the running-thread
 * counter for non-initial threads, then under the thread-management lock
 * store |retval| and THREAD_TERMINATED in basic_data, signal the join
 * condition variable, hand back the TLS+TDB block, queue the stack block
 * (still marked as used) and signal __nc_last_thread_cond when only one
 * thread remains.  Finally irt_thread.thread_exit() is called with the
 * address of the stack block's is_used flag.
 * NOTE(review): several conditionals and branch bodies (for example around
 * |joinable| and the main-thread case) are not visible in this listing;
 * confirm against the full source.
 */
492 void pthread_exit(void *retval) {
493 /* Get all we need from the tdb before releasing it. */
494 nc_thread_descriptor_t *tdb = nc_get_tdb();
495 nc_thread_memory_block_t *stack_node = tdb->stack_node;
496 int32_t *is_used = &stack_node->is_used;
497 nc_basic_thread_data_t *basic_data = tdb->basic_data;
498 int joinable = tdb->joinable;
500 /* Call cleanup handlers. */
501 while (NULL != __nc_cleanup_handlers) {
502 pthread_cleanup_pop(1);
505 /* Call the destruction functions for TSD. */
508 __newlib_thread_exit();
510 if (__nc_initial_thread_id != basic_data) {
511 pthread_mutex_lock(&__nc_thread_management_lock);
512 --__nc_running_threads_counter;
513 pthread_mutex_unlock(&__nc_thread_management_lock);
515 /* This is the main thread - wait for other threads to complete. */
520 pthread_mutex_lock(&__nc_thread_management_lock);
522 basic_data->retval = retval;
525 /* If somebody is waiting for this thread, signal. */
526 basic_data->status = THREAD_TERMINATED;
527 pthread_cond_signal(&basic_data->join_condvar);
530 * We can release TLS+TDB - thread id and its return value are still
531 * kept in basic_data.
533 nc_release_tls_node(tdb->tls_node, tdb);
536 nc_release_basic_data_mu(basic_data);
539 /* Now add the stack to the list but keep it marked as used. */
540 nc_free_memory_block_mu(THREAD_STACK_MEMORY, stack_node);
542 if (1 == __nc_running_threads_counter) {
543 pthread_cond_signal(&__nc_last_thread_cond);
546 pthread_mutex_unlock(&__nc_thread_management_lock);
547 irt_thread.thread_exit(is_used);
/*
 * Waits for the thread identified by |thread_id| to terminate.  Under the
 * thread-management lock: if the target is still running, is joinable and
 * has no other joiner, mark it as being joined and wait on its join
 * condition variable until its status is THREAD_TERMINATED.  The target's
 * return value is stored through |thread_return| when that pointer is not
 * NULL, and the target's basic_data is released.
 * NOTE(review): the bodies of the self-join and not-joinable branches and
 * all return statements are not visible in this listing.
 */
551 int pthread_join(pthread_t thread_id, void **thread_return) {
553 nc_basic_thread_data_t *basic_data = thread_id;
554 if (pthread_self() == thread_id) {
558 pthread_mutex_lock(&__nc_thread_management_lock);
560 if (basic_data->tdb != NULL) {
561 /* The thread is still running. */
562 nc_thread_descriptor_t *joined_tdb = basic_data->tdb;
563 if (!joined_tdb->joinable || joined_tdb->join_waiting) {
564 /* The thread is detached or another thread is waiting to join. */
568 joined_tdb->join_waiting = 1;
569 /* Wait till the thread terminates. */
570 while (THREAD_TERMINATED != basic_data->status) {
571 pthread_cond_wait(&basic_data->join_condvar,
572 &__nc_thread_management_lock);
575 ANNOTATE_CONDVAR_LOCK_WAIT(&basic_data->join_condvar,
576 &__nc_thread_management_lock);
577 /* The thread has already terminated. */
578 /* Save the return value. */
579 if (thread_return != NULL) {
580 *thread_return = basic_data->retval;
583 /* Release the resources. */
584 nc_release_basic_data_mu(basic_data);
588 pthread_mutex_unlock(&__nc_thread_management_lock);
/*
 * Detaches the thread identified by |thread_id|.  Under the
 * thread-management lock: if the thread has already terminated (its
 * basic_data has no descriptor) the basic_data is released; otherwise, when
 * nobody is waiting to join it and it is still joinable, its joinable flag
 * is cleared.
 * NOTE(review): the else branches and all return statements are not visible
 * in this listing.
 */
594 int pthread_detach(pthread_t thread_id) {
596 nc_basic_thread_data_t *basic_data = thread_id;
597 nc_thread_descriptor_t *detached_tdb;
599 * TODO(gregoryd) - can be optimized using InterlockedExchange
600 * once it's available.
602 pthread_mutex_lock(&__nc_thread_management_lock);
603 detached_tdb = basic_data->tdb;
605 if (NULL == detached_tdb) {
606 /* The thread has already terminated. */
607 nc_release_basic_data_mu(basic_data);
609 if (!detached_tdb->join_waiting) {
610 if (detached_tdb->joinable) {
611 detached_tdb->joinable = 0;
613 /* Already detached. */
617 /* Another thread is already waiting to join - do nothing. */
620 pthread_mutex_unlock(&__nc_thread_management_lock);
/*
 * Placeholder for pthread_kill(); per the comment in its body it is not
 * implemented.
 * NOTE(review): the rest of the parameter list and the return statement are
 * not visible in this listing.
 */
624 int pthread_kill(pthread_t thread_id,
626 /* This function is currently unimplemented. */
630 pthread_t pthread_self(void) {
631 /* Get the tdb pointer from gs and use it to return the thread handle. */
632 nc_thread_descriptor_t *tdb = nc_get_tdb();
633 return tdb->basic_data;
/* Returns 1 when both handles name the same thread, 0 otherwise. */
int pthread_equal(pthread_t thread1, pthread_t thread2) {
  return thread1 == thread2 ? 1 : 0;
}
/*
 * Changes the calling thread's priority by passing |prio| to
 * irt_thread.thread_nice().  A |thread_id| other than the caller's own is
 * handled by a separate branch.
 * NOTE(review): the body of that branch is not visible in this listing.
 */
640 int pthread_setschedprio(pthread_t thread_id, int prio) {
641 if (thread_id != pthread_self()) {
643 * We can only support changing our own priority.
647 return irt_thread.thread_nice(prio);
/*
 * Sets |attr| to the defaults: PTHREAD_CREATE_JOINABLE and
 * PTHREAD_STACK_DEFAULT.
 * NOTE(review): lines before the assignments and the return statement are
 * not visible in this listing.
 */
650 int pthread_attr_init(pthread_attr_t *attr) {
654 attr->joinable = PTHREAD_CREATE_JOINABLE;
655 attr->stacksize = PTHREAD_STACK_DEFAULT;
/*
 * Destroys |attr|; per the comment in its body there is nothing to release.
 * NOTE(review): the remaining lines, including the return statement, are
 * not visible in this listing.
 */
659 int pthread_attr_destroy(pthread_attr_t *attr) {
663 /* Nothing to destroy. */
/*
 * Stores |detachstate| in attr->joinable.
 * NOTE(review): the second parameter line, any argument checks and the
 * return statement are not visible in this listing.
 */
667 int pthread_attr_setdetachstate(pthread_attr_t *attr,
672 attr->joinable = detachstate;
676 int pthread_attr_getdetachstate(const pthread_attr_t *attr,
681 return attr->joinable;
/*
 * Records the requested stack size in |attr|; a request that is not larger
 * than PTHREAD_STACK_MIN is stored as PTHREAD_STACK_MIN instead.
 * NOTE(review): the second parameter line, any argument checks and the
 * return statement are not visible in this listing.
 */
684 int pthread_attr_setstacksize(pthread_attr_t *attr,
689 if (PTHREAD_STACK_MIN < stacksize) {
690 attr->stacksize = stacksize;
692 attr->stacksize = PTHREAD_STACK_MIN;
/*
 * Stores the stack size recorded in |attr| through |stacksize|.
 * NOTE(review): the second parameter line, any argument checks and the
 * return statement are not visible in this listing.
 */
697 int pthread_attr_getstacksize(const pthread_attr_t *attr,
702 *stacksize = attr->stacksize;
706 void __local_lock_init(_LOCK_T *lock);
707 void __local_lock_init_recursive(_LOCK_T *lock);
708 void __local_lock_close(_LOCK_T *lock);
709 void __local_lock_close_recursive(_LOCK_T *lock);
710 void __local_lock_acquire(_LOCK_T *lock);
711 void __local_lock_acquire_recursive(_LOCK_T *lock);
712 int __local_lock_try_acquire(_LOCK_T *lock);
713 int __local_lock_try_acquire_recursive(_LOCK_T *lock);
714 void __local_lock_release(_LOCK_T *lock);
715 void __local_lock_release_recursive(_LOCK_T *lock);
/*
 * Initializes |lock|, treated as a pthread_mutex_t, as a
 * PTHREAD_MUTEX_FAST_NP mutex.
 * NOTE(review): one line before the declarations and the closing lines are
 * not visible in this listing.
 */
717 void __local_lock_init(_LOCK_T *lock) {
719 pthread_mutexattr_t attr;
720 pthread_mutexattr_init(&attr);
721 pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_FAST_NP);
722 pthread_mutex_init((pthread_mutex_t*)lock, &attr);
/*
 * Initializes |lock|, treated as a pthread_mutex_t, as a
 * PTHREAD_MUTEX_RECURSIVE_NP mutex.
 * NOTE(review): one line before the declarations and the closing lines are
 * not visible in this listing.
 */
726 void __local_lock_init_recursive(_LOCK_T *lock) {
728 pthread_mutexattr_t attr;
729 pthread_mutexattr_init(&attr);
730 pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE_NP);
731 pthread_mutex_init((pthread_mutex_t*)lock, &attr);
/*
 * Destroys |lock|, treated as a pthread_mutex_t.
 * NOTE(review): one line before the call and the closing lines are not
 * visible in this listing.
 */
735 void __local_lock_close(_LOCK_T *lock) {
737 pthread_mutex_destroy((pthread_mutex_t*)lock);
741 void __local_lock_close_recursive(_LOCK_T *lock) {
742 __local_lock_close(lock);
/*
 * Acquires |lock|, treated as a pthread_mutex_t, with pthread_mutex_lock().
 * A separate branch handles the case where the pthread library is not yet
 * initialized (__nc_thread_initialized is zero).
 * NOTE(review): the body of that branch and the lines around the lock call
 * are not visible in this listing.
 */
745 void __local_lock_acquire(_LOCK_T *lock) {
746 if (!__nc_thread_initialized) {
748 * pthread library is not initialized yet - there is only one thread.
749 * Calling pthread_mutex_lock will cause an access violation because it
750 * will attempt to access the TDB which is not initialized yet.
755 pthread_mutex_lock((pthread_mutex_t*)lock);
759 void __local_lock_acquire_recursive(_LOCK_T *lock) {
760 __local_lock_acquire(lock);
/*
 * Tries to acquire |lock|, treated as a pthread_mutex_t, and returns the
 * result of pthread_mutex_trylock().  A separate branch handles the case
 * where the pthread library is not yet initialized.
 * NOTE(review): the body of that branch and the lines around the trylock
 * call are not visible in this listing.
 */
763 int __local_lock_try_acquire(_LOCK_T *lock) {
764 if (!__nc_thread_initialized) {
766 * pthread library is not initialized yet - there is only one thread.
767 * Calling pthread_mutex_lock will cause an access violation because it
768 * will attempt to access the TDB which is not initialized yet.
774 return pthread_mutex_trylock((pthread_mutex_t*)lock);
780 int __local_lock_try_acquire_recursive(_LOCK_T *lock) {
781 return __local_lock_try_acquire(lock);
/*
 * Releases |lock|, treated as a pthread_mutex_t, with
 * pthread_mutex_unlock().  A separate branch handles the case where the
 * pthread library is not yet initialized.
 * NOTE(review): the body of that branch and the lines around the unlock
 * call are not visible in this listing.
 */
784 void __local_lock_release(_LOCK_T *lock) {
785 if (!__nc_thread_initialized) {
787 * pthread library is not initialized yet - there is only one thread.
788 * Calling pthread_mutex_lock will cause an access violation because it
789 * will attempt to access the TDB which is not initialized yet
790 * NOTE: there is no race condition here because the value of the counter
791 * cannot change while the lock is held - the startup process is
798 pthread_mutex_unlock((pthread_mutex_t*)lock);
802 void __local_lock_release_recursive(_LOCK_T *lock) {
803 __local_lock_release(lock);
807 * We include this directly in this file rather than compiling it
808 * separately because there is some code (e.g. libstdc++) that uses weak
809 * references to all pthread functions, but conditionalizes its calls only
810 * on one symbol. So if these functions are in another file in a library
811 * archive, they might not be linked in by static linking.
813 /* @IGNORE_LINES_FOR_CODE_HYGIENE[1] */
814 #include "native_client/src/untrusted/pthread/nc_tsd.c"