/*
 * Copyright (c) 2011, Joakim Johansson <jocke@tbricks.com>
 * Copyright (c) 2010, Mark Heily <mark@heily.com>
 * Copyright (c) 2009, Stacey Son <sson@freebsd.org>
 * Copyright (c) 2000-2008, Apple Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "pthread_workqueue.h"
#include "thread_info.h"
#include "thread_rt.h"
/* Environment settings */
unsigned int PWQ_RT_THREADS = 0;
unsigned int PWQ_ACTIVE_CPU = 0;
time_t PWQ_SPIN_USEC = 10000; // Number of microseconds to busy-spin waiting for new work, if spinning is enabled
unsigned int PWQ_SPIN_THREADS = 0; // Maximum number of threads that should be kept spinning
volatile unsigned int current_threads_spinning = 0; // Number of threads currently spinning
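
/*
 * Sketch (not part of the original file): how these tunables might be
 * picked up from the environment during library init. The helper name
 * pwq_tunables_from_env() is hypothetical; only getenv(3) and
 * strtoul(3) are assumed.
 */
#if 0
#include <stdlib.h>

static void
pwq_tunables_from_env(void)
{
    const char *s;

    if ((s = getenv("PWQ_RT_THREADS")) != NULL)
        PWQ_RT_THREADS = (unsigned int) strtoul(s, NULL, 10);
    if ((s = getenv("PWQ_SPIN_USEC")) != NULL)
        PWQ_SPIN_USEC = (time_t) strtoul(s, NULL, 10);
    if ((s = getenv("PWQ_SPIN_THREADS")) != NULL)
        PWQ_SPIN_THREADS = (unsigned int) strtoul(s, NULL, 10);
}
#endif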
/* Tunable constants */

#define WORKER_IDLE_SECONDS_THRESHOLD 5

/* Function prototypes */
static unsigned int get_load_average(void);
static void * worker_main(void *arg);
static void * overcommit_worker_main(void *arg);
static unsigned int get_process_limit(void);
static void manager_start(void);
static unsigned int cpu_count;
static unsigned int worker_min;
static unsigned int worker_idle_threshold; // dynamic low watermark: we never ramp down below this once workers have been added

/* Overcommit workqueues and their shared state */
static struct _pthread_workqueue *ocwq[PTHREAD_WORKQUEUE_MAX];
static unsigned int ocwq_mask; // mask of overcommit workqueues with pending items; protected by ocwq_mtx
static pthread_mutex_t ocwq_mtx;
static pthread_cond_t ocwq_has_work;
static unsigned int ocwq_idle_threads;
/* Regular (non-overcommit) workqueues and their shared state */
static struct _pthread_workqueue *wqlist[PTHREAD_WORKQUEUE_MAX];
static volatile unsigned int wqlist_mask; // mask of currently pending workqueues; manipulated with atomics
static pthread_mutex_t wqlist_mtx;
static pthread_cond_t wqlist_has_work;
static int wqlist_has_manager;
static pthread_attr_t detached_attr;
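
/*
 * Illustration only (never called): bit i of wqlist_mask is set iff
 * wqlist[i] has pending items. Assuming, as the scan code below implies,
 * that higher-priority queues occupy lower indices, ffs(3), which returns
 * the 1-based index of the lowest set bit (0 if none are set), yields the
 * highest-priority non-empty workqueue.
 */
#if 0
static struct _pthread_workqueue *
highest_priority_pending_wq(void)
{
    int idx = ffs((int) wqlist_mask);
    return (idx == 0) ? NULL : wqlist[idx - 1];
}
#endif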
/* Scoreboard used by the manager thread to track the worker pool */
static struct {
    volatile unsigned int load,
                          count,
                          idle;
    unsigned int sb_wake_pending;
    pthread_mutex_t sb_wake_mtx;
    pthread_cond_t sb_wake_cond;
} scoreboard;
static unsigned int
worker_idle_threshold_per_cpu(void)
{
    return cpu_count / 4;
}
    if (manager_init() < 0)
        return (-1);
int
manager_init(void)
{
    wqlist_has_manager = 0;
    pthread_cond_init(&wqlist_has_work, NULL);
    pthread_mutex_init(&wqlist_mtx, NULL);

    pthread_cond_init(&ocwq_has_work, NULL);
    pthread_mutex_init(&ocwq_mtx, NULL);
    ocwq_mask = 0;
    ocwq_idle_threads = 0;

    cpu_count = (PWQ_ACTIVE_CPU > 0) ? (PWQ_ACTIVE_CPU) : (unsigned int) sysconf(_SC_NPROCESSORS_ONLN);

    pthread_attr_init(&detached_attr);
    pthread_attr_setdetachstate(&detached_attr, PTHREAD_CREATE_DETACHED);

    /* Initialize the scoreboard */
    pthread_cond_init(&scoreboard.sb_wake_cond, NULL);
    pthread_mutex_init(&scoreboard.sb_wake_mtx, NULL);

    /* Determine the initial thread pool constraints */
    worker_min = 2; // start with a small pool; worker_idle_threshold serves as the dynamic low watermark
    worker_idle_threshold = worker_idle_threshold_per_cpu();

    if (pthread_atfork(NULL, NULL, manager_reinit) != 0) {
        dbg_perror("pthread_atfork()");
        return (-1);
    }

    return (0);
}
void
manager_workqueue_create(struct _pthread_workqueue *workq)
{
    pthread_mutex_lock(&wqlist_mtx);
    if (!workq->overcommit && !wqlist_has_manager)
        manager_start();

    if (workq->overcommit) {
        if (ocwq[workq->queueprio] == NULL) {
            ocwq[workq->queueprio] = workq;
            workq->wqlist_index = workq->queueprio;
        } else {
            puts("queue already exists");
        }
    } else {
        if (wqlist[workq->queueprio] == NULL) {
            wqlist[workq->queueprio] = workq; // FIXME: sort by priority
            workq->wqlist_index = workq->queueprio;
        } else {
            puts("queue already exists");
        }
    }

    pthread_mutex_unlock(&wqlist_mtx);
}
static struct work *
wqlist_scan(int *queue_priority)
{
    pthread_workqueue_t workq;
    struct work *witem;
    int idx;

    idx = ffs(wqlist_mask);
    if (idx == 0)
        return (NULL);

    workq = wqlist[idx - 1];

    pthread_spin_lock(&workq->mtx);

    witem = STAILQ_FIRST(&workq->item_listhead);
    if (witem != NULL) {
        STAILQ_REMOVE_HEAD(&workq->item_listhead, item_entry);
        if (STAILQ_EMPTY(&workq->item_listhead)) {
            unsigned int wqlist_index_bit = (0x1 << workq->wqlist_index);
            unsigned int new_mask;
            // Remove this now empty wq from the mask. The only contention here is with
            // threads performing the same operation on another workqueue, so we will not spin for long.
            // The 'bit' for this queue is protected by the spin lock, so we only clear a bit we have
            // ownership of (see additem() below for the corresponding part on the producer side).
            do {
                new_mask = atomic_and(&wqlist_mask, ~(wqlist_index_bit));
            } while (new_mask & wqlist_index_bit);
        }

        if (queue_priority != NULL)
            *queue_priority = workq->queueprio;

        pthread_spin_unlock(&workq->mtx);
        return (witem);
    }

    // This can happen if multiple threads raced, found the same bit with ffs(),
    // and emptied the queue completely; just bail out.
    pthread_spin_unlock(&workq->mtx);
    return (NULL);
}
static void _wakeup_manager(void)
{
    dbg_puts("asking manager to wake up");

    pthread_mutex_lock(&scoreboard.sb_wake_mtx);
    scoreboard.sb_wake_pending = 1;
    pthread_cond_signal(&scoreboard.sb_wake_cond);
    pthread_mutex_unlock(&scoreboard.sb_wake_mtx);
}
static void *
overcommit_worker_main(void *arg)
{
    struct timespec ts;
    pthread_workqueue_t workq;
    void (*func)(void *);
    void *func_arg;
    struct work *witem;
    int rv, idx;

    pthread_mutex_lock(&ocwq_mtx);

    for (;;) {
        /* Find the highest priority workqueue that is non-empty */
        idx = ffs(ocwq_mask);
        if (idx > 0) {
            workq = ocwq[idx - 1];
            witem = STAILQ_FIRST(&workq->item_listhead);
            if (witem != NULL) {
                /* Remove the first work item */
                STAILQ_REMOVE_HEAD(&workq->item_listhead, item_entry);
                if (STAILQ_EMPTY(&workq->item_listhead))
                    ocwq_mask &= ~(0x1 << workq->wqlist_index);
                /* Execute the work item */
                pthread_mutex_unlock(&ocwq_mtx);
                func = witem->func;
                func_arg = witem->func_arg;
                witem_free(witem);
                func(func_arg);
                pthread_mutex_lock(&ocwq_mtx);
                continue;
            }
        }

        /* Wait for more work to be available. */
        clock_gettime(CLOCK_REALTIME, &ts);
        ts.tv_sec += WORKER_IDLE_SECONDS_THRESHOLD;
        ocwq_idle_threads++;
        dbg_printf("waiting for work (idle=%u)", ocwq_idle_threads);
        rv = pthread_cond_timedwait(&ocwq_has_work, &ocwq_mtx, &ts);
        if (rv != 0) {
            /* Normally, the signaler will decrement the idle counter,
               but this path is not taken in response to a signaler. */
            ocwq_idle_threads--;
            pthread_mutex_unlock(&ocwq_mtx);
            if (rv == ETIMEDOUT) {
                dbg_puts("timeout, no work available");
                break;
            } else {
                dbg_perror("pthread_cond_timedwait");
                //TODO: some kind of crash mechanism
                break;
            }
        }
    }

    dbg_printf("worker exiting (idle=%u)", ocwq_idle_threads);
    return (NULL);
}
static void *
worker_main(void *arg)
{
    struct work *witem;
    void (*func)(void *);
    void *func_arg;
    int queue_priority = 0;
    struct timespec ts_start, ts_now;

    dbg_puts("worker thread started");

    if (PWQ_RT_THREADS)
        ptwq_set_current_thread_priority(WORKQ_HIGH_PRIOQUEUE); // start at the highest possible priority

    for (;;) {
        witem = wqlist_scan(&queue_priority);

        // Only take the overhead of sleeping and/or spinning if we
        // could not get a witem cheaply using the spinlock above
        if (slowpath(!witem)) {
            // Optional busy loop: poll for the next item for a while if so configured.
            // We only let a limited number of threads spin at a time (this is mostly
            // useful in low latency configurations using dedicated processor sets)
            if ((PWQ_SPIN_THREADS > 0) && (current_threads_spinning <= PWQ_SPIN_THREADS)) {
                atomic_inc(&current_threads_spinning);

                // If we are racing with another thread, skip
                // spinning and instead go through the slowpath below
                if (current_threads_spinning <= PWQ_SPIN_THREADS) {
                    clock_gettime(CLOCK_REALTIME, &ts_start);
                    ts_now.tv_sec = ts_start.tv_sec;
                    ts_now.tv_nsec = ts_start.tv_nsec;

                    // Spin until we get an item or until PWQ_SPIN_USEC microseconds have passed
                    while (!witem &&
                           ((((ts_now.tv_sec - ts_start.tv_sec) * 1000000) +
                             ((ts_now.tv_nsec - ts_start.tv_nsec) / 1000)) <= PWQ_SPIN_USEC)) {
                        witem = wqlist_scan(&queue_priority);
                        // Perhaps a hardware pause instruction (e.g. 'pause' on x86) could be
                        // used here to keep the pace down; probably not needed though
                        clock_gettime(CLOCK_REALTIME, &ts_now);
                    }
                }

                atomic_dec(&current_threads_spinning);
            }

            // No witem from the busy loop; wait for a wakeup instead
            if (!witem) {
                pthread_mutex_lock(&wqlist_mtx);

                /*
                  TODO: Consider using pthread_cond_timedwait() so that
                  workers can self-terminate if they are idle too long.
                  This would also be a failsafe in case there are bugs
                  with the scoreboard that cause us to "leak" workers.
                 */
                while ((witem = wqlist_scan(&queue_priority)) == NULL)
                    pthread_cond_wait(&wqlist_has_work, &wqlist_mtx);

                pthread_mutex_unlock(&wqlist_mtx);
            }
        }

        atomic_dec(&scoreboard.idle);

        if (slowpath(witem->func == NULL)) {
            dbg_puts("worker exiting..");
            atomic_dec(&scoreboard.count);
            witem_free(witem);
            pthread_exit(NULL);
        }

        dbg_printf("count=%u idle=%u wake_pending=%u",
                   scoreboard.count, scoreboard.idle, scoreboard.sb_wake_pending);

        /* Force the manager thread to wake up if all workers are busy */
        if (slowpath(scoreboard.idle == 0 && !scoreboard.sb_wake_pending))
            _wakeup_manager();

        // If using RT threads, lower the thread priority unless we are servicing a high priority queue
        if (PWQ_RT_THREADS && (queue_priority != WORKQ_HIGH_PRIOQUEUE))
            ptwq_set_current_thread_priority(queue_priority);

        /* Invoke the callback function; free the witem first so it can be reused */
        func = witem->func;
        func_arg = witem->func_arg;
        witem_free(witem);
        func(func_arg);

        atomic_inc(&scoreboard.idle); // the initial increment was done in worker_start(); re-incrementing here avoids a race

        // Only take the overhead of restoring the RT priority if we were not already servicing a high priority queue
        if (PWQ_RT_THREADS && (queue_priority != WORKQ_HIGH_PRIOQUEUE))
            ptwq_set_current_thread_priority(WORKQ_HIGH_PRIOQUEUE);
    }

    return (NULL);
}
static void
worker_start(void)
{
    pthread_t tid;

    dbg_puts("Spawning another worker");
    atomic_inc(&scoreboard.idle);
    atomic_inc(&scoreboard.count);

    if (pthread_create(&tid, &detached_attr, worker_main, NULL) != 0) {
        dbg_perror("pthread_create(3)");
        atomic_dec(&scoreboard.idle);
        atomic_dec(&scoreboard.count);
    }
}
/* Stop one worker by enqueueing a poison work item (func == NULL);
   see the matching check in worker_main() above */
static void
worker_stop(void)
{
    struct work *witem;
    pthread_workqueue_t workq;
    int i;
    unsigned int wqlist_index_bit, new_mask;

    witem = witem_alloc(NULL, NULL);

    pthread_mutex_lock(&wqlist_mtx);
    for (i = 0; i < PTHREAD_WORKQUEUE_MAX; i++) {
        workq = wqlist[i];
        if (workq == NULL)
            continue;

        wqlist_index_bit = (0x1 << workq->wqlist_index);

        pthread_spin_lock(&workq->mtx);
        do {
            new_mask = atomic_or(&wqlist_mask, wqlist_index_bit);
        } while (!(new_mask & wqlist_index_bit));

        STAILQ_INSERT_TAIL(&workq->item_listhead, witem, item_entry);
        pthread_spin_unlock(&workq->mtx);

        pthread_cond_signal(&wqlist_has_work);
        pthread_mutex_unlock(&wqlist_mtx);
        return;
    }

    /* FIXME: this means there are no workqueues.. should never happen */
    dbg_puts("Attempting to add a workitem without a workqueue");
    pthread_mutex_unlock(&wqlist_mtx);
}
static void *
manager_main(void *unused __attribute__ ((unused)))
{
    unsigned int load_max = cpu_count;
    unsigned int worker_max, current_thread_count = 0;
    unsigned int worker_idle_seconds_accumulated = 0;
    unsigned int max_threads_to_stop = 0;
    unsigned int i;
    int cond_wait_rv = 0;
    sigset_t sigmask;
    struct timeval tp;
    struct timespec ts;

    worker_max = get_process_limit();
    scoreboard.load = get_load_average();

    /* Block all signals */
    sigfillset(&sigmask);
    pthread_sigmask(SIG_BLOCK, &sigmask, NULL);

    /* Create the minimum number of workers */
    scoreboard.count = 0;
    for (i = 0; i < worker_min; i++)
        worker_start();

    for (;;) {
        pthread_mutex_lock(&scoreboard.sb_wake_mtx);

        dbg_puts("manager is sleeping");

        (void) gettimeofday(&tp, NULL); // TODO: error checking

        /* Convert from timeval to timespec */
        ts.tv_sec = tp.tv_sec;
        ts.tv_nsec = tp.tv_usec * 1000;
        ts.tv_sec += 1; // wake up once per second and check whether we have too many idle threads

        // Only sleep on the condition if there is no pending wakeup; a spurious wakeup is also OK
        if (scoreboard.sb_wake_pending == 0)
            cond_wait_rv = pthread_cond_timedwait(&scoreboard.sb_wake_cond, &scoreboard.sb_wake_mtx, &ts);

        scoreboard.sb_wake_pending = 0; // must be cleared before spawning any new threads below, or we race

        dbg_puts("manager is awake");

        dbg_printf("load=%u idle=%u workers=%u max_workers=%u worker_min=%u",
                   scoreboard.load, scoreboard.idle, scoreboard.count, worker_max, worker_min);

        // If no workers are available, check whether we should create a new one
        if (scoreboard.idle == 0 && (scoreboard.count > 0)) // last clause guards against an extremely unlikely race at startup
        {
            scoreboard.load = get_load_average();

            if ((scoreboard.load < load_max) && (scoreboard.count < worker_max))
            {
                if (scoreboard.count < worker_idle_threshold) // allow cheap rampup up to worker_idle_threshold without going to /proc
                {
                    worker_start();
                }
                else if (threads_runnable(&current_thread_count) == 0) // check through /proc; a bit more expensive in terms of latency
                {
                    // only start a new thread if we have fewer runnable threads than cpus
                    if (current_thread_count >= cpu_count)
                    {
                        dbg_printf("Not spawning worker thread, thread_runnable = %u >= cpu_count = %u",
                                   current_thread_count, cpu_count);
                    }
                    else
                    {
                        worker_start();
                    }
                }
                else // always start a new thread if we can't get the runnable count
                {
                    worker_start();
                }
            }
            else // high load; still allow rampup up to worker_idle_threshold regardless
            {
                if (scoreboard.count < worker_idle_threshold)
                    worker_start();
            }
        }

        if (cond_wait_rv == ETIMEDOUT) // Only check for ramp down on the 'timer tick'
        {
            if (scoreboard.idle > worker_idle_threshold) // only accumulate if there are 'too many' idle threads
            {
                worker_idle_seconds_accumulated += scoreboard.idle; // track how many idle 'thread seconds' we have

                dbg_printf("worker_idle_seconds_accumulated = %u, scoreboard.idle = %u, scoreboard.count = %u",
                           worker_idle_seconds_accumulated, scoreboard.idle, scoreboard.count);
            }

            // Only consider ramp down once we have accumulated enough idle 'thread seconds';
            // this ensures that a large number of idle threads ramps down faster
            max_threads_to_stop = worker_idle_seconds_accumulated / WORKER_IDLE_SECONDS_THRESHOLD;
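
            /*
             * Worked example (illustrative numbers only): with
             * worker_idle_threshold = 2 and 12 idle workers, each one-second
             * tick adds 12 idle thread-seconds, so after a single tick
             * 12 / WORKER_IDLE_SECONDS_THRESHOLD (= 5) = 2 workers become
             * eligible to stop; the cap below further limits this to
             * idle - threshold.
             */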
            if (max_threads_to_stop > 0)
            {
                worker_idle_seconds_accumulated = 0;
                if (max_threads_to_stop > (scoreboard.idle - worker_idle_threshold))
                    max_threads_to_stop = (scoreboard.idle - worker_idle_threshold);

                // Only stop threads if we actually have 'too many' idle ones in the pool
                if (scoreboard.idle > worker_idle_threshold)
                {
                    for (i = 0; i < max_threads_to_stop; i++)
                    {
                        dbg_puts("Removing one thread from the thread pool");
                        worker_stop();
                    }
                }
            }
        }

        pthread_mutex_unlock(&scoreboard.sb_wake_mtx);
    }
}
static void
manager_start(void)
{
    pthread_t tid;
    int rv;

    dbg_puts("starting the manager thread");

    do {
        rv = pthread_create(&tid, &detached_attr, manager_main, NULL);
        if (rv == EAGAIN) {
            sleep(1);
        } else if (rv != 0) {
            /* FIXME: not nice */
            dbg_printf("thread creation failed, rv=%d", rv);
            abort();
        }
    } while (rv != 0);

    wqlist_has_manager = 1;
}
void
manager_workqueue_additem(struct _pthread_workqueue *workq, struct work *witem)
{
    unsigned int wqlist_index_bit = (0x1 << workq->wqlist_index);

    if (workq->overcommit) {
        pthread_t tid;

        pthread_mutex_lock(&ocwq_mtx);
        pthread_spin_lock(&workq->mtx);
        STAILQ_INSERT_TAIL(&workq->item_listhead, witem, item_entry);
        pthread_spin_unlock(&workq->mtx);
        ocwq_mask |= wqlist_index_bit;
        if (ocwq_idle_threads > 0) {
            dbg_puts("signaling an idle worker");
            pthread_cond_signal(&ocwq_has_work);
            ocwq_idle_threads--;
        } else {
            (void) pthread_create(&tid, &detached_attr, overcommit_worker_main, NULL);
        }
        pthread_mutex_unlock(&ocwq_mtx);
    } else {
        pthread_spin_lock(&workq->mtx);

        // Only set the mask for the first item added to the workqueue.
        if (STAILQ_EMPTY(&workq->item_listhead))
        {
            unsigned int new_mask;

            // The only possible contention here is with threads performing the same
            // operation on another workqueue, so we will not be blocked for long...
            // Threads operating on the same workqueue are serialized by the spinlock, so contention is very unlikely.
            do {
                new_mask = atomic_or(&wqlist_mask, wqlist_index_bit);
            } while (!(new_mask & wqlist_index_bit));
        }

        STAILQ_INSERT_TAIL(&workq->item_listhead, witem, item_entry);
        pthread_spin_unlock(&workq->mtx);

        // Only signal a thread wakeup if there are idle threads available,
        // and no other thread has already raced us and emptied the wqlist on our behalf
        if (scoreboard.idle > 0) // && ((wqlist_mask & wqlist_index_bit) != 0)) // disabling this fringe optimization for now
        {
            pthread_mutex_lock(&wqlist_mtx);
            pthread_cond_signal(&wqlist_has_work);
            pthread_mutex_unlock(&wqlist_mtx);
        }
    }
}
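
/*
 * Usage sketch (illustrative only): work reaches manager_workqueue_additem()
 * through the public API declared in pthread_workqueue.h. The snippet below
 * is an assumption-level example, not code from this file.
 */
#if 0
static void
my_task(void *arg)
{
    /* runs on one of the pool's worker threads */
}

static void
enqueue_example(pthread_workqueue_t wq)
{
    pthread_workitem_handle_t handle;
    unsigned int gencount;

    pthread_workqueue_additem_np(wq, my_task, NULL, &handle, &gencount);
}
#endif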
static unsigned int
get_process_limit(void)
{
#if defined(__linux__)
    struct rlimit rlim;

    if (getrlimit(RLIMIT_NPROC, &rlim) < 0) {
        dbg_perror("getrlimit(2)");
        return (50); /* fall back to a fixed default */
    }
    return (rlim.rlim_max);
#else
    /* Solaris doesn't define this limit anywhere I can see.. */
    return (64);
#endif
}
static unsigned int
get_load_average(void)
{
    double loadavg;

    /* TODO: proper error handling */
    if (getloadavg(&loadavg, 1) != 1) {
        dbg_perror("getloadavg(3)");
        return (1);
    }
    if (loadavg > INT_MAX || loadavg < 0)
        loadavg = 1;

    return ((unsigned int) loadavg);
}
unsigned long
manager_peek(const char *key)
{
    unsigned long rv;

    if (strcmp(key, "combined_idle") == 0) {
        rv = scoreboard.idle;
        if (scoreboard.idle > worker_min)
            rv -= worker_min;
        rv += ocwq_idle_threads;
    } else if (strcmp(key, "idle") == 0) {
        rv = scoreboard.idle;
        if (scoreboard.idle > worker_min)
            rv -= worker_min;
    } else if (strcmp(key, "ocomm_idle") == 0) {
        rv = ocwq_idle_threads;
    } else {
        dbg_printf("invalid key: %s", key);
        abort();
    }

    return (rv);
}
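
/*
 * Usage sketch: the keys accepted above can be polled for monitoring, e.g.
 *
 *   unsigned long n_idle = manager_peek("combined_idle");
 *
 * which reports idle regular workers (in excess of worker_min) plus idle
 * overcommit workers.
 */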