1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include <limits.h>
6 #include <signal.h>
7 #include <stdlib.h>
8 #include <pthread.h>
9 #include <unistd.h>
10
11 #include "config.h"
12
13 #ifdef HAVE_DL_ITERATE_PHDR
14 #include <link.h>
15 #endif
16
17 #include "runtime.h"
18 #include "arch.h"
19 #include "defs.h"
20 #include "malloc.h"
21 #include "race.h"
22 #include "go-type.h"
23 #include "go-defer.h"
24
25 #ifdef USING_SPLIT_STACK
26
27 /* FIXME: These are not declared anywhere.  */
28
29 extern void __splitstack_getcontext(void *context[10]);
30
31 extern void __splitstack_setcontext(void *context[10]);
32
33 extern void *__splitstack_makecontext(size_t, void *context[10], size_t *);
34
35 extern void * __splitstack_resetcontext(void *context[10], size_t *);
36
37 extern void *__splitstack_find(void *, void *, size_t *, void **, void **,
38                                void **);
39
40 extern void __splitstack_block_signals (int *, int *);
41
42 extern void __splitstack_block_signals_context (void *context[10], int *,
43                                                 int *);
44
45 #endif
46
47 #ifndef PTHREAD_STACK_MIN
48 # define PTHREAD_STACK_MIN 8192
49 #endif
50
51 #if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK)
52 # define StackMin PTHREAD_STACK_MIN
53 #else
54 # define StackMin (2 * 1024 * 1024)
55 #endif
56
57 uintptr runtime_stacks_sys;
58
59 static void gtraceback(G*);
60
61 #ifdef __rtems__
62 #define __thread
63 #endif
64
65 static __thread G *g;
66 static __thread M *m;
67
68 #ifndef SETCONTEXT_CLOBBERS_TLS
69
70 static inline void
71 initcontext(void)
72 {
73 }
74
75 static inline void
76 fixcontext(ucontext_t *c __attribute__ ((unused)))
77 {
78 }
79
80 #else
81
82 # if defined(__x86_64__) && defined(__sun__)
83
84 // x86_64 Solaris 10 and 11 have a bug: setcontext switches the %fs
85 // register to that of the thread which called getcontext.  The effect
86 // is that the address of all __thread variables changes.  This bug
87 // also affects pthread_self() and pthread_getspecific.  We work
88 // around it by clobbering the context field directly to keep %fs the
89 // same.
90
91 static __thread greg_t fs;
92
93 static inline void
94 initcontext(void)
95 {
96         ucontext_t c;
97
98         getcontext(&c);
99         fs = c.uc_mcontext.gregs[REG_FSBASE];
100 }
101
102 static inline void
103 fixcontext(ucontext_t* c)
104 {
105         c->uc_mcontext.gregs[REG_FSBASE] = fs;
106 }
107
108 # elif defined(__NetBSD__)
109
110 // NetBSD has a bug: setcontext clobbers tlsbase, so we need to save
111 // and restore it ourselves.
112
113 static __thread __greg_t tlsbase;
114
115 static inline void
116 initcontext(void)
117 {
118         ucontext_t c;
119
120         getcontext(&c);
121         tlsbase = c.uc_mcontext._mc_tlsbase;
122 }
123
124 static inline void
125 fixcontext(ucontext_t* c)
126 {
127         c->uc_mcontext._mc_tlsbase = tlsbase;
128 }
129
130 # else
131
132 #  error unknown case for SETCONTEXT_CLOBBERS_TLS
133
134 # endif
135
136 #endif
137
138 // We can not always refer to the TLS variables directly.  The
139 // compiler will call tls_get_addr to get the address of the variable,
140 // and it may hold it in a register across a call to schedule.  When
141 // we get back from the call we may be running in a different thread,
142 // in which case the register now points to the TLS variable for a
143 // different thread.  We use non-inlinable functions to avoid this
144 // when necessary.
145
146 G* runtime_g(void) __attribute__ ((noinline, no_split_stack));
147
148 G*
149 runtime_g(void)
150 {
151         return g;
152 }
153
154 M* runtime_m(void) __attribute__ ((noinline, no_split_stack));
155
156 M*
157 runtime_m(void)
158 {
159         return m;
160 }
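// For illustration only (editor's sketch, not part of the upstream
// source): the hazard described in the comment above, and the safe
// pattern.  The local variable name is hypothetical; runtime_gosched
// and the locks field are the ones used elsewhere in this file.
//
//      M *cached = m;          // BAD: the TLS address may be cached in a register
//      runtime_gosched();      // may resume on a different thread
//      cached->locks++;        // may now touch another thread's m
//
//      runtime_gosched();      // OK: re-read through the accessor afterwards
//      runtime_m()->locks++;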
161
162 // Set m and g.
163 void
164 runtime_setmg(M* mp, G* gp)
165 {
166         m = mp;
167         g = gp;
168 }
169
170 // The static TLS size.  See runtime_newosproc.
171 static int tlssize;
172
173 // Start a new thread.
174 static void
175 runtime_newosproc(M *mp)
176 {
177         pthread_attr_t attr;
178         size_t stacksize;
179         sigset_t clear, old;
180         pthread_t tid;
181         int ret;
182
183         if(pthread_attr_init(&attr) != 0)
184                 runtime_throw("pthread_attr_init");
185         if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
186                 runtime_throw("pthread_attr_setdetachstate");
187
188         stacksize = PTHREAD_STACK_MIN;
189
190         // With glibc before version 2.16 the static TLS size is taken
191         // out of the stack size, and we get an error or a crash if
192         // there is not enough stack space left.  Add it back in if we
193         // can, in case the program uses a lot of TLS space.  FIXME:
194         // This can be disabled in glibc 2.16 and later, if the bug is
195         // indeed fixed then.
196         stacksize += tlssize;
197
198         if(pthread_attr_setstacksize(&attr, stacksize) != 0)
199                 runtime_throw("pthread_attr_setstacksize");
200
201         // Block signals during pthread_create so that the new thread
202         // starts with signals disabled.  It will enable them in minit.
203         sigfillset(&clear);
204
205 #ifdef SIGTRAP
206         // Blocking SIGTRAP reportedly breaks gdb on Alpha GNU/Linux.
207         sigdelset(&clear, SIGTRAP);
208 #endif
209
210         sigemptyset(&old);
211         sigprocmask(SIG_BLOCK, &clear, &old);
212         ret = pthread_create(&tid, &attr, runtime_mstart, mp);
213         sigprocmask(SIG_SETMASK, &old, nil);
214
215         if (ret != 0)
216                 runtime_throw("pthread_create");
217 }
218
219 // First function run by a new goroutine.  This replaces gogocall.
220 static void
221 kickoff(void)
222 {
223         void (*fn)(void*);
224
225         if(g->traceback != nil)
226                 gtraceback(g);
227
228         fn = (void (*)(void*))(g->entry);
229         fn(g->param);
230         runtime_goexit();
231 }
232
233 // Switch context to a different goroutine.  This is like longjmp.
234 static void runtime_gogo(G*) __attribute__ ((noinline));
235 static void
236 runtime_gogo(G* newg)
237 {
238 #ifdef USING_SPLIT_STACK
239         __splitstack_setcontext(&newg->stack_context[0]);
240 #endif
241         g = newg;
242         newg->fromgogo = true;
243         fixcontext(&newg->context);
244         setcontext(&newg->context);
245         runtime_throw("gogo setcontext returned");
246 }
247
248 // Save context and call fn passing g as a parameter.  This is like
249 // setjmp.  Because getcontext always returns 0, unlike setjmp, we use
250 // g->fromgogo as a code.  It will be true if we got here via
251 // setcontext.  g == nil the first time this is called in a new m.
252 static void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));
253 static void
254 runtime_mcall(void (*pfn)(G*))
255 {
256         M *mp;
257         G *gp;
258 #ifndef USING_SPLIT_STACK
259         int i;
260 #endif
261
262         // Ensure that all registers are on the stack for the garbage
263         // collector.
264         __builtin_unwind_init();
265
266         mp = m;
267         gp = g;
268         if(gp == mp->g0)
269                 runtime_throw("runtime: mcall called on m->g0 stack");
270
271         if(gp != nil) {
272
273 #ifdef USING_SPLIT_STACK
274                 __splitstack_getcontext(&g->stack_context[0]);
275 #else
276                 gp->gcnext_sp = &i;
277 #endif
278                 gp->fromgogo = false;
279                 getcontext(&gp->context);
280
281                 // When we return from getcontext, we may be running
282                 // in a new thread.  That means that m and g may have
283                 // changed.  They are global variables so we will
284                 // reload them, but the addresses of m and g may be
285                 // cached in our local stack frame, and those
286                 // addresses may be wrong.  Call functions to reload
287                 // the values for this thread.
288                 mp = runtime_m();
289                 gp = runtime_g();
290
291                 if(gp->traceback != nil)
292                         gtraceback(gp);
293         }
294         if (gp == nil || !gp->fromgogo) {
295 #ifdef USING_SPLIT_STACK
296                 __splitstack_setcontext(&mp->g0->stack_context[0]);
297 #endif
298                 mp->g0->entry = (byte*)pfn;
299                 mp->g0->param = gp;
300
301                 // It's OK to set g directly here because this case
302                 // can not occur if we got here via a setcontext to
303                 // the getcontext call just above.
304                 g = mp->g0;
305
306                 fixcontext(&mp->g0->context);
307                 setcontext(&mp->g0->context);
308                 runtime_throw("runtime: mcall function returned");
309         }
310 }
311
312 #ifdef HAVE_DL_ITERATE_PHDR
313
314 // Called via dl_iterate_phdr.
315
316 static int
317 addtls(struct dl_phdr_info* info, size_t size __attribute__ ((unused)), void *data)
318 {
319         size_t *total = (size_t *)data;
320         unsigned int i;
321
322         for(i = 0; i < info->dlpi_phnum; ++i) {
323                 if(info->dlpi_phdr[i].p_type == PT_TLS)
324                         *total += info->dlpi_phdr[i].p_memsz;
325         }
326         return 0;
327 }
328
329 // Set the total TLS size.
330
331 static void
332 inittlssize()
333 {
334         size_t total = 0;
335
336         dl_iterate_phdr(addtls, (void *)&total);
337         tlssize = total;
338 }
339
340 #else
341
342 static void
343 inittlssize()
344 {
345 }
346
347 #endif
348
349 // Goroutine scheduler
350 // The scheduler's job is to distribute ready-to-run goroutines over worker threads.
351 //
352 // The main concepts are:
353 // G - goroutine.
354 // M - worker thread, or machine.
355 // P - processor, a resource that is required to execute Go code.
356 //     M must have an associated P to execute Go code, however it can be
357 //     blocked or in a syscall w/o an associated P.
358 //
359 // Design doc at http://golang.org/s/go11sched.
360
361 typedef struct Sched Sched;
362 struct Sched {
363         Lock;
364
365         uint64  goidgen;
366         M*      midle;   // idle m's waiting for work
367         int32   nmidle;  // number of idle m's waiting for work
368         int32   mlocked; // number of locked m's waiting for work
369         int32   mcount;  // number of m's that have been created
370
371         P*      pidle;  // idle P's
372         uint32  npidle;
373         uint32  nmspinning;
374
375         // Global runnable queue.
376         G*      runqhead;
377         G*      runqtail;
378         int32   runqsize;
379
380         // Global cache of dead G's.
381         Lock    gflock;
382         G*      gfree;
383
384         int32   stopwait;
385         Note    stopnote;
386         uint32  sysmonwait;
387         Note    sysmonnote;
388         uint64  lastpoll;
389
390         int32   profilehz;      // cpu profiling rate
391 };
392
393 // The max value of GOMAXPROCS.
394 // There are no fundamental restrictions on the value.
395 enum { MaxGomaxprocs = 1<<8 };
396
397 Sched   runtime_sched;
398 int32   runtime_gomaxprocs;
399 bool    runtime_singleproc;
400 bool    runtime_iscgo = true;
401 uint32  runtime_needextram = 1;
402 uint32  runtime_gcwaiting;
403 M       runtime_m0;
404 G       runtime_g0;      // idle goroutine for m0
405 G*      runtime_allg;
406 G*      runtime_lastg;
407 M*      runtime_allm;
408 P**     runtime_allp;
409 M*      runtime_extram;
410 int8*   runtime_goos;
411 int32   runtime_ncpu;
412 static int32    newprocs;
413
414 void* runtime_mstart(void*);
415 static void runqput(P*, G*);
416 static G* runqget(P*);
417 static void runqgrow(P*);
418 static G* runqsteal(P*, P*);
419 static void mput(M*);
420 static M* mget(void);
421 static void mcommoninit(M*);
422 static void schedule(void);
423 static void procresize(int32);
424 static void acquirep(P*);
425 static P* releasep(void);
426 static void newm(void(*)(void), P*);
427 static void stopm(void);
428 static void startm(P*, bool);
429 static void handoffp(P*);
430 static void wakep(void);
431 static void stoplockedm(void);
432 static void startlockedm(G*);
433 static void sysmon(void);
434 static uint32 retake(uint32*);
435 static void inclocked(int32);
436 static void checkdead(void);
437 static void exitsyscall0(G*);
438 static void park0(G*);
439 static void gosched0(G*);
440 static void goexit0(G*);
441 static void gfput(P*, G*);
442 static G* gfget(P*);
443 static void gfpurge(P*);
444 static void globrunqput(G*);
445 static G* globrunqget(P*);
446 static P* pidleget(void);
447 static void pidleput(P*);
448 static void injectglist(G*);
449
450 // The bootstrap sequence is:
451 //
452 //      call osinit
453 //      call schedinit
454 //      make & queue new G
455 //      call runtime_mstart
456 //
457 // The new G calls runtime_main.
458 void
459 runtime_schedinit(void)
460 {
461         int32 n, procs;
462         const byte *p;
463
464         m = &runtime_m0;
465         g = &runtime_g0;
466         m->g0 = g;
467         m->curg = g;
468         g->m = m;
469
470         initcontext();
471         inittlssize();
472
473         m->nomemprof++;
474         runtime_mprofinit();
475         runtime_mallocinit();
476         mcommoninit(m);
477
478         runtime_goargs();
479         runtime_goenvs();
480
481         // For debugging:
482         // Allocate internal symbol table representation now,
483         // so that we don't need to call malloc when we crash.
484         // runtime_findfunc(0);
485
486         runtime_sched.lastpoll = runtime_nanotime();
487         procs = 1;
488         p = runtime_getenv("GOMAXPROCS");
489         if(p != nil && (n = runtime_atoi(p)) > 0) {
490                 if(n > MaxGomaxprocs)
491                         n = MaxGomaxprocs;
492                 procs = n;
493         }
494         runtime_allp = runtime_malloc((MaxGomaxprocs+1)*sizeof(runtime_allp[0]));
495         procresize(procs);
496
497         // Can not enable GC until all roots are registered.
498         // mstats.enablegc = 1;
499         m->nomemprof--;
500 }
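// For illustration only (editor's sketch): the bootstrap sequence the
// comment above runtime_schedinit describes, roughly as the libgo
// startup driver performs it.  The driver lives outside this file, so
// treat the exact call sites, and runtime_osinit in particular, as
// assumptions rather than a definitive transcription.
//
//      runtime_osinit();
//      runtime_schedinit();
//      __go_go(runtime_main, nil);     // make & queue the new G
//      runtime_mstart(runtime_m());    // start scheduling; never returns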
501
502 extern void main_init(void) __asm__ (GOSYM_PREFIX "__go_init_main");
503 extern void main_main(void) __asm__ (GOSYM_PREFIX "main.main");
504
505 // The main goroutine.
506 void
507 runtime_main(void* dummy __attribute__((unused)))
508 {
509         newm(sysmon, nil);
510
511         // Lock the main goroutine onto this, the main OS thread,
512         // during initialization.  Most programs won't care, but a few
513         // do require certain calls to be made by the main thread.
514         // Those can arrange for main.main to run in the main thread
515         // by calling runtime.LockOSThread during initialization
516         // to preserve the lock.
517         runtime_lockOSThread();
518         if(m != &runtime_m0)
519                 runtime_throw("runtime_main not on m0");
520         __go_go(runtime_MHeap_Scavenger, nil);
521         main_init();
522         runtime_unlockOSThread();
523
524         // For gccgo we have to wait until after main is initialized
525         // to enable GC, because initializing main registers the GC
526         // roots.
527         mstats.enablegc = 1;
528
529         main_main();
530         if(raceenabled)
531                 runtime_racefini();
532
533         // Make racy client program work: if panicking on
534         // another goroutine at the same time as main returns,
535         // let the other goroutine finish printing the panic trace.
536         // Once it does, it will exit. See issue 3934.
537         if(runtime_panicking)
538                 runtime_park(nil, nil, "panicwait");
539
540         runtime_exit(0);
541         for(;;)
542                 *(int32*)0 = 0;
543 }
544
545 void
546 runtime_goroutineheader(G *gp)
547 {
548         const char *status;
549
550         switch(gp->status) {
551         case Gidle:
552                 status = "idle";
553                 break;
554         case Grunnable:
555                 status = "runnable";
556                 break;
557         case Grunning:
558                 status = "running";
559                 break;
560         case Gsyscall:
561                 status = "syscall";
562                 break;
563         case Gwaiting:
564                 if(gp->waitreason)
565                         status = gp->waitreason;
566                 else
567                         status = "waiting";
568                 break;
569         default:
570                 status = "???";
571                 break;
572         }
573         runtime_printf("goroutine %D [%s]:\n", gp->goid, status);
574 }
575
576 void
577 runtime_goroutinetrailer(G *g)
578 {
579         if(g != nil && g->gopc != 0 && g->goid != 1) {
580                 String fn;
581                 String file;
582                 intgo line;
583
584                 if(__go_file_line(g->gopc - 1, &fn, &file, &line)) {
585                         runtime_printf("created by %S\n", fn);
586                         runtime_printf("\t%S:%D\n", file, (int64) line);
587                 }
588         }
589 }
590
591 struct Traceback
592 {
593         G* gp;
594         Location locbuf[100];
595         int32 c;
596 };
597
598 void
599 runtime_tracebackothers(G * volatile me)
600 {
601         G * volatile gp;
602         Traceback tb;
603         int32 traceback;
604
605         tb.gp = me;
606         traceback = runtime_gotraceback(nil);
607         for(gp = runtime_allg; gp != nil; gp = gp->alllink) {
608                 if(gp == me || gp->status == Gdead)
609                         continue;
610                 if(gp->issystem && traceback < 2)
611                         continue;
612                 runtime_printf("\n");
613                 runtime_goroutineheader(gp);
614
615                 // Our only mechanism for doing a stack trace is
616                 // _Unwind_Backtrace.  And that only works for the
617                 // current thread, not for other random goroutines.
618                 // So we need to switch context to the goroutine, get
619                 // the backtrace, and then switch back.
620
621                 // This means that if gp is running or in a syscall, we
622                 // can't reliably print a stack trace.  FIXME.
623                 if(gp->status == Gsyscall || gp->status == Grunning) {
624                         runtime_printf("no stack trace available\n");
625                         runtime_goroutinetrailer(gp);
626                         continue;
627                 }
628
629                 gp->traceback = &tb;
630
631 #ifdef USING_SPLIT_STACK
632                 __splitstack_getcontext(&me->stack_context[0]);
633 #endif
634                 getcontext(&me->context);
635
636                 if(gp->traceback != nil) {
637                         runtime_gogo(gp);
638                 }
639
640                 runtime_printtrace(tb.locbuf, tb.c, false);
641                 runtime_goroutinetrailer(gp);
642         }
643 }
644
645 // Do a stack trace of gp, and then restore the context to
646 // gp->traceback->gp, the goroutine that requested the trace.
647
648 static void
649 gtraceback(G* gp)
650 {
651         Traceback* traceback;
652
653         traceback = gp->traceback;
654         gp->traceback = nil;
655         traceback->c = runtime_callers(1, traceback->locbuf,
656                 sizeof traceback->locbuf / sizeof traceback->locbuf[0]);
657         runtime_gogo(traceback->gp);
658 }
659
660 static void
661 mcommoninit(M *mp)
662 {
663         // If there is no mcache, runtime_callers() will crash, and we are
664         // most likely in the sysmon thread, so the stack is not meaningful anyway.
665         if(m->mcache)
666                 runtime_callers(1, mp->createstack, nelem(mp->createstack));
667
668         mp->fastrand = 0x49f6428aUL + mp->id + runtime_cputicks();
669
670         runtime_lock(&runtime_sched);
671         mp->id = runtime_sched.mcount++;
672
673         runtime_mpreinit(mp);
674
675         // Add to runtime_allm so garbage collector doesn't free m
676         // when it is just in a register or thread-local storage.
677         mp->alllink = runtime_allm;
678         // runtime_NumCgoCall() iterates over allm w/o schedlock,
679         // so we need to publish it safely.
680         runtime_atomicstorep(&runtime_allm, mp);
681         runtime_unlock(&runtime_sched);
682 }
683
684 // Mark gp ready to run.
685 void
686 runtime_ready(G *gp)
687 {
688         // Mark runnable.
689         if(gp->status != Gwaiting) {
690                 runtime_printf("goroutine %D has status %d\n", gp->goid, gp->status);
691                 runtime_throw("bad g->status in ready");
692         }
693         gp->status = Grunnable;
694         runqput(m->p, gp);
695         if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0)  // TODO: fast atomic
696                 wakep();
697 }
698
699 int32
700 runtime_gcprocs(void)
701 {
702         int32 n;
703
704         // Figure out how many CPUs to use during GC.
705         // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
706         runtime_lock(&runtime_sched);
707         n = runtime_gomaxprocs;
708         if(n > runtime_ncpu)
709                 n = runtime_ncpu > 0 ? runtime_ncpu : 1;
710         if(n > MaxGcproc)
711                 n = MaxGcproc;
712         if(n > runtime_sched.nmidle+1) // one M is currently running
713                 n = runtime_sched.nmidle+1;
714         runtime_unlock(&runtime_sched);
715         return n;
716 }
717
718 static bool
719 needaddgcproc(void)
720 {
721         int32 n;
722
723         runtime_lock(&runtime_sched);
724         n = runtime_gomaxprocs;
725         if(n > runtime_ncpu)
726                 n = runtime_ncpu;
727         if(n > MaxGcproc)
728                 n = MaxGcproc;
729         n -= runtime_sched.nmidle+1; // one M is currently running
730         runtime_unlock(&runtime_sched);
731         return n > 0;
732 }
733
734 void
735 runtime_helpgc(int32 nproc)
736 {
737         M *mp;
738         int32 n, pos;
739
740         runtime_lock(&runtime_sched);
741         pos = 0;
742         for(n = 1; n < nproc; n++) {  // one M is currently running
743                 if(runtime_allp[pos]->mcache == m->mcache)
744                         pos++;
745                 mp = mget();
746                 if(mp == nil)
747                         runtime_throw("runtime_gcprocs inconsistency");
748                 mp->helpgc = n;
749                 mp->mcache = runtime_allp[pos]->mcache;
750                 pos++;
751                 runtime_notewakeup(&mp->park);
752         }
753         runtime_unlock(&runtime_sched);
754 }
755
756 void
757 runtime_stoptheworld(void)
758 {
759         int32 i;
760         uint32 s;
761         P *p;
762         bool wait;
763
764         runtime_lock(&runtime_sched);
765         runtime_sched.stopwait = runtime_gomaxprocs;
766         runtime_atomicstore((uint32*)&runtime_gcwaiting, 1);
767         // stop current P
768         m->p->status = Pgcstop;
769         runtime_sched.stopwait--;
770         // try to retake all P's in Psyscall status
771         for(i = 0; i < runtime_gomaxprocs; i++) {
772                 p = runtime_allp[i];
773                 s = p->status;
774                 if(s == Psyscall && runtime_cas(&p->status, s, Pgcstop))
775                         runtime_sched.stopwait--;
776         }
777         // stop idle P's
778         while((p = pidleget()) != nil) {
779                 p->status = Pgcstop;
780                 runtime_sched.stopwait--;
781         }
782         wait = runtime_sched.stopwait > 0;
783         runtime_unlock(&runtime_sched);
784
785         // wait for remaining P's to stop voluntarily
786         if(wait) {
787                 runtime_notesleep(&runtime_sched.stopnote);
788                 runtime_noteclear(&runtime_sched.stopnote);
789         }
790         if(runtime_sched.stopwait)
791                 runtime_throw("stoptheworld: not stopped");
792         for(i = 0; i < runtime_gomaxprocs; i++) {
793                 p = runtime_allp[i];
794                 if(p->status != Pgcstop)
795                         runtime_throw("stoptheworld: not stopped");
796         }
797 }
798
799 static void
800 mhelpgc(void)
801 {
802         m->helpgc = -1;
803 }
804
805 void
806 runtime_starttheworld(void)
807 {
808         P *p, *p1;
809         M *mp;
810         G *gp;
811         bool add;
812
813         gp = runtime_netpoll(false);  // non-blocking
814         injectglist(gp);
815         add = needaddgcproc();
816         runtime_lock(&runtime_sched);
817         if(newprocs) {
818                 procresize(newprocs);
819                 newprocs = 0;
820         } else
821                 procresize(runtime_gomaxprocs);
822         runtime_gcwaiting = 0;
823
824         p1 = nil;
825         while((p = pidleget()) != nil) {
826                 // procresize() puts p's with work at the beginning of the list.
827                 // Once we reach a p without a run queue, the rest don't have one either.
828                 if(p->runqhead == p->runqtail) {
829                         pidleput(p);
830                         break;
831                 }
832                 mp = mget();
833                 if(mp == nil) {
834                         p->link = p1;
835                         p1 = p;
836                         continue;
837                 }
838                 if(mp->nextp)
839                         runtime_throw("starttheworld: inconsistent mp->nextp");
840                 mp->nextp = p;
841                 runtime_notewakeup(&mp->park);
842         }
843         if(runtime_sched.sysmonwait) {
844                 runtime_sched.sysmonwait = false;
845                 runtime_notewakeup(&runtime_sched.sysmonnote);
846         }
847         runtime_unlock(&runtime_sched);
848
849         while(p1) {
850                 p = p1;
851                 p1 = p1->link;
852                 add = false;
853                 newm(nil, p);
854         }
855
856         if(add) {
857                 // If GC could have used another helper proc, start one now,
858                 // in the hope that it will be available next time.
859                 // It would have been even better to start it before the collection,
860                 // but doing so requires allocating memory, so it's tricky to
861                 // coordinate.  This lazy approach works out in practice:
862                 // we don't mind if the first couple gc rounds don't have quite
863                 // the maximum number of procs.
864                 newm(mhelpgc, nil);
865         }
866 }
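// For illustration only (editor's sketch): stoptheworld/starttheworld
// are used as a bracket around work that must observe a stopped world,
// as the garbage collector does.  The body of the bracket here is a
// placeholder, not code from this runtime.
//
//      runtime_stoptheworld();
//      // ... all P's are now in Pgcstop; scan or mutate global state ...
//      runtime_starttheworld();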
867
868 // Called to start an M.
869 void*
870 runtime_mstart(void* mp)
871 {
872         m = (M*)mp;
873         g = m->g0;
874
875         initcontext();
876
877         g->entry = nil;
878         g->param = nil;
879
880         // Record top of stack for use by mcall.
881         // Once we call schedule we're never coming back,
882         // so other calls can reuse this stack space.
883 #ifdef USING_SPLIT_STACK
884         __splitstack_getcontext(&g->stack_context[0]);
885 #else
886         g->gcinitial_sp = &mp;
887         // Setting gcstack_size to 0 is a marker meaning that gcinitial_sp
888         // is the top of the stack, not the bottom.
889         g->gcstack_size = 0;
890         g->gcnext_sp = &mp;
891 #endif
892         getcontext(&g->context);
893
894         if(g->entry != nil) {
895                 // Got here from mcall.
896                 void (*pfn)(G*) = (void (*)(G*))g->entry;
897                 G* gp = (G*)g->param;
898                 pfn(gp);
899                 *(int*)0x21 = 0x21;
900         }
901         runtime_minit();
902
903 #ifdef USING_SPLIT_STACK
904         {
905                 int dont_block_signals = 0;
906                 __splitstack_block_signals(&dont_block_signals, nil);
907         }
908 #endif
909
910         // Install signal handlers; after minit so that minit can
911         // prepare the thread to be able to handle the signals.
912         if(m == &runtime_m0) {
913                 runtime_initsig();
914                 if(runtime_iscgo)
915                         runtime_newextram();
916         }
917         
918         if(m->mstartfn)
919                 m->mstartfn();
920
921         if(m->helpgc) {
922                 m->helpgc = 0;
923                 stopm();
924         } else if(m != &runtime_m0) {
925                 acquirep(m->nextp);
926                 m->nextp = nil;
927         }
928         schedule();
929
930         // TODO(brainman): This point is never reached, because scheduler
931         // does not release os threads at the moment. But once this path
932         // is enabled, we must remove our seh here.
933
934         return nil;
935 }
936
937 typedef struct CgoThreadStart CgoThreadStart;
938 struct CgoThreadStart
939 {
940         M *m;
941         G *g;
942         void (*fn)(void);
943 };
944
945 // Allocate a new m unassociated with any thread.
946 // Can use p for allocation context if needed.
947 M*
948 runtime_allocm(P *p, int32 stacksize, byte** ret_g0_stack, size_t* ret_g0_stacksize)
949 {
950         M *mp;
951
952         m->locks++;  // disable GC because it can be called from sysmon
953         if(m->p == nil)
954                 acquirep(p);  // temporarily borrow p for mallocs in this function
955 #if 0
956         if(mtype == nil) {
957                 Eface e;
958                 runtime_gc_m_ptr(&e);
959                 mtype = ((const PtrType*)e.__type_descriptor)->__element_type;
960         }
961 #endif
962
963         mp = runtime_mal(sizeof *mp);
964         mcommoninit(mp);
965         mp->g0 = runtime_malg(stacksize, ret_g0_stack, ret_g0_stacksize);
966
967         if(p == m->p)
968                 releasep();
969         m->locks--;
970
971         return mp;
972 }
973
974 static M* lockextra(bool nilokay);
975 static void unlockextra(M*);
976
977 // needm is called when a cgo callback happens on a
978 // thread without an m (a thread not created by Go).
979 // In this case, needm is expected to find an m to use
980 // and return with m, g initialized correctly.
981 // Since m and g are not set now (likely nil, but see below)
982 // needm is limited in what routines it can call. In particular
983 // it can only call nosplit functions (textflag 7) and cannot
984 // do any scheduling that requires an m.
985 //
986 // In order to avoid needing heavy lifting here, we adopt
987 // the following strategy: there is a stack of available m's
988 // that can be stolen. Using compare-and-swap
989 // to pop from the stack has ABA races, so we simulate
990 // a lock by doing an exchange (via casp) to steal the stack
991 // head and replace the top pointer with MLOCKED (1).
992 // This serves as a simple spin lock that we can use even
993 // without an m. The thread that locks the stack in this way
994 // unlocks the stack by storing a valid stack head pointer.
995 //
996 // In order to make sure that there is always an m structure
997 // available to be stolen, we maintain the invariant that there
998 // is always one more than needed. At the beginning of the
999 // program (if cgo is in use) the list is seeded with a single m.
1000 // If needm finds that it has taken the last m off the list, its job
1001 // is - once it has installed its own m so that it can do things like
1002 // allocate memory - to create a spare m and put it on the list.
1003 //
1004 // Each of these extra m's also has a g0 and a curg that are
1005 // pressed into service as the scheduling stack and current
1006 // goroutine for the duration of the cgo callback.
1007 //
1008 // When the callback is done with the m, it calls dropm to
1009 // put the m back on the list.
1010 //
1011 // Unlike the gc toolchain, we start running on curg, since we are
1012 // just going to return and let the caller continue.
1013 void
1014 runtime_needm(void)
1015 {
1016         M *mp;
1017
1018         // Lock extra list, take head, unlock popped list.
1019         // nilokay=false is safe here because of the invariant above,
1020         // that the extra list always contains or will soon contain
1021         // at least one m.
1022         mp = lockextra(false);
1023
1024         // Set needextram when we've just emptied the list,
1025         // so that the eventual call into cgocallbackg will
1026         // allocate a new m for the extra list. We delay the
1027         // allocation until then so that it can be done
1028         // after exitsyscall makes sure it is okay to be
1029         // running at all (that is, there's no garbage collection
1030         // running right now).
1031         mp->needextram = mp->schedlink == nil;
1032         unlockextra(mp->schedlink);
1033
1034         // Install m and g (= m->curg).
1035         runtime_setmg(mp, mp->curg);
1036
1037         // Initialize g's context as in mstart.
1038         initcontext();
1039         g->status = Gsyscall;
1040         g->entry = nil;
1041         g->param = nil;
1042 #ifdef USING_SPLIT_STACK
1043         __splitstack_getcontext(&g->stack_context[0]);
1044 #else
1045         g->gcinitial_sp = &mp;
1046         g->gcstack_size = 0;
1047         g->gcnext_sp = &mp;
1048 #endif
1049         getcontext(&g->context);
1050
1051         if(g->entry != nil) {
1052                 // Got here from mcall.
1053                 void (*pfn)(G*) = (void (*)(G*))g->entry;
1054                 G* gp = (G*)g->param;
1055                 pfn(gp);
1056                 *(int*)0x22 = 0x22;
1057         }
1058
1059         // Initialize this thread to use the m.
1060         runtime_minit();
1061
1062 #ifdef USING_SPLIT_STACK
1063         {
1064                 int dont_block_signals = 0;
1065                 __splitstack_block_signals(&dont_block_signals, nil);
1066         }
1067 #endif
1068 }
1069
1070 // newextram allocates an m and puts it on the extra list.
1071 // It is called with a working local m, so that it can do things
1072 // like call schedlock and allocate.
1073 void
1074 runtime_newextram(void)
1075 {
1076         M *mp, *mnext;
1077         G *gp;
1078         byte *g0_sp, *sp;
1079         size_t g0_spsize, spsize;
1080
1081         // Create extra goroutine locked to extra m.
1082         // The goroutine is the context in which the cgo callback will run.
1083         // The sched.pc will never be returned to, but setting it to
1084         // runtime.goexit makes clear to the traceback routines where
1085         // the goroutine stack ends.
1086         mp = runtime_allocm(nil, StackMin, &g0_sp, &g0_spsize);
1087         gp = runtime_malg(StackMin, &sp, &spsize);
1088         gp->status = Gdead;
1089         mp->curg = gp;
1090         mp->locked = LockInternal;
1091         mp->lockedg = gp;
1092         gp->lockedm = mp;
1093         // put on allg for garbage collector
1094         runtime_lock(&runtime_sched);
1095         if(runtime_lastg == nil)
1096                 runtime_allg = gp;
1097         else
1098                 runtime_lastg->alllink = gp;
1099         runtime_lastg = gp;
1100         runtime_unlock(&runtime_sched);
1101         gp->goid = runtime_xadd64(&runtime_sched.goidgen, 1);
1102
1103         // The context for gp will be set up in runtime_needm.  But
1104         // here we need to set up the context for g0.
1105         getcontext(&mp->g0->context);
1106         mp->g0->context.uc_stack.ss_sp = g0_sp;
1107 #ifdef MAKECONTEXT_STACK_TOP
1108         mp->g0->context.uc_stack.ss_sp += g0_spsize;
1109 #endif
1110         mp->g0->context.uc_stack.ss_size = g0_spsize;
1111         makecontext(&mp->g0->context, kickoff, 0);
1112
1113         // Add m to the extra list.
1114         mnext = lockextra(true);
1115         mp->schedlink = mnext;
1116         unlockextra(mp);
1117 }
1118
1119 // dropm is called when a cgo callback has called needm but is now
1120 // done with the callback and returning into the non-Go thread.
1121 // It puts the current m back onto the extra list.
1122 //
1123 // The main expense here is the call to signalstack to release the
1124 // m's signal stack, and then the call to needm on the next callback
1125 // from this thread. It is tempting to try to save the m for next time,
1126 // which would eliminate both these costs, but there might not be
1127 // a next time: the current thread (which Go does not control) might exit.
1128 // If we saved the m for that thread, there would be an m leak each time
1129 // such a thread exited. Instead, we acquire and release an m on each
1130 // call. These should typically not be scheduling operations, just a few
1131 // atomics, so the cost should be small.
1132 //
1133 // TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
1134 // variable using pthread_key_create. Unlike the pthread keys we already use
1135 // on OS X, this dummy key would never be read by Go code. It would exist
1136 // only so that we could register a thread-exit-time destructor.
1137 // That destructor would put the m back onto the extra list.
1138 // This is purely a performance optimization. The current version,
1139 // in which dropm happens on each cgo call, is still correct too.
1140 // We may have to keep the current version on systems with cgo
1141 // but without pthreads, like Windows.
1142 void
1143 runtime_dropm(void)
1144 {
1145         M *mp, *mnext;
1146
1147         // Undo whatever initialization minit did during needm.
1148         runtime_unminit();
1149
1150         // Clear m and g, and return m to the extra list.
1151         // After the call to setmg we can only call nosplit functions.
1152         mp = m;
1153         runtime_setmg(nil, nil);
1154
1155         mp->curg->status = Gdead;
1156
1157         mnext = lockextra(true);
1158         mp->schedlink = mnext;
1159         unlockextra(mp);
1160 }
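// For illustration only (editor's sketch): the needm/dropm pairing
// described in the comment above runtime_needm, as seen from a callback
// arriving on a thread that Go did not create.  The middle step is a
// placeholder for the actual callback machinery (see the mention of
// cgocallbackg above).
//
//      runtime_needm();        // borrow an extra m (and its curg)
//      // ... run the Go callback ...
//      runtime_dropm();        // return the m to the extra list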
1161
1162 #define MLOCKED ((M*)1)
1163
1164 // lockextra locks the extra list and returns the list head.
1165 // The caller must unlock the list by storing a new list head
1166 // to runtime.extram. If nilokay is true, then lockextra will
1167 // return a nil list head if that's what it finds. If nilokay is false,
1168 // lockextra will keep waiting until the list head is no longer nil.
1169 static M*
1170 lockextra(bool nilokay)
1171 {
1172         M *mp;
1173         void (*yield)(void);
1174
1175         for(;;) {
1176                 mp = runtime_atomicloadp(&runtime_extram);
1177                 if(mp == MLOCKED) {
1178                         yield = runtime_osyield;
1179                         yield();
1180                         continue;
1181                 }
1182                 if(mp == nil && !nilokay) {
1183                         runtime_usleep(1);
1184                         continue;
1185                 }
1186                 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
1187                         yield = runtime_osyield;
1188                         yield();
1189                         continue;
1190                 }
1191                 break;
1192         }
1193         return mp;
1194 }
1195
1196 static void
1197 unlockextra(M *mp)
1198 {
1199         runtime_atomicstorep(&runtime_extram, mp);
1200 }
1201
1202 static int32
1203 countextra()
1204 {
1205         M *mp, *mc;
1206         int32 c;
1207
1208         for(;;) {
1209                 mp = runtime_atomicloadp(&runtime_extram);
1210                 if(mp == MLOCKED) {
1211                         runtime_osyield();
1212                         continue;
1213                 }
1214                 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
1215                         runtime_osyield();
1216                         continue;
1217                 }
1218                 c = 0;
1219                 for(mc = mp; mc != nil; mc = mc->schedlink)
1220                         c++;
1221                 runtime_atomicstorep(&runtime_extram, mp);
1222                 return c;
1223         }
1224 }
1225
1226 // Create a new m.  It will start off with a call to fn, or else the scheduler.
1227 static void
1228 newm(void(*fn)(void), P *p)
1229 {
1230         M *mp;
1231
1232         mp = runtime_allocm(p, -1, nil, nil);
1233         mp->nextp = p;
1234         mp->mstartfn = fn;
1235
1236         runtime_newosproc(mp);
1237 }
1238
1239 // Stops execution of the current m until new work is available.
1240 // Returns with acquired P.
1241 static void
1242 stopm(void)
1243 {
1244         if(m->locks)
1245                 runtime_throw("stopm holding locks");
1246         if(m->p)
1247                 runtime_throw("stopm holding p");
1248         if(m->spinning) {
1249                 m->spinning = false;
1250                 runtime_xadd(&runtime_sched.nmspinning, -1);
1251         }
1252
1253 retry:
1254         runtime_lock(&runtime_sched);
1255         mput(m);
1256         runtime_unlock(&runtime_sched);
1257         runtime_notesleep(&m->park);
1258         runtime_noteclear(&m->park);
1259         if(m->helpgc) {
1260                 runtime_gchelper();
1261                 m->helpgc = 0;
1262                 m->mcache = nil;
1263                 goto retry;
1264         }
1265         acquirep(m->nextp);
1266         m->nextp = nil;
1267 }
1268
1269 static void
1270 mspinning(void)
1271 {
1272         m->spinning = true;
1273 }
1274
1275 // Schedules some M to run the p (creates an M if necessary).
1276 // If p==nil, tries to get an idle P; if there are no idle P's it does nothing.
1277 static void
1278 startm(P *p, bool spinning)
1279 {
1280         M *mp;
1281         void (*fn)(void);
1282
1283         runtime_lock(&runtime_sched);
1284         if(p == nil) {
1285                 p = pidleget();
1286                 if(p == nil) {
1287                         runtime_unlock(&runtime_sched);
1288                         if(spinning)
1289                                 runtime_xadd(&runtime_sched.nmspinning, -1);
1290                         return;
1291                 }
1292         }
1293         mp = mget();
1294         runtime_unlock(&runtime_sched);
1295         if(mp == nil) {
1296                 fn = nil;
1297                 if(spinning)
1298                         fn = mspinning;
1299                 newm(fn, p);
1300                 return;
1301         }
1302         if(mp->spinning)
1303                 runtime_throw("startm: m is spinning");
1304         if(mp->nextp)
1305                 runtime_throw("startm: m has p");
1306         mp->spinning = spinning;
1307         mp->nextp = p;
1308         runtime_notewakeup(&mp->park);
1309 }
1310
1311 // Hands off P from syscall or locked M.
1312 static void
1313 handoffp(P *p)
1314 {
1315         // if it has local work, start it straight away
1316         if(p->runqhead != p->runqtail || runtime_sched.runqsize) {
1317                 startm(p, false);
1318                 return;
1319         }
1320         // no local work, check that there are no spinning/idle M's,
1321         // otherwise our help is not required
1322         if(runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) == 0 &&  // TODO: fast atomic
1323                 runtime_cas(&runtime_sched.nmspinning, 0, 1)) {
1324                 startm(p, true);
1325                 return;
1326         }
1327         runtime_lock(&runtime_sched);
1328         if(runtime_gcwaiting) {
1329                 p->status = Pgcstop;
1330                 if(--runtime_sched.stopwait == 0)
1331                         runtime_notewakeup(&runtime_sched.stopnote);
1332                 runtime_unlock(&runtime_sched);
1333                 return;
1334         }
1335         if(runtime_sched.runqsize) {
1336                 runtime_unlock(&runtime_sched);
1337                 startm(p, false);
1338                 return;
1339         }
1340         // If this is the last running P and nobody is polling network,
1341         // need to wakeup another M to poll network.
1342         if(runtime_sched.npidle == (uint32)runtime_gomaxprocs-1 && runtime_atomicload64(&runtime_sched.lastpoll) != 0) {
1343                 runtime_unlock(&runtime_sched);
1344                 startm(p, false);
1345                 return;
1346         }
1347         pidleput(p);
1348         runtime_unlock(&runtime_sched);
1349 }
1350
1351 // Tries to add one more P to execute G's.
1352 // Called when a G is made runnable (newproc, ready).
1353 static void
1354 wakep(void)
1355 {
1356         // be conservative about spinning threads
1357         if(!runtime_cas(&runtime_sched.nmspinning, 0, 1))
1358                 return;
1359         startm(nil, true);
1360 }
1361
1362 // Stops execution of the current m that is locked to a g until the g is runnable again.
1363 // Returns with acquired P.
1364 static void
1365 stoplockedm(void)
1366 {
1367         P *p;
1368
1369         if(m->lockedg == nil || m->lockedg->lockedm != m)
1370                 runtime_throw("stoplockedm: inconsistent locking");
1371         if(m->p) {
1372                 // Schedule another M to run this p.
1373                 p = releasep();
1374                 handoffp(p);
1375         }
1376         inclocked(1);
1377         // Wait until another thread schedules lockedg again.
1378         runtime_notesleep(&m->park);
1379         runtime_noteclear(&m->park);
1380         if(m->lockedg->status != Grunnable)
1381                 runtime_throw("stoplockedm: not runnable");
1382         acquirep(m->nextp);
1383         m->nextp = nil;
1384 }
1385
1386 // Schedules the locked m to run the locked gp.
1387 static void
1388 startlockedm(G *gp)
1389 {
1390         M *mp;
1391         P *p;
1392
1393         mp = gp->lockedm;
1394         if(mp == m)
1395                 runtime_throw("startlockedm: locked to me");
1396         if(mp->nextp)
1397                 runtime_throw("startlockedm: m has p");
1398         // directly handoff current P to the locked m
1399         inclocked(-1);
1400         p = releasep();
1401         mp->nextp = p;
1402         runtime_notewakeup(&mp->park);
1403         stopm();
1404 }
1405
1406 // Stops the current m for stoptheworld.
1407 // Returns when the world is restarted.
1408 static void
1409 gcstopm(void)
1410 {
1411         P *p;
1412
1413         if(!runtime_gcwaiting)
1414                 runtime_throw("gcstopm: not waiting for gc");
1415         if(m->spinning) {
1416                 m->spinning = false;
1417                 runtime_xadd(&runtime_sched.nmspinning, -1);
1418         }
1419         p = releasep();
1420         runtime_lock(&runtime_sched);
1421         p->status = Pgcstop;
1422         if(--runtime_sched.stopwait == 0)
1423                 runtime_notewakeup(&runtime_sched.stopnote);
1424         runtime_unlock(&runtime_sched);
1425         stopm();
1426 }
1427
1428 // Schedules gp to run on the current M.
1429 // Never returns.
1430 static void
1431 execute(G *gp)
1432 {
1433         int32 hz;
1434
1435         if(gp->status != Grunnable) {
1436                 runtime_printf("execute: bad g status %d\n", gp->status);
1437                 runtime_throw("execute: bad g status");
1438         }
1439         gp->status = Grunning;
1440         m->p->tick++;
1441         m->curg = gp;
1442         gp->m = m;
1443
1444         // Check whether the profiler needs to be turned on or off.
1445         hz = runtime_sched.profilehz;
1446         if(m->profilehz != hz)
1447                 runtime_resetcpuprofiler(hz);
1448
1449         runtime_gogo(gp);
1450 }
1451
1452 // Finds a runnable goroutine to execute.
1453 // Tries to steal from other P's, get g from global queue, poll network.
1454 static G*
1455 findrunnable(void)
1456 {
1457         G *gp;
1458         P *p;
1459         int32 i;
1460
1461 top:
1462         if(runtime_gcwaiting) {
1463                 gcstopm();
1464                 goto top;
1465         }
1466         // local runq
1467         gp = runqget(m->p);
1468         if(gp)
1469                 return gp;
1470         // global runq
1471         if(runtime_sched.runqsize) {
1472                 runtime_lock(&runtime_sched);
1473                 gp = globrunqget(m->p);
1474                 runtime_unlock(&runtime_sched);
1475                 if(gp)
1476                         return gp;
1477         }
1478         // poll network
1479         gp = runtime_netpoll(false);  // non-blocking
1480         if(gp) {
1481                 injectglist(gp->schedlink);
1482                 gp->status = Grunnable;
1483                 return gp;
1484         }
1485         // If number of spinning M's >= number of busy P's, block.
1486         // This is necessary to prevent excessive CPU consumption
1487         // when GOMAXPROCS>>1 but the program parallelism is low.
1488         if(!m->spinning && 2 * runtime_atomicload(&runtime_sched.nmspinning) >= runtime_gomaxprocs - runtime_atomicload(&runtime_sched.npidle))  // TODO: fast atomic
1489                 goto stop;
1490         if(!m->spinning) {
1491                 m->spinning = true;
1492                 runtime_xadd(&runtime_sched.nmspinning, 1);
1493         }
1494         // random steal from other P's
1495         for(i = 0; i < 2*runtime_gomaxprocs; i++) {
1496                 if(runtime_gcwaiting)
1497                         goto top;
1498                 p = runtime_allp[runtime_fastrand1()%runtime_gomaxprocs];
1499                 if(p == m->p)
1500                         gp = runqget(p);
1501                 else
1502                         gp = runqsteal(m->p, p);
1503                 if(gp)
1504                         return gp;
1505         }
1506 stop:
1507         // return P and block
1508         runtime_lock(&runtime_sched);
1509         if(runtime_gcwaiting) {
1510                 runtime_unlock(&runtime_sched);
1511                 goto top;
1512         }
1513         if(runtime_sched.runqsize) {
1514                 gp = globrunqget(m->p);
1515                 runtime_unlock(&runtime_sched);
1516                 return gp;
1517         }
1518         p = releasep();
1519         pidleput(p);
1520         runtime_unlock(&runtime_sched);
1521         if(m->spinning) {
1522                 m->spinning = false;
1523                 runtime_xadd(&runtime_sched.nmspinning, -1);
1524         }
1525         // check all runqueues once again
1526         for(i = 0; i < runtime_gomaxprocs; i++) {
1527                 p = runtime_allp[i];
1528                 if(p && p->runqhead != p->runqtail) {
1529                         runtime_lock(&runtime_sched);
1530                         p = pidleget();
1531                         runtime_unlock(&runtime_sched);
1532                         if(p) {
1533                                 acquirep(p);
1534                                 goto top;
1535                         }
1536                         break;
1537                 }
1538         }
1539         // poll network
1540         if(runtime_xchg64(&runtime_sched.lastpoll, 0) != 0) {
1541                 if(m->p)
1542                         runtime_throw("findrunnable: netpoll with p");
1543                 if(m->spinning)
1544                         runtime_throw("findrunnable: netpoll with spinning");
1545                 gp = runtime_netpoll(true);  // block until new work is available
1546                 runtime_atomicstore64(&runtime_sched.lastpoll, runtime_nanotime());
1547                 if(gp) {
1548                         runtime_lock(&runtime_sched);
1549                         p = pidleget();
1550                         runtime_unlock(&runtime_sched);
1551                         if(p) {
1552                                 acquirep(p);
1553                                 injectglist(gp->schedlink);
1554                                 gp->status = Grunnable;
1555                                 return gp;
1556                         }
1557                         injectglist(gp);
1558                 }
1559         }
1560         stopm();
1561         goto top;
1562 }
1563
1564 // Injects the list of runnable G's into the scheduler.
1565 // Can run concurrently with GC.
1566 static void
1567 injectglist(G *glist)
1568 {
1569         int32 n;
1570         G *gp;
1571
1572         if(glist == nil)
1573                 return;
1574         runtime_lock(&runtime_sched);
1575         for(n = 0; glist; n++) {
1576                 gp = glist;
1577                 glist = gp->schedlink;
1578                 gp->status = Grunnable;
1579                 globrunqput(gp);
1580         }
1581         runtime_unlock(&runtime_sched);
1582
1583         for(; n && runtime_sched.npidle; n--)
1584                 startm(nil, false);
1585 }
1586
1587 // One round of scheduler: find a runnable goroutine and execute it.
1588 // Never returns.
1589 static void
1590 schedule(void)
1591 {
1592         G *gp;
1593
1594         if(m->locks)
1595                 runtime_throw("schedule: holding locks");
1596
1597 top:
1598         if(runtime_gcwaiting) {
1599                 gcstopm();
1600                 goto top;
1601         }
1602
1603         gp = runqget(m->p);
1604         if(gp == nil)
1605                 gp = findrunnable();
1606
1607         if(m->spinning) {
1608                 m->spinning = false;
1609                 runtime_xadd(&runtime_sched.nmspinning, -1);
1610         }
1611
1612         // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
1613         // so see if we need to wakeup another M here.
1614         if (m->p->runqhead != m->p->runqtail &&
1615                 runtime_atomicload(&runtime_sched.nmspinning) == 0 &&
1616                 runtime_atomicload(&runtime_sched.npidle) > 0)  // TODO: fast atomic
1617                 wakep();
1618
1619         if(gp->lockedm) {
1620                 startlockedm(gp);
1621                 goto top;
1622         }
1623
1624         execute(gp);
1625 }
1626
1627 // Puts the current goroutine into a waiting state and unlocks the lock.
1628 // The goroutine can be made runnable again by calling runtime_ready(gp).
1629 void
1630 runtime_park(void(*unlockf)(Lock*), Lock *lock, const char *reason)
1631 {
1632         m->waitlock = lock;
1633         m->waitunlockf = unlockf;
1634         g->waitreason = reason;
1635         runtime_mcall(park0);
1636 }
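// For illustration only (editor's sketch): a typical runtime_park call
// site.  The Lock l and the condition are hypothetical; runtime_lock,
// runtime_unlock and runtime_ready are the real primitives used
// throughout this file.
//
//      runtime_lock(&l);
//      if(!condition) {
//              // park0 marks the g Gwaiting and then calls
//              // runtime_unlock(&l); runtime_ready(gp) wakes it later.
//              runtime_park(runtime_unlock, &l, "condition wait");
//      } else
//              runtime_unlock(&l);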
1637
1638 // runtime_park continuation on g0.
1639 static void
1640 park0(G *gp)
1641 {
1642         gp->status = Gwaiting;
1643         gp->m = nil;
1644         m->curg = nil;
1645         if(m->waitunlockf) {
1646                 m->waitunlockf(m->waitlock);
1647                 m->waitunlockf = nil;
1648                 m->waitlock = nil;
1649         }
1650         if(m->lockedg) {
1651                 stoplockedm();
1652                 execute(gp);  // Never returns.
1653         }
1654         schedule();
1655 }
1656
1657 // Scheduler yield.
1658 void
1659 runtime_gosched(void)
1660 {
1661         runtime_mcall(gosched0);
1662 }
1663
1664 // runtime_gosched continuation on g0.
1665 static void
1666 gosched0(G *gp)
1667 {
1668         gp->status = Grunnable;
1669         gp->m = nil;
1670         m->curg = nil;
1671         runtime_lock(&runtime_sched);
1672         globrunqput(gp);
1673         runtime_unlock(&runtime_sched);
1674         if(m->lockedg) {
1675                 stoplockedm();
1676                 execute(gp);  // Never returns.
1677         }
1678         schedule();
1679 }
1680
1681 // Finishes execution of the current goroutine.
1682 void
1683 runtime_goexit(void)
1684 {
1685         if(raceenabled)
1686                 runtime_racegoend();
1687         runtime_mcall(goexit0);
1688 }
1689
1690 // runtime_goexit continuation on g0.
1691 static void
1692 goexit0(G *gp)
1693 {
1694         gp->status = Gdead;
1695         gp->entry = nil;
1696         gp->m = nil;
1697         gp->lockedm = nil;
1698         m->curg = nil;
1699         m->lockedg = nil;
1700         if(m->locked & ~LockExternal) {
1701                 runtime_printf("invalid m->locked = %d", m->locked);
1702                 runtime_throw("internal lockOSThread error");
1703         }       
1704         m->locked = 0;
1705         gfput(m->p, gp);
1706         schedule();
1707 }
1708
1709 // The goroutine g is about to enter a system call.
1710 // Record that it's not using the cpu anymore.
1711 // This is called only from the go syscall library and cgocall,
1712 // not from the low-level system calls used by the runtime.
1713 //
1714 // Entersyscall cannot split the stack: the runtime_gosave must
1715 // make g->sched refer to the caller's stack segment, because
1716 // entersyscall is going to return immediately after.
1717
1718 void runtime_entersyscall(void) __attribute__ ((no_split_stack));
1719
1720 void
1721 runtime_entersyscall()
1722 {
1723         if(m->profilehz > 0)
1724                 runtime_setprof(false);
1725
1726         // Leave SP around for gc and traceback.
1727 #ifdef USING_SPLIT_STACK
1728         g->gcstack = __splitstack_find(nil, nil, &g->gcstack_size,
1729                                        &g->gcnext_segment, &g->gcnext_sp,
1730                                        &g->gcinitial_sp);
1731 #else
1732         {
1733                 uint32 v;
1734
1735                 g->gcnext_sp = (byte *) &v;
1736         }
1737 #endif
1738
1739         // Save the registers in the g structure so that any pointers
1740         // held in registers will be seen by the garbage collector.
1741         getcontext(&g->gcregs);
1742
1743         g->status = Gsyscall;
1744
1745         if(runtime_atomicload(&runtime_sched.sysmonwait)) {  // TODO: fast atomic
1746                 runtime_lock(&runtime_sched);
1747                 if(runtime_atomicload(&runtime_sched.sysmonwait)) {
1748                         runtime_atomicstore(&runtime_sched.sysmonwait, 0);
1749                         runtime_notewakeup(&runtime_sched.sysmonnote);
1750                 }
1751                 runtime_unlock(&runtime_sched);
1752         }
1753
1754         m->mcache = nil;
1755         m->p->tick++;
1756         m->p->m = nil;
1757         runtime_atomicstore(&m->p->status, Psyscall);
1758         if(runtime_gcwaiting) {
1759                 runtime_lock(&runtime_sched);
1760                 if (runtime_sched.stopwait > 0 && runtime_cas(&m->p->status, Psyscall, Pgcstop)) {
1761                         if(--runtime_sched.stopwait == 0)
1762                                 runtime_notewakeup(&runtime_sched.stopnote);
1763                 }
1764                 runtime_unlock(&runtime_sched);
1765         }
1766 }
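// Example (sketch): how the syscall library is expected to bracket a
// potentially blocking kernel call.  This is not the literal library
// code, just the shape of it; "r", "fd", "buf" and "n" are placeholders.
//
//	runtime_entersyscall();
//	r = read(fd, buf, n);        // may block in the kernel
//	runtime_exitsyscall();       // defined below; reacquires a P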
1767
1768 // The same as runtime_entersyscall(), but with a hint that the syscall is blocking.
1769 void
1770 runtime_entersyscallblock(void)
1771 {
1772         P *p;
1773
1774         if(m->profilehz > 0)
1775                 runtime_setprof(false);
1776
1777         // Leave SP around for gc and traceback.
1778 #ifdef USING_SPLIT_STACK
1779         g->gcstack = __splitstack_find(nil, nil, &g->gcstack_size,
1780                                        &g->gcnext_segment, &g->gcnext_sp,
1781                                        &g->gcinitial_sp);
1782 #else
1783         g->gcnext_sp = (byte *) &p;
1784 #endif
1785
1786         // Save the registers in the g structure so that any pointers
1787         // held in registers will be seen by the garbage collector.
1788         getcontext(&g->gcregs);
1789
1790         g->status = Gsyscall;
1791
1792         p = releasep();
1793         handoffp(p);
1794         if(g->isbackground)  // do not consider blocked scavenger for deadlock detection
1795                 inclocked(1);
1796 }
1797
1798 // The goroutine g exited its system call.
1799 // Arrange for it to run on a cpu again.
1800 // This is called only from the go syscall library, not
1801 // from the low-level system calls used by the runtime.
1802 void
1803 runtime_exitsyscall(void)
1804 {
1805         G *gp;
1806         P *p;
1807
1808         // Check whether the profiler needs to be turned on.
1809         if(m->profilehz > 0)
1810                 runtime_setprof(true);
1811
1812         gp = g;
1813         // Try to re-acquire the last P.
1814         if(m->p && m->p->status == Psyscall && runtime_cas(&m->p->status, Psyscall, Prunning)) {
1815                 // There's a cpu for us, so we can run.
1816                 m->mcache = m->p->mcache;
1817                 m->p->m = m;
1818                 m->p->tick++;
1819                 gp->status = Grunning;
1820                 // Garbage collector isn't running (since we are),
1821                 // so okay to clear gcstack and gcsp.
1822 #ifdef USING_SPLIT_STACK
1823                 gp->gcstack = nil;
1824 #endif
1825                 gp->gcnext_sp = nil;
1826                 runtime_memclr(&gp->gcregs, sizeof gp->gcregs);
1827                 return;
1828         }
1829
1830         if(gp->isbackground)  // do not consider blocked scavenger for deadlock detection
1831                 inclocked(-1);
1832         // Try to get any other idle P.
1833         m->p = nil;
1834         if(runtime_sched.pidle) {
1835                 runtime_lock(&runtime_sched);
1836                 p = pidleget();
1837                 runtime_unlock(&runtime_sched);
1838                 if(p) {
1839                         acquirep(p);
1840 #ifdef USING_SPLIT_STACK
1841                         gp->gcstack = nil;
1842 #endif
1843                         gp->gcnext_sp = nil;
1844                         runtime_memclr(&gp->gcregs, sizeof gp->gcregs);
1845                         return;
1846                 }
1847         }
1848
1849         // Call the scheduler.
1850         runtime_mcall(exitsyscall0);
1851
1852         // Scheduler returned, so we're allowed to run now.
1853         // Delete the gcstack information that we left for
1854         // the garbage collector during the system call.
1855         // Must wait until now because until gosched returns
1856         // we don't know for sure that the garbage collector
1857         // is not running.
1858 #ifdef USING_SPLIT_STACK
1859         gp->gcstack = nil;
1860 #endif
1861         gp->gcnext_sp = nil;
1862         runtime_memclr(&gp->gcregs, sizeof gp->gcregs);
1863 }
1864
1865 // runtime_exitsyscall slow path on g0.
1866 // Failed to acquire P, enqueue gp as runnable.
1867 static void
1868 exitsyscall0(G *gp)
1869 {
1870         P *p;
1871
1872         gp->status = Grunnable;
1873         gp->m = nil;
1874         m->curg = nil;
1875         runtime_lock(&runtime_sched);
1876         p = pidleget();
1877         if(p == nil)
1878                 globrunqput(gp);
1879         runtime_unlock(&runtime_sched);
1880         if(p) {
1881                 acquirep(p);
1882                 execute(gp);  // Never returns.
1883         }
1884         if(m->lockedg) {
1885                 // Wait until another thread schedules gp and so m again.
1886                 stoplockedm();
1887                 execute(gp);  // Never returns.
1888         }
1889         stopm();
1890         schedule();  // Never returns.
1891 }
1892
1893 // Allocate a new g, with a stack big enough for stacksize bytes.
1894 G*
1895 runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize)
1896 {
1897         G *newg;
1898
1899         newg = runtime_malloc(sizeof(G));
1900         if(stacksize >= 0) {
1901 #ifdef USING_SPLIT_STACK
1902                 int dont_block_signals = 0;
1903
1904                 *ret_stack = __splitstack_makecontext(stacksize,
1905                                                       &newg->stack_context[0],
1906                                                       ret_stacksize);
1907                 __splitstack_block_signals_context(&newg->stack_context[0],
1908                                                    &dont_block_signals, nil);
1909 #else
1910                 *ret_stack = runtime_mallocgc(stacksize, FlagNoProfiling|FlagNoGC, 0, 0);
1911                 *ret_stacksize = stacksize;
1912                 newg->gcinitial_sp = *ret_stack;
1913                 newg->gcstack_size = stacksize;
1914                 runtime_xadd(&runtime_stacks_sys, stacksize);
1915 #endif
1916         }
1917         return newg;
1918 }
1919
1920 /* For runtime package testing.  */
1921
1922 void runtime_testing_entersyscall(void)
1923   __asm__ (GOSYM_PREFIX "runtime.entersyscall");
1924
1925 void
1926 runtime_testing_entersyscall()
1927 {
1928         runtime_entersyscall();
1929 }
1930
1931 void runtime_testing_exitsyscall(void)
1932   __asm__ (GOSYM_PREFIX "runtime.exitsyscall");
1933
1934 void
1935 runtime_testing_exitsyscall()
1936 {
1937         runtime_exitsyscall();
1938 }
1939
1940 G*
1941 __go_go(void (*fn)(void*), void* arg)
1942 {
1943         byte *sp;
1944         size_t spsize;
1945         G *newg;
1946
1947         m->locks++;  // disable preemption because it can be holding p in a local var
1948
1949         if((newg = gfget(m->p)) != nil) {
1950 #ifdef USING_SPLIT_STACK
1951                 int dont_block_signals = 0;
1952
1953                 sp = __splitstack_resetcontext(&newg->stack_context[0],
1954                                                &spsize);
1955                 __splitstack_block_signals_context(&newg->stack_context[0],
1956                                                    &dont_block_signals, nil);
1957 #else
1958                 sp = newg->gcinitial_sp;
1959                 spsize = newg->gcstack_size;
1960                 if(spsize == 0)
1961                         runtime_throw("bad spsize in __go_go");
1962                 newg->gcnext_sp = sp;
1963 #endif
1964         } else {
1965                 newg = runtime_malg(StackMin, &sp, &spsize);
1966                 runtime_lock(&runtime_sched);
1967                 if(runtime_lastg == nil)
1968                         runtime_allg = newg;
1969                 else
1970                         runtime_lastg->alllink = newg;
1971                 runtime_lastg = newg;
1972                 runtime_unlock(&runtime_sched);
1973         }
1974
1975         newg->entry = (byte*)fn;
1976         newg->param = arg;
1977         newg->gopc = (uintptr)__builtin_return_address(0);
1978         newg->status = Grunnable;
1979         newg->goid = runtime_xadd64(&runtime_sched.goidgen, 1);
1980
1981         {
1982                 // Avoid warnings about variables clobbered by
1983                 // longjmp.
1984                 byte * volatile vsp = sp;
1985                 size_t volatile vspsize = spsize;
1986                 G * volatile vnewg = newg;
1987
1988                 getcontext(&vnewg->context);
1989                 vnewg->context.uc_stack.ss_sp = vsp;
1990 #ifdef MAKECONTEXT_STACK_TOP
1991                 vnewg->context.uc_stack.ss_sp += vspsize;
1992 #endif
1993                 vnewg->context.uc_stack.ss_size = vspsize;
1994                 makecontext(&vnewg->context, kickoff, 0);
1995
1996                 runqput(m->p, vnewg);
1997
1998                 if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0 && fn != runtime_main)  // TODO: fast atomic
1999                         wakep();
2000                 m->locks--;
2001                 return vnewg;
2002         }
2003 }
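// Example (sketch): roughly what a lowered "go f(x)" looks like once
// the compiler has packaged the arguments and built a thunk to hand to
// __go_go.  The names "go_args", "thunk" and "a" are illustrative; the
// compiler-generated symbols and argument layout differ.
//
//	struct go_args { int x; };
//
//	static void
//	thunk(void *p)
//	{
//		struct go_args *a = (struct go_args *) p;
//		f(a->x);
//	}
//
//	struct go_args *a = runtime_malloc(sizeof *a);
//	a->x = x;
//	__go_go(thunk, a);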
2004
2005 // Put on gfree list.
2006 // If local list is too long, transfer a batch to the global list.
2007 static void
2008 gfput(P *p, G *gp)
2009 {
2010         gp->schedlink = p->gfree;
2011         p->gfree = gp;
2012         p->gfreecnt++;
2013         if(p->gfreecnt >= 64) {
2014                 runtime_lock(&runtime_sched.gflock);
2015                 while(p->gfreecnt >= 32) {
2016                         p->gfreecnt--;
2017                         gp = p->gfree;
2018                         p->gfree = gp->schedlink;
2019                         gp->schedlink = runtime_sched.gfree;
2020                         runtime_sched.gfree = gp;
2021                 }
2022                 runtime_unlock(&runtime_sched.gflock);
2023         }
2024 }
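// The local free list is bounded with hysteresis: once it reaches 64
// G's, gfput drains it down to 32 by pushing the excess onto the global
// gfree list, and gfget (below) refills from the global list in batches
// of up to 32.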
2025
2026 // Get from gfree list.
2027 // If local list is empty, grab a batch from global list.
2028 static G*
2029 gfget(P *p)
2030 {
2031         G *gp;
2032
2033 retry:
2034         gp = p->gfree;
2035         if(gp == nil && runtime_sched.gfree) {
2036                 runtime_lock(&runtime_sched.gflock);
2037                 while(p->gfreecnt < 32 && runtime_sched.gfree) {
2038                         p->gfreecnt++;
2039                         gp = runtime_sched.gfree;
2040                         runtime_sched.gfree = gp->schedlink;
2041                         gp->schedlink = p->gfree;
2042                         p->gfree = gp;
2043                 }
2044                 runtime_unlock(&runtime_sched.gflock);
2045                 goto retry;
2046         }
2047         if(gp) {
2048                 p->gfree = gp->schedlink;
2049                 p->gfreecnt--;
2050         }
2051         return gp;
2052 }
2053
2054 // Purge all cached G's from gfree list to the global list.
2055 static void
2056 gfpurge(P *p)
2057 {
2058         G *gp;
2059
2060         runtime_lock(&runtime_sched.gflock);
2061         while(p->gfreecnt) {
2062                 p->gfreecnt--;
2063                 gp = p->gfree;
2064                 p->gfree = gp->schedlink;
2065                 gp->schedlink = runtime_sched.gfree;
2066                 runtime_sched.gfree = gp;
2067         }
2068         runtime_unlock(&runtime_sched.gflock);
2069 }
2070
2071 void
2072 runtime_Breakpoint(void)
2073 {
2074         runtime_breakpoint();
2075 }
2076
2077 void runtime_Gosched (void) __asm__ (GOSYM_PREFIX "runtime.Gosched");
2078
2079 void
2080 runtime_Gosched(void)
2081 {
2082         runtime_gosched();
2083 }
2084
2085 // Implementation of runtime.GOMAXPROCS.
2086 // delete when scheduler is even stronger
2087 int32
2088 runtime_gomaxprocsfunc(int32 n)
2089 {
2090         int32 ret;
2091
2092         if(n > MaxGomaxprocs)
2093                 n = MaxGomaxprocs;
2094         runtime_lock(&runtime_sched);
2095         ret = runtime_gomaxprocs;
2096         if(n <= 0 || n == ret) {
2097                 runtime_unlock(&runtime_sched);
2098                 return ret;
2099         }
2100         runtime_unlock(&runtime_sched);
2101
2102         runtime_semacquire(&runtime_worldsema);
2103         m->gcing = 1;
2104         runtime_stoptheworld();
2105         newprocs = n;
2106         m->gcing = 0;
2107         runtime_semrelease(&runtime_worldsema);
2108         runtime_starttheworld();
2109
2110         return ret;
2111 }
2112
2113 static void
2114 LockOSThread(void)
2115 {
2116         m->lockedg = g;
2117         g->lockedm = m;
2118 }
2119
2120 void    runtime_LockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.LockOSThread");
2121 void
2122 runtime_LockOSThread(void)
2123 {
2124         m->locked |= LockExternal;
2125         LockOSThread();
2126 }
2127
2128 void
2129 runtime_lockOSThread(void)
2130 {
2131         m->locked += LockInternal;
2132         LockOSThread();
2133 }
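// m->locked keeps the two locking flavors separate: LockExternal is a
// single bit set by the exported runtime.LockOSThread, while
// LockInternal is a nesting counter for runtime-internal callers.  The
// thread/goroutine association is only broken once both are clear, as
// UnlockOSThread below checks.  For example:
//
//	runtime_LockOSThread();    // m->locked == LockExternal
//	runtime_lockOSThread();    // m->locked == LockExternal | LockInternal
//	runtime_unlockOSThread();  // m->locked == LockExternal; still locked
//	runtime_UnlockOSThread();  // m->locked == 0; lockedg/lockedm cleared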
2134
2135 static void
2136 UnlockOSThread(void)
2137 {
2138         if(m->locked != 0)
2139                 return;
2140         m->lockedg = nil;
2141         g->lockedm = nil;
2142 }
2143
2144 void    runtime_UnlockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.UnlockOSThread");
2145
2146 void
2147 runtime_UnlockOSThread(void)
2148 {
2149         m->locked &= ~LockExternal;
2150         UnlockOSThread();
2151 }
2152
2153 void
2154 runtime_unlockOSThread(void)
2155 {
2156         if(m->locked < LockInternal)
2157                 runtime_throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
2158         m->locked -= LockInternal;
2159         UnlockOSThread();
2160 }
2161
2162 bool
2163 runtime_lockedOSThread(void)
2164 {
2165         return g->lockedm != nil && m->lockedg != nil;
2166 }
2167
2168 // for testing of callbacks
2169
2170 _Bool runtime_golockedOSThread(void)
2171   __asm__ (GOSYM_PREFIX "runtime.golockedOSThread");
2172
2173 _Bool
2174 runtime_golockedOSThread(void)
2175 {
2176         return runtime_lockedOSThread();
2177 }
2178
2179 // for testing of wire, unwire
2180 uint32
2181 runtime_mid()
2182 {
2183         return m->id;
2184 }
2185
2186 intgo runtime_NumGoroutine (void)
2187   __asm__ (GOSYM_PREFIX "runtime.NumGoroutine");
2188
2189 intgo
2190 runtime_NumGoroutine()
2191 {
2192         return runtime_gcount();
2193 }
2194
2195 int32
2196 runtime_gcount(void)
2197 {
2198         G *gp;
2199         int32 n, s;
2200
2201         n = 0;
2202         runtime_lock(&runtime_sched);
2203         // TODO(dvyukov): runtime.NumGoroutine() is O(N).
2204         // We do not want to increment/decrement centralized counter in newproc/goexit,
2205         // just to make runtime.NumGoroutine() faster.
2206         // Compromise solution is to introduce per-P counters of active goroutines.
2207         for(gp = runtime_allg; gp; gp = gp->alllink) {
2208                 s = gp->status;
2209                 if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting)
2210                         n++;
2211         }
2212         runtime_unlock(&runtime_sched);
2213         return n;
2214 }
2215
2216 int32
2217 runtime_mcount(void)
2218 {
2219         return runtime_sched.mcount;
2220 }
2221
2222 static struct {
2223         Lock;
2224         void (*fn)(uintptr*, int32);
2225         int32 hz;
2226         uintptr pcbuf[100];
2227         Location locbuf[100];
2228 } prof;
2229
2230 // Called if we receive a SIGPROF signal.
2231 void
2232 runtime_sigprof()
2233 {
2234         int32 n, i;
2235
2236         // Windows does profiling in a dedicated thread w/o m.
2237         if(!Windows && (m == nil || m->mcache == nil))
2238                 return;
2239         if(prof.fn == nil || prof.hz == 0)
2240                 return;
2241
2242         runtime_lock(&prof);
2243         if(prof.fn == nil) {
2244                 runtime_unlock(&prof);
2245                 return;
2246         }
2247         n = runtime_callers(0, prof.locbuf, nelem(prof.locbuf));
2248         for(i = 0; i < n; i++)
2249                 prof.pcbuf[i] = prof.locbuf[i].pc;
2250         if(n > 0)
2251                 prof.fn(prof.pcbuf, n);
2252         runtime_unlock(&prof);
2253 }
2254
2255 // Arrange to call fn with a traceback hz times a second.
2256 void
2257 runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
2258 {
2259         // Force sane arguments.
2260         if(hz < 0)
2261                 hz = 0;
2262         if(hz == 0)
2263                 fn = nil;
2264         if(fn == nil)
2265                 hz = 0;
2266
2267         // Stop the profiler on this cpu so that it is safe to lock prof:
2268         // if a profiling signal came in while we held the prof lock,
2269         // it would deadlock.
2270         runtime_resetcpuprofiler(0);
2271
2272         runtime_lock(&prof);
2273         prof.fn = fn;
2274         prof.hz = hz;
2275         runtime_unlock(&prof);
2276         runtime_lock(&runtime_sched);
2277         runtime_sched.profilehz = hz;
2278         runtime_unlock(&runtime_sched);
2279
2280         if(hz != 0)
2281                 runtime_resetcpuprofiler(hz);
2282 }
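// Example (sketch): how a profiler might drive this, with a
// hypothetical "record" callback that consumes the pc buffer.  Passing
// a nil fn or a zero hz (the argument sanitizing above treats them the
// same) turns sampling back off.
//
//	static void
//	record(uintptr *pc, int32 n)
//	{
//		// copy n program counters out of pc
//	}
//
//	runtime_setcpuprofilerate(record, 100);  // ~100 samples per second
//	...
//	runtime_setcpuprofilerate(nil, 0);       // stop profiling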
2283
2284 // Change number of processors.  The world is stopped, sched is locked.
2285 static void
2286 procresize(int32 new)
2287 {
2288         int32 i, old;
2289         G *gp;
2290         P *p;
2291
2292         old = runtime_gomaxprocs;
2293         if(old < 0 || old > MaxGomaxprocs || new <= 0 || new > MaxGomaxprocs)
2294                 runtime_throw("procresize: invalid arg");
2295         // initialize new P's
2296         for(i = 0; i < new; i++) {
2297                 p = runtime_allp[i];
2298                 if(p == nil) {
2299                         p = (P*)runtime_mallocgc(sizeof(*p), 0, 0, 1);
2300                         p->status = Pgcstop;
2301                         runtime_atomicstorep(&runtime_allp[i], p);
2302                 }
2303                 if(p->mcache == nil) {
2304                         if(old==0 && i==0)
2305                                 p->mcache = m->mcache;  // bootstrap
2306                         else
2307                                 p->mcache = runtime_allocmcache();
2308                 }
2309                 if(p->runq == nil) {
2310                         p->runqsize = 128;
2311                         p->runq = (G**)runtime_mallocgc(p->runqsize*sizeof(G*), 0, 0, 1);
2312                 }
2313         }
2314
2315         // redistribute runnable G's evenly
2316         for(i = 0; i < old; i++) {
2317                 p = runtime_allp[i];
2318                 while((gp = runqget(p)) != nil)
2319                         globrunqput(gp);
2320         }
2321         // start at 1 because current M already executes some G and will acquire allp[0] below,
2322         // so if we have a spare G we want to put it into allp[1].
2323         for(i = 1; runtime_sched.runqhead; i++) {
2324                 gp = runtime_sched.runqhead;
2325                 runtime_sched.runqhead = gp->schedlink;
2326                 runqput(runtime_allp[i%new], gp);
2327         }
2328         runtime_sched.runqtail = nil;
2329         runtime_sched.runqsize = 0;
2330
2331         // free unused P's
2332         for(i = new; i < old; i++) {
2333                 p = runtime_allp[i];
2334                 runtime_freemcache(p->mcache);
2335                 p->mcache = nil;
2336                 gfpurge(p);
2337                 p->status = Pdead;
2338                 // can't free P itself because it can be referenced by an M in syscall
2339         }
2340
2341         if(m->p)
2342                 m->p->m = nil;
2343         m->p = nil;
2344         m->mcache = nil;
2345         p = runtime_allp[0];
2346         p->m = nil;
2347         p->status = Pidle;
2348         acquirep(p);
2349         for(i = new-1; i > 0; i--) {
2350                 p = runtime_allp[i];
2351                 p->status = Pidle;
2352                 pidleput(p);
2353         }
2354         runtime_singleproc = new == 1;
2355         runtime_atomicstore((uint32*)&runtime_gomaxprocs, new);
2356 }
2357
2358 // Associate p and the current m.
2359 static void
2360 acquirep(P *p)
2361 {
2362         if(m->p || m->mcache)
2363                 runtime_throw("acquirep: already in go");
2364         if(p->m || p->status != Pidle) {
2365                 runtime_printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? p->m->id : 0, p->status);
2366                 runtime_throw("acquirep: invalid p state");
2367         }
2368         m->mcache = p->mcache;
2369         m->p = p;
2370         p->m = m;
2371         p->status = Prunning;
2372 }
2373
2374 // Disassociate p and the current m.
2375 static P*
2376 releasep(void)
2377 {
2378         P *p;
2379
2380         if(m->p == nil || m->mcache == nil)
2381                 runtime_throw("releasep: invalid arg");
2382         p = m->p;
2383         if(p->m != m || p->mcache != m->mcache || p->status != Prunning) {
2384                 runtime_printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
2385                         m, m->p, p->m, m->mcache, p->mcache, p->status);
2386                 runtime_throw("releasep: invalid p state");
2387         }
2388         m->p = nil;
2389         m->mcache = nil;
2390         p->m = nil;
2391         p->status = Pidle;
2392         return p;
2393 }
2394
2395 static void
2396 inclocked(int32 v)
2397 {
2398         runtime_lock(&runtime_sched);
2399         runtime_sched.mlocked += v;
2400         if(v > 0)
2401                 checkdead();
2402         runtime_unlock(&runtime_sched);
2403 }
2404
2405 // Check for deadlock situation.
2406 // The check is based on the number of running M's; if it is 0, we have a deadlock.
2407 static void
2408 checkdead(void)
2409 {
2410         G *gp;
2411         int32 run, grunning, s;
2412
2413         // -1 for sysmon
2414         run = runtime_sched.mcount - runtime_sched.nmidle - runtime_sched.mlocked - 1 - countextra();
2415         if(run > 0)
2416                 return;
2417         if(run < 0) {
2418                 runtime_printf("checkdead: nmidle=%d mlocked=%d mcount=%d\n",
2419                         runtime_sched.nmidle, runtime_sched.mlocked, runtime_sched.mcount);
2420                 runtime_throw("checkdead: inconsistent counts");
2421         }
2422         grunning = 0;
2423         for(gp = runtime_allg; gp; gp = gp->alllink) {
2424                 if(gp->isbackground)
2425                         continue;
2426                 s = gp->status;
2427                 if(s == Gwaiting)
2428                         grunning++;
2429                 else if(s == Grunnable || s == Grunning || s == Gsyscall) {
2430                         runtime_printf("checkdead: find g %D in status %d\n", gp->goid, s);
2431                         runtime_throw("checkdead: runnable g");
2432                 }
2433         }
2434         if(grunning == 0)  // possible if main goroutine calls runtime_Goexit()
2435                 runtime_exit(0);
2436         m->throwing = -1;  // do not dump full stacks
2437         runtime_throw("all goroutines are asleep - deadlock!");
2438 }
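// Worked example: with mcount=4, nmidle=2, mlocked=1 and countextra()
// returning 0, run = 4 - 2 - 1 - 1 - 0 = 0: no M can make progress, so
// the goroutine scan above then either exits the process (no
// non-background goroutine left waiting) or reports the deadlock.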
2439
2440 static void
2441 sysmon(void)
2442 {
2443         uint32 idle, delay;
2444         int64 now, lastpoll;
2445         G *gp;
2446         uint32 ticks[MaxGomaxprocs];
2447
2448         idle = 0;  // how many cycles in succession we have not woken up somebody
2449         delay = 0;
2450         for(;;) {
2451                 if(idle == 0)  // start with 20us sleep...
2452                         delay = 20;
2453                 else if(idle > 50)  // start doubling the sleep after 1ms...
2454                         delay *= 2;
2455                 if(delay > 10*1000)  // up to 10ms
2456                         delay = 10*1000;
2457                 runtime_usleep(delay);
2458                 if(runtime_gcwaiting || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs) {  // TODO: fast atomic
2459                         runtime_lock(&runtime_sched);
2460                         if(runtime_atomicload(&runtime_gcwaiting) || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs) {
2461                                 runtime_atomicstore(&runtime_sched.sysmonwait, 1);
2462                                 runtime_unlock(&runtime_sched);
2463                                 runtime_notesleep(&runtime_sched.sysmonnote);
2464                                 runtime_noteclear(&runtime_sched.sysmonnote);
2465                                 idle = 0;
2466                                 delay = 20;
2467                         } else
2468                                 runtime_unlock(&runtime_sched);
2469                 }
2470                 // poll network if not polled for more than 10ms
2471                 lastpoll = runtime_atomicload64(&runtime_sched.lastpoll);
2472                 now = runtime_nanotime();
2473                 if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
2474                         gp = runtime_netpoll(false);  // non-blocking
2475                         injectglist(gp);
2476                 }
2477                 // retake P's blocked in syscalls
2478                 if(retake(ticks))
2479                         idle = 0;
2480                 else
2481                         idle++;
2482         }
2483 }
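// The back-off above works out to: sleep 20us while retake() keeps
// finding work, then after roughly 50 idle rounds (about 1ms of
// accumulated sleep) start doubling the delay each round, clamped at
// 10ms.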
2484
2485 static uint32
2486 retake(uint32 *ticks)
2487 {
2488         uint32 i, s, n;
2489         int64 t;
2490         P *p;
2491
2492         n = 0;
2493         for(i = 0; i < (uint32)runtime_gomaxprocs; i++) {
2494                 p = runtime_allp[i];
2495                 if(p==nil)
2496                         continue;
2497                 t = p->tick;
2498                 if(ticks[i] != t) {
2499                         ticks[i] = t;
2500                         continue;
2501                 }
2502                 s = p->status;
2503                 if(s != Psyscall)
2504                         continue;
2505                 if(p->runqhead == p->runqtail && runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) > 0)  // TODO: fast atomic
2506                         continue;
2507                 // Need to increment number of locked M's before the CAS.
2508                 // Otherwise the M from which we retake can exit the syscall,
2509                 // increment nmidle and report deadlock.
2510                 inclocked(-1);
2511                 if(runtime_cas(&p->status, s, Pidle)) {
2512                         n++;
2513                         handoffp(p);
2514                 }
2515                 inclocked(1);
2516         }
2517         return n;
2518 }
2519
2520 // Put mp on midle list.
2521 // Sched must be locked.
2522 static void
2523 mput(M *mp)
2524 {
2525         mp->schedlink = runtime_sched.midle;
2526         runtime_sched.midle = mp;
2527         runtime_sched.nmidle++;
2528         checkdead();
2529 }
2530
2531 // Try to get an m from midle list.
2532 // Sched must be locked.
2533 static M*
2534 mget(void)
2535 {
2536         M *mp;
2537
2538         if((mp = runtime_sched.midle) != nil){
2539                 runtime_sched.midle = mp->schedlink;
2540                 runtime_sched.nmidle--;
2541         }
2542         return mp;
2543 }
2544
2545 // Put gp on the global runnable queue.
2546 // Sched must be locked.
2547 static void
2548 globrunqput(G *gp)
2549 {
2550         gp->schedlink = nil;
2551         if(runtime_sched.runqtail)
2552                 runtime_sched.runqtail->schedlink = gp;
2553         else
2554                 runtime_sched.runqhead = gp;
2555         runtime_sched.runqtail = gp;
2556         runtime_sched.runqsize++;
2557 }
2558
2559 // Try to get a batch of G's from the global runnable queue.
2560 // Sched must be locked.
2561 static G*
2562 globrunqget(P *p)
2563 {
2564         G *gp, *gp1;
2565         int32 n;
2566
2567         if(runtime_sched.runqsize == 0)
2568                 return nil;
2569         n = runtime_sched.runqsize/runtime_gomaxprocs+1;
2570         if(n > runtime_sched.runqsize)
2571                 n = runtime_sched.runqsize;
2572         runtime_sched.runqsize -= n;
2573         if(runtime_sched.runqsize == 0)
2574                 runtime_sched.runqtail = nil;
2575         gp = runtime_sched.runqhead;
2576         runtime_sched.runqhead = gp->schedlink;
2577         n--;
2578         while(n--) {
2579                 gp1 = runtime_sched.runqhead;
2580                 runtime_sched.runqhead = gp1->schedlink;
2581                 runqput(p, gp1);
2582         }
2583         return gp;
2584 }
2585
2586 // Put p on the pidle list.
2587 // Sched must be locked.
2588 static void
2589 pidleput(P *p)
2590 {
2591         p->link = runtime_sched.pidle;
2592         runtime_sched.pidle = p;
2593         runtime_xadd(&runtime_sched.npidle, 1);  // TODO: fast atomic
2594 }
2595
2596 // Try to get a P from the pidle list.
2597 // Sched must be locked.
2598 static P*
2599 pidleget(void)
2600 {
2601         P *p;
2602
2603         p = runtime_sched.pidle;
2604         if(p) {
2605                 runtime_sched.pidle = p->link;
2606                 runtime_xadd(&runtime_sched.npidle, -1);  // TODO: fast atomic
2607         }
2608         return p;
2609 }
2610
2611 // Put g on local runnable queue.
2612 // TODO(dvyukov): consider using lock-free queue.
2613 static void
2614 runqput(P *p, G *gp)
2615 {
2616         int32 h, t, s;
2617
2618         runtime_lock(p);
2619 retry:
2620         h = p->runqhead;
2621         t = p->runqtail;
2622         s = p->runqsize;
2623         if(t == h-1 || (h == 0 && t == s-1)) {
2624                 runqgrow(p);
2625                 goto retry;
2626         }
2627         p->runq[t++] = gp;
2628         if(t == s)
2629                 t = 0;
2630         p->runqtail = t;
2631         runtime_unlock(p);
2632 }
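// The ring buffer deliberately keeps one slot unused so that
// runqhead == runqtail always means "empty".  With runqsize == 4, for
// example, both (h=0, t=3) and (h=2, t=1) are the "full" states that
// make runqput grow the queue.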
2633
2634 // Get g from local runnable queue.
2635 static G*
2636 runqget(P *p)
2637 {
2638         G *gp;
2639         int32 t, h, s;
2640
2641         if(p->runqhead == p->runqtail)
2642                 return nil;
2643         runtime_lock(p);
2644         h = p->runqhead;
2645         t = p->runqtail;
2646         s = p->runqsize;
2647         if(t == h) {
2648                 runtime_unlock(p);
2649                 return nil;
2650         }
2651         gp = p->runq[h++];
2652         if(h == s)
2653                 h = 0;
2654         p->runqhead = h;
2655         runtime_unlock(p);
2656         return gp;
2657 }
2658
2659 // Grow local runnable queue.
2660 // TODO(dvyukov): consider using fixed-size array
2661 // and transfer excess to the global list (local queue can grow way too big).
2662 static void
2663 runqgrow(P *p)
2664 {
2665         G **q;
2666         int32 s, t, h, t2;
2667
2668         h = p->runqhead;
2669         t = p->runqtail;
2670         s = p->runqsize;
2671         t2 = 0;
2672         q = runtime_malloc(2*s*sizeof(*q));
2673         while(t != h) {
2674                 q[t2++] = p->runq[h++];
2675                 if(h == s)
2676                         h = 0;
2677         }
2678         runtime_free(p->runq);
2679         p->runq = q;
2680         p->runqhead = 0;
2681         p->runqtail = t2;
2682         p->runqsize = 2*s;
2683 }
2684
2685 // Steal half of elements from local runnable queue of p2
2686 // and put onto local runnable queue of p.
2687 // Returns one of the stolen elements (or nil if failed).
2688 static G*
2689 runqsteal(P *p, P *p2)
2690 {
2691         G *gp, *gp1;
2692         int32 t, h, s, t2, h2, s2, c, i;
2693
2694         if(p2->runqhead == p2->runqtail)
2695                 return nil;
2696         // sort locks to prevent deadlocks
2697         if(p < p2)
2698                 runtime_lock(p);
2699         runtime_lock(p2);
2700         if(p2->runqhead == p2->runqtail) {
2701                 runtime_unlock(p2);
2702                 if(p < p2)
2703                         runtime_unlock(p);
2704                 return nil;
2705         }
2706         if(p >= p2)
2707                 runtime_lock(p);
2708         // now we've locked both queues and know the victim is not empty
2709         h = p->runqhead;
2710         t = p->runqtail;
2711         s = p->runqsize;
2712         h2 = p2->runqhead;
2713         t2 = p2->runqtail;
2714         s2 = p2->runqsize;
2715         gp = p2->runq[h2++];  // return value
2716         if(h2 == s2)
2717                 h2 = 0;
2718         // steal roughly half
2719         if(t2 > h2)
2720                 c = (t2 - h2) / 2;
2721         else
2722                 c = (s2 - h2 + t2) / 2;
2723         // copy
2724         for(i = 0; i != c; i++) {
2725                 // stop if the target queue is full
2726                 if(t == h-1 || (h == 0 && t == s-1))
2727                         break;
2728                 // the victim queue is empty?
2729                 // stop if the victim queue has drained
2730                         break;
2731                 gp1 = p2->runq[h2++];
2732                 if(h2 == s2)
2733                         h2 = 0;
2734                 p->runq[t++] = gp1;
2735                 if(t == s)
2736                         t = 0;
2737         }
2738         p->runqtail = t;
2739         p2->runqhead = h2;
2740         runtime_unlock(p2);
2741         runtime_unlock(p);
2742         return gp;
2743 }
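// Worked example: with s2=16, h2=2, t2=8 the victim holds 6 G's.  One
// is taken immediately as the return value (advancing h2 to 3), then
// c = (8-3)/2 = 2 more are copied, so 3 of the 6 leave the victim: the
// returned one plus two placed on p's queue, i.e. "roughly half".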
2744
2745 void runtime_testSchedLocalQueue(void)
2746   __asm__ (GOSYM_PREFIX "runtime.testSchedLocalQueue");
2747
2748 void
2749 runtime_testSchedLocalQueue(void)
2750 {
2751         P p;
2752         G gs[1000];
2753         int32 i, j;
2754
2755         runtime_memclr((byte*)&p, sizeof(p));
2756         p.runqsize = 1;
2757         p.runqhead = 0;
2758         p.runqtail = 0;
2759         p.runq = runtime_malloc(p.runqsize*sizeof(*p.runq));
2760
2761         for(i = 0; i < (int32)nelem(gs); i++) {
2762                 if(runqget(&p) != nil)
2763                         runtime_throw("runq is not empty initially");
2764                 for(j = 0; j < i; j++)
2765                         runqput(&p, &gs[i]);
2766                 for(j = 0; j < i; j++) {
2767                         if(runqget(&p) != &gs[i]) {
2768                                 runtime_printf("bad element at iter %d/%d\n", i, j);
2769                                 runtime_throw("bad element");
2770                         }
2771                 }
2772                 if(runqget(&p) != nil)
2773                         runtime_throw("runq is not empty afterwards");
2774         }
2775 }
2776
2777 void runtime_testSchedLocalQueueSteal(void)
2778   __asm__ (GOSYM_PREFIX "runtime.testSchedLocalQueueSteal");
2779
2780 void
2781 runtime_testSchedLocalQueueSteal(void)
2782 {
2783         P p1, p2;
2784         G gs[1000], *gp;
2785         int32 i, j, s;
2786
2787         runtime_memclr((byte*)&p1, sizeof(p1));
2788         p1.runqsize = 1;
2789         p1.runqhead = 0;
2790         p1.runqtail = 0;
2791         p1.runq = runtime_malloc(p1.runqsize*sizeof(*p1.runq));
2792
2793         runtime_memclr((byte*)&p2, sizeof(p2));
2794         p2.runqsize = nelem(gs);
2795         p2.runqhead = 0;
2796         p2.runqtail = 0;
2797         p2.runq = runtime_malloc(p2.runqsize*sizeof(*p2.runq));
2798
2799         for(i = 0; i < (int32)nelem(gs); i++) {
2800                 for(j = 0; j < i; j++) {
2801                         gs[j].sig = 0;
2802                         runqput(&p1, &gs[j]);
2803                 }
2804                 gp = runqsteal(&p2, &p1);
2805                 s = 0;
2806                 if(gp) {
2807                         s++;
2808                         gp->sig++;
2809                 }
2810                 while((gp = runqget(&p2)) != nil) {
2811                         s++;
2812                         gp->sig++;
2813                 }
2814                 while((gp = runqget(&p1)) != nil)
2815                         gp->sig++;
2816                 for(j = 0; j < i; j++) {
2817                         if(gs[j].sig != 1) {
2818                                 runtime_printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
2819                                 runtime_throw("bad element");
2820                         }
2821                 }
2822                 if(s != i/2 && s != i/2+1) {
2823                         runtime_printf("bad steal %d, want %d or %d, iter %d\n",
2824                                 s, i/2, i/2+1, i);
2825                         runtime_throw("bad steal");
2826                 }
2827         }
2828 }
2829
2830 void
2831 runtime_proc_scan(void (*addroot)(Obj))
2832 {
2833         addroot((Obj){(byte*)&runtime_sched, sizeof runtime_sched, 0});
2834 }
2835
2836 // When a function calls a closure, it passes the closure value to
2837 // __go_set_closure immediately before the function call.  When a
2838 // function uses a closure, it calls __go_get_closure immediately on
2839 // function entry.  This is a hack, but it will work on any system.
2840 // It would be better to use the static chain register when there is
2841 // one.  It is also worth considering expanding these functions
2842 // directly in the compiler.
2843
2844 void
2845 __go_set_closure(void* v)
2846 {
2847         g->closure = v;
2848 }
2849
2850 void *
2851 __go_get_closure(void)
2852 {
2853         return g->closure;
2854 }
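// Example (sketch): the calling convention described in the comment
// above, written out by hand.  "closure_value" and "fn" are
// placeholders for the compiler-generated func value and its code
// pointer; the real layout is up to the compiler.
//
//	// caller: stash the closure value, then call through the
//	// code pointer as usual
//	__go_set_closure(closure_value);
//	fn(arg);
//
//	// callee, on entry, if it references captured variables:
//	void *closure_value = __go_get_closure();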