1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
13 #ifdef HAVE_DL_ITERATE_PHDR
25 #ifdef USING_SPLIT_STACK
27 /* FIXME: These are not declared anywhere. */
29 extern void __splitstack_getcontext(void *context[10]);
31 extern void __splitstack_setcontext(void *context[10]);
33 extern void *__splitstack_makecontext(size_t, void *context[10], size_t *);
35 extern void * __splitstack_resetcontext(void *context[10], size_t *);
37 extern void *__splitstack_find(void *, void *, size_t *, void **, void **,
40 extern void __splitstack_block_signals (int *, int *);
42 extern void __splitstack_block_signals_context (void *context[10], int *,
47 #ifndef PTHREAD_STACK_MIN
48 # define PTHREAD_STACK_MIN 8192
51 #if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK)
52 # define StackMin PTHREAD_STACK_MIN
54 # define StackMin (2 * 1024 * 1024)
57 uintptr runtime_stacks_sys;
59 static void gtraceback(G*);
68 #ifndef SETCONTEXT_CLOBBERS_TLS
76 fixcontext(ucontext_t *c __attribute__ ((unused)))
82 # if defined(__x86_64__) && defined(__sun__)
84 // x86_64 Solaris 10 and 11 have a bug: setcontext switches the %fs
85 // register to that of the thread which called getcontext. The effect
86 // is that the address of all __thread variables changes. This bug
87 // also affects pthread_self() and pthread_getspecific. We work
88 // around it by clobbering the context field directly to keep %fs the
91 static __thread greg_t fs;
99 fs = c.uc_mcontext.gregs[REG_FSBASE];
103 fixcontext(ucontext_t* c)
105 c->uc_mcontext.gregs[REG_FSBASE] = fs;
108 # elif defined(__NetBSD__)
110 // NetBSD has a bug: setcontext clobbers tlsbase, we need to save
111 // and restore it ourselves.
113 static __thread __greg_t tlsbase;
121 tlsbase = c.uc_mcontext._mc_tlsbase;
125 fixcontext(ucontext_t* c)
127 c->uc_mcontext._mc_tlsbase = tlsbase;
132 # error unknown case for SETCONTEXT_CLOBBERS_TLS
138 // We can not always refer to the TLS variables directly. The
139 // compiler will call tls_get_addr to get the address of the variable,
140 // and it may hold it in a register across a call to schedule. When
141 // we get back from the call we may be running in a different thread,
142 // in which case the register now points to the TLS variable for a
143 // different thread. We use non-inlinable functions to avoid this
146 G* runtime_g(void) __attribute__ ((noinline, no_split_stack));
154 M* runtime_m(void) __attribute__ ((noinline, no_split_stack));
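// A minimal sketch (assumed shape of the elided definitions, not compiled):
// the accessors simply return the __thread variables, but because they are
// noinline the compiler reloads the TLS address on every call instead of
// caching it across a call to schedule.
#if 0
static __thread G *g;
static __thread M *m;

G*
runtime_g(void)
{
	return g;
}

M*
runtime_m(void)
{
	return m;
}
#endif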
164 runtime_setmg(M* mp, G* gp)
170 // The static TLS size. See runtime_newosproc.
173 // Start a new thread.
175 runtime_newosproc(M *mp)
183 if(pthread_attr_init(&attr) != 0)
184 runtime_throw("pthread_attr_init");
185 if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
186 runtime_throw("pthread_attr_setdetachstate");
188 stacksize = PTHREAD_STACK_MIN;
190 // With glibc before version 2.16 the static TLS size is taken
191 // out of the stack size, and we get an error or a crash if
192 // there is not enough stack space left. Add it back in if we
193 // can, in case the program uses a lot of TLS space. FIXME:
194 // This can be disabled in glibc 2.16 and later, if the bug is
195 // indeed fixed then.
196 stacksize += tlssize;
198 if(pthread_attr_setstacksize(&attr, stacksize) != 0)
199 runtime_throw("pthread_attr_setstacksize");
201 // Block signals during pthread_create so that the new thread
202 // starts with signals disabled. It will enable them in minit.
206 // Blocking SIGTRAP reportedly breaks gdb on Alpha GNU/Linux.
207 sigdelset(&clear, SIGTRAP);
211 sigprocmask(SIG_BLOCK, &clear, &old);
212 ret = pthread_create(&tid, &attr, runtime_mstart, mp);
213 sigprocmask(SIG_SETMASK, &old, nil);
216 runtime_throw("pthread_create");
219 // First function run by a new goroutine. This replaces gogocall.
225 if(g->traceback != nil)
228 fn = (void (*)(void*))(g->entry);
233 // Switch context to a different goroutine. This is like longjmp.
234 static void runtime_gogo(G*) __attribute__ ((noinline));
236 runtime_gogo(G* newg)
238 #ifdef USING_SPLIT_STACK
239 __splitstack_setcontext(&newg->stack_context[0]);
242 newg->fromgogo = true;
243 fixcontext(&newg->context);
244 setcontext(&newg->context);
245 runtime_throw("gogo setcontext returned");
248 // Save context and call fn passing g as a parameter. This is like
249 // setjmp. Because getcontext always returns 0, unlike setjmp, we use
250 // g->fromgogo as a flag. It will be true if we got here via
251 // setcontext. g == nil the first time this is called in a new m.
252 static void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));
254 runtime_mcall(void (*pfn)(G*))
258 #ifndef USING_SPLIT_STACK
262 // Ensure that all registers are on the stack for the garbage
264 __builtin_unwind_init();
269 runtime_throw("runtime: mcall called on m->g0 stack");
273 #ifdef USING_SPLIT_STACK
274 __splitstack_getcontext(&g->stack_context[0]);
278 gp->fromgogo = false;
279 getcontext(&gp->context);
281 // When we return from getcontext, we may be running
282 // in a new thread. That means that m and g may have
283 // changed. They are global variables so we will
284 // reload them, but the addresses of m and g may be
285 // cached in our local stack frame, and those
286 // addresses may be wrong. Call functions to reload
287 // the values for this thread.
291 if(gp->traceback != nil)
294 if (gp == nil || !gp->fromgogo) {
295 #ifdef USING_SPLIT_STACK
296 __splitstack_setcontext(&mp->g0->stack_context[0]);
298 mp->g0->entry = (byte*)pfn;
301 // It's OK to set g directly here because this case
302 // can not occur if we got here via a setcontext to
303 // the getcontext call just above.
306 fixcontext(&mp->g0->context);
307 setcontext(&mp->g0->context);
308 runtime_throw("runtime: mcall function returned");
312 #ifdef HAVE_DL_ITERATE_PHDR
314 // Called via dl_iterate_phdr.
317 addtls(struct dl_phdr_info* info, size_t size __attribute__ ((unused)), void *data)
319 size_t *total = (size_t *)data;
322 for(i = 0; i < info->dlpi_phnum; ++i) {
323 if(info->dlpi_phdr[i].p_type == PT_TLS)
324 *total += info->dlpi_phdr[i].p_memsz;
329 // Set the total TLS size.
336 dl_iterate_phdr(addtls, (void *)&total);
349 // Goroutine scheduler
350 // The scheduler's job is to distribute ready-to-run goroutines over worker threads.
352 // The main concepts are:
354 // M - worker thread, or machine.
355 // P - processor, a resource that is required to execute Go code.
356 // M must have an associated P to execute Go code; however, it can be
357 // blocked or in a syscall without an associated P.
359 // Design doc at http://golang.org/s/go11sched.
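// For orientation, a simplified sketch (not compiled) of one scheduling
// round as performed by schedule() further below; the real code also
// handles GC stops, locked goroutines and spinning M's.
#if 0
static void
schedule_sketch(void)
{
	G *gp;

	gp = runqget(m->p);		// local run queue first
	if(gp == nil)
		gp = findrunnable();	// global queue, work stealing, netpoll
	execute(gp);	// does not return; when gp blocks, yields or exits,
			// it re-enters schedule() via runtime_mcall
}
#endif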
361 typedef struct Sched Sched;
366 M* midle; // idle m's waiting for work
367 int32 nmidle; // number of idle m's waiting for work
368 int32 mlocked; // number of locked m's waiting for work
369 int32 mcount; // number of m's that have been created
371 P* pidle; // idle P's
375 // Global runnable queue.
380 // Global cache of dead G's.
390 int32 profilehz; // cpu profiling rate
393 // The max value of GOMAXPROCS.
394 // There are no fundamental restrictions on the value.
395 enum { MaxGomaxprocs = 1<<8 };
398 int32 runtime_gomaxprocs;
399 bool runtime_singleproc;
400 bool runtime_iscgo = true;
401 uint32 runtime_needextram = 1;
402 uint32 runtime_gcwaiting;
404 G runtime_g0; // idle goroutine for m0
412 static int32 newprocs;
414 void* runtime_mstart(void*);
415 static void runqput(P*, G*);
416 static G* runqget(P*);
417 static void runqgrow(P*);
418 static G* runqsteal(P*, P*);
419 static void mput(M*);
420 static M* mget(void);
421 static void mcommoninit(M*);
422 static void schedule(void);
423 static void procresize(int32);
424 static void acquirep(P*);
425 static P* releasep(void);
426 static void newm(void(*)(void), P*);
427 static void stopm(void);
428 static void startm(P*, bool);
429 static void handoffp(P*);
430 static void wakep(void);
431 static void stoplockedm(void);
432 static void startlockedm(G*);
433 static void sysmon(void);
434 static uint32 retake(uint32*);
435 static void inclocked(int32);
436 static void checkdead(void);
437 static void exitsyscall0(G*);
438 static void park0(G*);
439 static void gosched0(G*);
440 static void goexit0(G*);
441 static void gfput(P*, G*);
443 static void gfpurge(P*);
444 static void globrunqput(G*);
445 static G* globrunqget(P*);
446 static P* pidleget(void);
447 static void pidleput(P*);
448 static void injectglist(G*);
450 // The bootstrap sequence is:
454 // make & queue new G
455 // call runtime_mstart
457 // The new G calls runtime_main.
459 runtime_schedinit(void)
475 runtime_mallocinit();
482 // Allocate internal symbol table representation now,
483 // so that we don't need to call malloc when we crash.
484 // runtime_findfunc(0);
486 runtime_sched.lastpoll = runtime_nanotime();
488 p = runtime_getenv("GOMAXPROCS");
489 if(p != nil && (n = runtime_atoi(p)) > 0) {
490 if(n > MaxGomaxprocs)
494 runtime_allp = runtime_malloc((MaxGomaxprocs+1)*sizeof(runtime_allp[0]));
497 // Can not enable GC until all roots are registered.
498 // mstats.enablegc = 1;
502 extern void main_init(void) __asm__ (GOSYM_PREFIX "__go_init_main");
503 extern void main_main(void) __asm__ (GOSYM_PREFIX "main.main");
505 // The main goroutine.
507 runtime_main(void* dummy __attribute__((unused)))
511 // Lock the main goroutine onto this, the main OS thread,
512 // during initialization. Most programs won't care, but a few
513 // do require certain calls to be made by the main thread.
514 // Those can arrange for main.main to run in the main thread
515 // by calling runtime.LockOSThread during initialization
516 // to preserve the lock.
517 runtime_lockOSThread();
519 runtime_throw("runtime_main not on m0");
520 __go_go(runtime_MHeap_Scavenger, nil);
522 runtime_unlockOSThread();
524 // For gccgo we have to wait until after main is initialized
525 // to enable GC, because initializing main registers the GC
533 // Make racy client program work: if panicking on
534 // another goroutine at the same time as main returns,
535 // let the other goroutine finish printing the panic trace.
536 // Once it does, it will exit. See issue 3934.
537 if(runtime_panicking)
538 runtime_park(nil, nil, "panicwait");
546 runtime_goroutineheader(G *gp)
565 status = gp->waitreason;
573 runtime_printf("goroutine %D [%s]:\n", gp->goid, status);
577 runtime_goroutinetrailer(G *g)
579 if(g != nil && g->gopc != 0 && g->goid != 1) {
584 if(__go_file_line(g->gopc - 1, &fn, &file, &line)) {
585 runtime_printf("created by %S\n", fn);
586 runtime_printf("\t%S:%D\n", file, (int64) line);
594 Location locbuf[100];
599 runtime_tracebackothers(G * volatile me)
606 traceback = runtime_gotraceback(nil);
607 for(gp = runtime_allg; gp != nil; gp = gp->alllink) {
608 if(gp == me || gp->status == Gdead)
610 if(gp->issystem && traceback < 2)
612 runtime_printf("\n");
613 runtime_goroutineheader(gp);
615 // Our only mechanism for doing a stack trace is
616 // _Unwind_Backtrace. And that only works for the
617 // current thread, not for other random goroutines.
618 // So we need to switch context to the goroutine, get
619 // the backtrace, and then switch back.
621 // This means that if g is running or in a syscall, we
622 // can't reliably print a stack trace. FIXME.
623 if(gp->status == Gsyscall || gp->status == Grunning) {
624 runtime_printf("no stack trace available\n");
625 runtime_goroutinetrailer(gp);
631 #ifdef USING_SPLIT_STACK
632 __splitstack_getcontext(&me->stack_context[0]);
634 getcontext(&me->context);
636 if(gp->traceback != nil) {
640 runtime_printtrace(tb.locbuf, tb.c, false);
641 runtime_goroutinetrailer(gp);
645 // Do a stack trace of gp, and then restore the context to
651 Traceback* traceback;
653 traceback = gp->traceback;
655 traceback->c = runtime_callers(1, traceback->locbuf,
656 sizeof traceback->locbuf / sizeof traceback->locbuf[0]);
657 runtime_gogo(traceback->gp);
663 // If there is no mcache runtime_callers() will crash,
664 // and we are most likely in the sysmon thread, so the stack is senseless anyway.
666 runtime_callers(1, mp->createstack, nelem(mp->createstack));
668 mp->fastrand = 0x49f6428aUL + mp->id + runtime_cputicks();
670 runtime_lock(&runtime_sched);
671 mp->id = runtime_sched.mcount++;
673 runtime_mpreinit(mp);
675 // Add to runtime_allm so garbage collector doesn't free m
676 // when it is just in a register or thread-local storage.
677 mp->alllink = runtime_allm;
678 // runtime_NumCgoCall() iterates over allm without schedlock,
679 // so we need to publish it safely.
680 runtime_atomicstorep(&runtime_allm, mp);
681 runtime_unlock(&runtime_sched);
684 // Mark gp ready to run.
689 if(gp->status != Gwaiting) {
690 runtime_printf("goroutine %D has status %d\n", gp->goid, gp->status);
691 runtime_throw("bad g->status in ready");
693 gp->status = Grunnable;
695 if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0) // TODO: fast atomic
700 runtime_gcprocs(void)
704 // Figure out how many CPUs to use during GC.
705 // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
706 runtime_lock(&runtime_sched);
707 n = runtime_gomaxprocs;
709 n = runtime_ncpu > 0 ? runtime_ncpu : 1;
712 if(n > runtime_sched.nmidle+1) // one M is currently running
713 n = runtime_sched.nmidle+1;
714 runtime_unlock(&runtime_sched);
723 runtime_lock(&runtime_sched);
724 n = runtime_gomaxprocs;
729 n -= runtime_sched.nmidle+1; // one M is currently running
730 runtime_unlock(&runtime_sched);
735 runtime_helpgc(int32 nproc)
740 runtime_lock(&runtime_sched);
742 for(n = 1; n < nproc; n++) { // one M is currently running
743 if(runtime_allp[pos]->mcache == m->mcache)
747 runtime_throw("runtime_gcprocs inconsistency");
749 mp->mcache = runtime_allp[pos]->mcache;
751 runtime_notewakeup(&mp->park);
753 runtime_unlock(&runtime_sched);
757 runtime_stoptheworld(void)
764 runtime_lock(&runtime_sched);
765 runtime_sched.stopwait = runtime_gomaxprocs;
766 runtime_atomicstore((uint32*)&runtime_gcwaiting, 1);
768 m->p->status = Pgcstop;
769 runtime_sched.stopwait--;
770 // try to retake all P's in Psyscall status
771 for(i = 0; i < runtime_gomaxprocs; i++) {
774 if(s == Psyscall && runtime_cas(&p->status, s, Pgcstop))
775 runtime_sched.stopwait--;
778 while((p = pidleget()) != nil) {
780 runtime_sched.stopwait--;
782 wait = runtime_sched.stopwait > 0;
783 runtime_unlock(&runtime_sched);
785 // wait for remaining P's to stop voluntarily
787 runtime_notesleep(&runtime_sched.stopnote);
788 runtime_noteclear(&runtime_sched.stopnote);
790 if(runtime_sched.stopwait)
791 runtime_throw("stoptheworld: not stopped");
792 for(i = 0; i < runtime_gomaxprocs; i++) {
794 if(p->status != Pgcstop)
795 runtime_throw("stoptheworld: not stopped");
806 runtime_starttheworld(void)
813 gp = runtime_netpoll(false); // non-blocking
815 add = needaddgcproc();
816 runtime_lock(&runtime_sched);
818 procresize(newprocs);
821 procresize(runtime_gomaxprocs);
822 runtime_gcwaiting = 0;
825 while((p = pidleget()) != nil) {
826 // procresize() puts p's with work at the beginning of the list.
827 // Once we reach a p without a run queue, the rest don't have one either.
828 if(p->runqhead == p->runqtail) {
839 runtime_throw("starttheworld: inconsistent mp->nextp");
841 runtime_notewakeup(&mp->park);
843 if(runtime_sched.sysmonwait) {
844 runtime_sched.sysmonwait = false;
845 runtime_notewakeup(&runtime_sched.sysmonnote);
847 runtime_unlock(&runtime_sched);
857 // If GC could have used another helper proc, start one now,
858 // in the hope that it will be available next time.
859 // It would have been even better to start it before the collection,
860 // but doing so requires allocating memory, so it's tricky to
861 // coordinate. This lazy approach works out in practice:
862 // we don't mind if the first couple of gc rounds don't have quite
863 // the maximum number of procs.
868 // Called to start an M.
870 runtime_mstart(void* mp)
880 // Record top of stack for use by mcall.
881 // Once we call schedule we're never coming back,
882 // so other calls can reuse this stack space.
883 #ifdef USING_SPLIT_STACK
884 __splitstack_getcontext(&g->stack_context[0]);
886 g->gcinitial_sp = &mp;
887 // Setting gcstack_size to 0 is a marker meaning that gcinitial_sp
888 // is the top of the stack, not the bottom.
892 getcontext(&g->context);
894 if(g->entry != nil) {
895 // Got here from mcall.
896 void (*pfn)(G*) = (void (*)(G*))g->entry;
897 G* gp = (G*)g->param;
903 #ifdef USING_SPLIT_STACK
905 int dont_block_signals = 0;
906 __splitstack_block_signals(&dont_block_signals, nil);
910 // Install signal handlers; after minit so that minit can
911 // prepare the thread to be able to handle the signals.
912 if(m == &runtime_m0) {
924 } else if(m != &runtime_m0) {
930 // TODO(brainman): This point is never reached, because scheduler
931 // does not release os threads at the moment. But once this path
932 // is enabled, we must remove our seh here.
937 typedef struct CgoThreadStart CgoThreadStart;
938 struct CgoThreadStart
945 // Allocate a new m unassociated with any thread.
946 // Can use p for allocation context if needed.
948 runtime_allocm(P *p, int32 stacksize, byte** ret_g0_stack, size_t* ret_g0_stacksize)
952 m->locks++; // disable GC because it can be called from sysmon
954 acquirep(p); // temporarily borrow p for mallocs in this function
958 runtime_gc_m_ptr(&e);
959 mtype = ((const PtrType*)e.__type_descriptor)->__element_type;
963 mp = runtime_mal(sizeof *mp);
965 mp->g0 = runtime_malg(stacksize, ret_g0_stack, ret_g0_stacksize);
974 static M* lockextra(bool nilokay);
975 static void unlockextra(M*);
977 // needm is called when a cgo callback happens on a
978 // thread without an m (a thread not created by Go).
979 // In this case, needm is expected to find an m to use
980 // and return with m, g initialized correctly.
981 // Since m and g are not set now (likely nil, but see below)
982 // needm is limited in what routines it can call. In particular
983 // it can only call nosplit functions (textflag 7) and cannot
984 // do any scheduling that requires an m.
986 // In order to avoid needing heavy lifting here, we adopt
987 // the following strategy: there is a stack of available m's
988 // that can be stolen. Using compare-and-swap
989 // to pop from the stack has ABA races, so we simulate
990 // a lock by doing an exchange (via casp) to steal the stack
991 // head and replace the top pointer with MLOCKED (1).
992 // This serves as a simple spin lock that we can use even
993 // without an m. The thread that locks the stack in this way
994 // unlocks the stack by storing a valid stack head pointer.
996 // In order to make sure that there is always an m structure
997 // available to be stolen, we maintain the invariant that there
998 // is always one more than needed. At the beginning of the
999 // program (if cgo is in use) the list is seeded with a single m.
1000 // If needm finds that it has taken the last m off the list, its job
1001 // is - once it has installed its own m so that it can do things like
1002 // allocate memory - to create a spare m and put it on the list.
1004 // Each of these extra m's also has a g0 and a curg that are
1005 // pressed into service as the scheduling stack and current
1006 // goroutine for the duration of the cgo callback.
1008 // When the callback is done with the m, it calls dropm to
1009 // put the m back on the list.
1011 // Unlike the gc toolchain, we start running on curg, since we are
1012 // just going to return and let the caller continue.
1018 // Lock extra list, take head, unlock popped list.
1019 // nilokay=false is safe here because of the invariant above,
1020 // that the extra list always contains or will soon contain
1022 mp = lockextra(false);
1024 // Set needextram when we've just emptied the list,
1025 // so that the eventual call into cgocallbackg will
1026 // allocate a new m for the extra list. We delay the
1027 // allocation until then so that it can be done
1028 // after exitsyscall makes sure it is okay to be
1029 // running at all (that is, there's no garbage collection
1030 // running right now).
1031 mp->needextram = mp->schedlink == nil;
1032 unlockextra(mp->schedlink);
1034 // Install m and g (= m->curg).
1035 runtime_setmg(mp, mp->curg);
1037 // Initialize g's context as in mstart.
1039 g->status = Gsyscall;
1042 #ifdef USING_SPLIT_STACK
1043 __splitstack_getcontext(&g->stack_context[0]);
1045 g->gcinitial_sp = &mp;
1046 g->gcstack_size = 0;
1049 getcontext(&g->context);
1051 if(g->entry != nil) {
1052 // Got here from mcall.
1053 void (*pfn)(G*) = (void (*)(G*))g->entry;
1054 G* gp = (G*)g->param;
1059 // Initialize this thread to use the m.
1062 #ifdef USING_SPLIT_STACK
1064 int dont_block_signals = 0;
1065 __splitstack_block_signals(&dont_block_signals, nil);
1070 // newextram allocates an m and puts it on the extra list.
1071 // It is called with a working local m, so that it can do things
1072 // like call schedlock and allocate.
1074 runtime_newextram(void)
1079 size_t g0_spsize, spsize;
1081 // Create extra goroutine locked to extra m.
1082 // The goroutine is the context in which the cgo callback will run.
1083 // The sched.pc will never be returned to, but setting it to
1084 // runtime.goexit makes clear to the traceback routines where
1085 // the goroutine stack ends.
1086 mp = runtime_allocm(nil, StackMin, &g0_sp, &g0_spsize);
1087 gp = runtime_malg(StackMin, &sp, &spsize);
1090 mp->locked = LockInternal;
1093 // put on allg for garbage collector
1094 runtime_lock(&runtime_sched);
1095 if(runtime_lastg == nil)
1098 runtime_lastg->alllink = gp;
1100 runtime_unlock(&runtime_sched);
1101 gp->goid = runtime_xadd64(&runtime_sched.goidgen, 1);
1103 // The context for gp will be set up in runtime_needm. But
1104 // here we need to set up the context for g0.
1105 getcontext(&mp->g0->context);
1106 mp->g0->context.uc_stack.ss_sp = g0_sp;
1107 #ifdef MAKECONTEXT_STACK_TOP
1108 mp->g0->context.uc_stack.ss_sp += g0_spsize;
1110 mp->g0->context.uc_stack.ss_size = g0_spsize;
1111 makecontext(&mp->g0->context, kickoff, 0);
1113 // Add m to the extra list.
1114 mnext = lockextra(true);
1115 mp->schedlink = mnext;
1119 // dropm is called when a cgo callback has called needm but is now
1120 // done with the callback and returning back into the non-Go thread.
1121 // It puts the current m back onto the extra list.
1123 // The main expense here is the call to signalstack to release the
1124 // m's signal stack, and then the call to needm on the next callback
1125 // from this thread. It is tempting to try to save the m for next time,
1126 // which would eliminate both these costs, but there might not be
1127 // a next time: the current thread (which Go does not control) might exit.
1128 // If we saved the m for that thread, there would be an m leak each time
1129 // such a thread exited. Instead, we acquire and release an m on each
1130 // call. These should typically not be scheduling operations, just a few
1131 // atomics, so the cost should be small.
1133 // TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
1134 // variable using pthread_key_create. Unlike the pthread keys we already use
1135 // on OS X, this dummy key would never be read by Go code. It would exist
1136 // only so that we could register a thread-exit-time destructor.
1137 // That destructor would put the m back onto the extra list.
1138 // This is purely a performance optimization. The current version,
1139 // in which dropm happens on each cgo call, is still correct too.
1140 // We may have to keep the current version on systems with cgo
1141 // but without pthreads, like Windows.
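// Illustrative sketch of the TODO above (not compiled; the key and helper
// names are hypothetical, and the dropm symbol name is assumed): a pthread
// key whose only purpose is its destructor, which returns the m to the
// extra list when the foreign thread exits.
#if 0
static pthread_key_t go_extram_key;

static void
go_extram_destructor(void *value __attribute__ ((unused)))
{
	// Runs at thread exit on this foreign thread: put the m back,
	// exactly as dropm does today on every callback return.
	runtime_dropm();
}

static void
go_extram_key_init(void)
{
	if(pthread_key_create(&go_extram_key, go_extram_destructor) != 0)
		runtime_throw("pthread_key_create");
	// The destructor only runs if the key holds a non-nil value
	// in the exiting thread.
	pthread_setspecific(go_extram_key, (void*)1);
}
#endif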
1147 // Undo whatever initialization minit did during needm.
1150 // Clear m and g, and return m to the extra list.
1151 // After the call to setmg we can only call nosplit functions.
1153 runtime_setmg(nil, nil);
1155 mp->curg->status = Gdead;
1157 mnext = lockextra(true);
1158 mp->schedlink = mnext;
1162 #define MLOCKED ((M*)1)
1164 // lockextra locks the extra list and returns the list head.
1165 // The caller must unlock the list by storing a new list head
1166 // to runtime.extram. If nilokay is true, then lockextra will
1167 // return a nil list head if that's what it finds. If nilokay is false,
1168 // lockextra will keep waiting until the list head is no longer nil.
1170 lockextra(bool nilokay)
1173 void (*yield)(void);
1176 mp = runtime_atomicloadp(&runtime_extram);
1178 yield = runtime_osyield;
1182 if(mp == nil && !nilokay) {
1186 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
1187 yield = runtime_osyield;
1199 runtime_atomicstorep(&runtime_extram, mp);
1209 mp = runtime_atomicloadp(&runtime_extram);
1214 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
1219 for(mc = mp; mc != nil; mc = mc->schedlink)
1221 runtime_atomicstorep(&runtime_extram, mp);
1226 // Create a new m. It will start off with a call to fn, or else the scheduler.
1228 newm(void(*fn)(void), P *p)
1232 mp = runtime_allocm(p, -1, nil, nil);
1236 runtime_newosproc(mp);
1239 // Stops execution of the current m until new work is available.
1240 // Returns with acquired P.
1245 runtime_throw("stopm holding locks");
1247 runtime_throw("stopm holding p");
1249 m->spinning = false;
1250 runtime_xadd(&runtime_sched.nmspinning, -1);
1254 runtime_lock(&runtime_sched);
1256 runtime_unlock(&runtime_sched);
1257 runtime_notesleep(&m->park);
1258 runtime_noteclear(&m->park);
1275 // Schedules some M to run the p (creates an M if necessary).
1276 // If p==nil, tries to get an idle P; if there are no idle P's, it simply returns.
1278 startm(P *p, bool spinning)
1283 runtime_lock(&runtime_sched);
1287 runtime_unlock(&runtime_sched);
1289 runtime_xadd(&runtime_sched.nmspinning, -1);
1294 runtime_unlock(&runtime_sched);
1303 runtime_throw("startm: m is spinning");
1305 runtime_throw("startm: m has p");
1306 mp->spinning = spinning;
1308 runtime_notewakeup(&mp->park);
1311 // Hands off P from syscall or locked M.
1315 // if it has local work, start it straight away
1316 if(p->runqhead != p->runqtail || runtime_sched.runqsize) {
1320 // no local work, check that there are no spinning/idle M's,
1321 // otherwise our help is not required
1322 if(runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) == 0 && // TODO: fast atomic
1323 runtime_cas(&runtime_sched.nmspinning, 0, 1)) {
1327 runtime_lock(&runtime_sched);
1328 if(runtime_gcwaiting) {
1329 p->status = Pgcstop;
1330 if(--runtime_sched.stopwait == 0)
1331 runtime_notewakeup(&runtime_sched.stopnote);
1332 runtime_unlock(&runtime_sched);
1335 if(runtime_sched.runqsize) {
1336 runtime_unlock(&runtime_sched);
1340 // If this is the last running P and nobody is polling the network,
1341 // we need to wake up another M to poll the network.
1342 if(runtime_sched.npidle == (uint32)runtime_gomaxprocs-1 && runtime_atomicload64(&runtime_sched.lastpoll) != 0) {
1343 runtime_unlock(&runtime_sched);
1348 runtime_unlock(&runtime_sched);
1351 // Tries to add one more P to execute G's.
1352 // Called when a G is made runnable (newproc, ready).
1356 // be conservative about spinning threads
1357 if(!runtime_cas(&runtime_sched.nmspinning, 0, 1))
1362 // Stops execution of the current m that is locked to a g until the g is runnable again.
1363 // Returns with acquired P.
1369 if(m->lockedg == nil || m->lockedg->lockedm != m)
1370 runtime_throw("stoplockedm: inconsistent locking");
1372 // Schedule another M to run this p.
1377 // Wait until another thread schedules lockedg again.
1378 runtime_notesleep(&m->park);
1379 runtime_noteclear(&m->park);
1380 if(m->lockedg->status != Grunnable)
1381 runtime_throw("stoplockedm: not runnable");
1386 // Schedules the locked m to run the locked gp.
1395 runtime_throw("startlockedm: locked to me");
1397 runtime_throw("startlockedm: m has p");
1398 // directly hand off current P to the locked m
1402 runtime_notewakeup(&mp->park);
1406 // Stops the current m for stoptheworld.
1407 // Returns when the world is restarted.
1413 if(!runtime_gcwaiting)
1414 runtime_throw("gcstopm: not waiting for gc");
1416 m->spinning = false;
1417 runtime_xadd(&runtime_sched.nmspinning, -1);
1420 runtime_lock(&runtime_sched);
1421 p->status = Pgcstop;
1422 if(--runtime_sched.stopwait == 0)
1423 runtime_notewakeup(&runtime_sched.stopnote);
1424 runtime_unlock(&runtime_sched);
1428 // Schedules gp to run on the current M.
1435 if(gp->status != Grunnable) {
1436 runtime_printf("execute: bad g status %d\n", gp->status);
1437 runtime_throw("execute: bad g status");
1439 gp->status = Grunning;
1444 // Check whether the profiler needs to be turned on or off.
1445 hz = runtime_sched.profilehz;
1446 if(m->profilehz != hz)
1447 runtime_resetcpuprofiler(hz);
1452 // Finds a runnable goroutine to execute.
1453 // Tries to steal from other P's, get g from global queue, poll network.
1462 if(runtime_gcwaiting) {
1471 if(runtime_sched.runqsize) {
1472 runtime_lock(&runtime_sched);
1473 gp = globrunqget(m->p);
1474 runtime_unlock(&runtime_sched);
1479 gp = runtime_netpoll(false); // non-blocking
1481 injectglist(gp->schedlink);
1482 gp->status = Grunnable;
1485 // If number of spinning M's >= number of busy P's, block.
1486 // This is necessary to prevent excessive CPU consumption
1487 // when GOMAXPROCS>>1 but the program parallelism is low.
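// For example, with GOMAXPROCS=8, 6 idle P's and one M already spinning,
// 2*1 >= 8-6 holds, so this M blocks rather than become another spinner.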
1488 if(!m->spinning && 2 * runtime_atomicload(&runtime_sched.nmspinning) >= runtime_gomaxprocs - runtime_atomicload(&runtime_sched.npidle)) // TODO: fast atomic
1492 runtime_xadd(&runtime_sched.nmspinning, 1);
1494 // random steal from other P's
1495 for(i = 0; i < 2*runtime_gomaxprocs; i++) {
1496 if(runtime_gcwaiting)
1498 p = runtime_allp[runtime_fastrand1()%runtime_gomaxprocs];
1502 gp = runqsteal(m->p, p);
1507 // return P and block
1508 runtime_lock(&runtime_sched);
1509 if(runtime_gcwaiting) {
1510 runtime_unlock(&runtime_sched);
1513 if(runtime_sched.runqsize) {
1514 gp = globrunqget(m->p);
1515 runtime_unlock(&runtime_sched);
1520 runtime_unlock(&runtime_sched);
1522 m->spinning = false;
1523 runtime_xadd(&runtime_sched.nmspinning, -1);
1525 // check all runqueues once again
1526 for(i = 0; i < runtime_gomaxprocs; i++) {
1527 p = runtime_allp[i];
1528 if(p && p->runqhead != p->runqtail) {
1529 runtime_lock(&runtime_sched);
1531 runtime_unlock(&runtime_sched);
1540 if(runtime_xchg64(&runtime_sched.lastpoll, 0) != 0) {
1542 runtime_throw("findrunnable: netpoll with p");
1544 runtime_throw("findrunnable: netpoll with spinning");
1545 gp = runtime_netpoll(true); // block until new work is available
1546 runtime_atomicstore64(&runtime_sched.lastpoll, runtime_nanotime());
1548 runtime_lock(&runtime_sched);
1550 runtime_unlock(&runtime_sched);
1553 injectglist(gp->schedlink);
1554 gp->status = Grunnable;
1564 // Injects the list of runnable G's into the scheduler.
1565 // Can run concurrently with GC.
1567 injectglist(G *glist)
1574 runtime_lock(&runtime_sched);
1575 for(n = 0; glist; n++) {
1577 glist = gp->schedlink;
1578 gp->status = Grunnable;
1581 runtime_unlock(&runtime_sched);
1583 for(; n && runtime_sched.npidle; n--)
1587 // One round of scheduler: find a runnable goroutine and execute it.
1595 runtime_throw("schedule: holding locks");
1598 if(runtime_gcwaiting) {
1605 gp = findrunnable();
1608 m->spinning = false;
1609 runtime_xadd(&runtime_sched.nmspinning, -1);
1612 // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
1613 // so see if we need to wake up another M here.
1614 if (m->p->runqhead != m->p->runqtail &&
1615 runtime_atomicload(&runtime_sched.nmspinning) == 0 &&
1616 runtime_atomicload(&runtime_sched.npidle) > 0) // TODO: fast atomic
1627 // Puts the current goroutine into a waiting state and unlocks the lock.
1628 // The goroutine can be made runnable again by calling runtime_ready(gp).
1630 runtime_park(void(*unlockf)(Lock*), Lock *lock, const char *reason)
1633 m->waitunlockf = unlockf;
1634 g->waitreason = reason;
1635 runtime_mcall(park0);
1638 // runtime_park continuation on g0.
1642 gp->status = Gwaiting;
1645 if(m->waitunlockf) {
1646 m->waitunlockf(m->waitlock);
1647 m->waitunlockf = nil;
1652 execute(gp); // Never returns.
1659 runtime_gosched(void)
1661 runtime_mcall(gosched0);
1664 // runtime_gosched continuation on g0.
1668 gp->status = Grunnable;
1671 runtime_lock(&runtime_sched);
1673 runtime_unlock(&runtime_sched);
1676 execute(gp); // Never returns.
1681 // Finishes execution of the current goroutine.
1683 runtime_goexit(void)
1686 runtime_racegoend();
1687 runtime_mcall(goexit0);
1690 // runtime_goexit continuation on g0.
1700 if(m->locked & ~LockExternal) {
1701 runtime_printf("invalid m->locked = %d", m->locked);
1702 runtime_throw("internal lockOSThread error");
1709 // The goroutine g is about to enter a system call.
1710 // Record that it's not using the cpu anymore.
1711 // This is called only from the go syscall library and cgocall,
1712 // not from the low-level system calls used by the runtime.
1714 // Entersyscall cannot split the stack: the runtime_gosave must
1715 // make g->sched refer to the caller's stack segment, because
1716 // entersyscall is going to return immediately after.
1718 void runtime_entersyscall(void) __attribute__ ((no_split_stack));
1721 runtime_entersyscall()
1723 if(m->profilehz > 0)
1724 runtime_setprof(false);
1726 // Leave SP around for gc and traceback.
1727 #ifdef USING_SPLIT_STACK
1728 g->gcstack = __splitstack_find(nil, nil, &g->gcstack_size,
1729 &g->gcnext_segment, &g->gcnext_sp,
1735 g->gcnext_sp = (byte *) &v;
1739 // Save the registers in the g structure so that any pointers
1740 // held in registers will be seen by the garbage collector.
1741 getcontext(&g->gcregs);
1743 g->status = Gsyscall;
1745 if(runtime_atomicload(&runtime_sched.sysmonwait)) { // TODO: fast atomic
1746 runtime_lock(&runtime_sched);
1747 if(runtime_atomicload(&runtime_sched.sysmonwait)) {
1748 runtime_atomicstore(&runtime_sched.sysmonwait, 0);
1749 runtime_notewakeup(&runtime_sched.sysmonnote);
1751 runtime_unlock(&runtime_sched);
1757 runtime_atomicstore(&m->p->status, Psyscall);
1758 if(runtime_gcwaiting) {
1759 runtime_lock(&runtime_sched);
1760 if (runtime_sched.stopwait > 0 && runtime_cas(&m->p->status, Psyscall, Pgcstop)) {
1761 if(--runtime_sched.stopwait == 0)
1762 runtime_notewakeup(&runtime_sched.stopnote);
1764 runtime_unlock(&runtime_sched);
1768 // The same as runtime_entersyscall(), but with a hint that the syscall is blocking.
1770 runtime_entersyscallblock(void)
1774 if(m->profilehz > 0)
1775 runtime_setprof(false);
1777 // Leave SP around for gc and traceback.
1778 #ifdef USING_SPLIT_STACK
1779 g->gcstack = __splitstack_find(nil, nil, &g->gcstack_size,
1780 &g->gcnext_segment, &g->gcnext_sp,
1783 g->gcnext_sp = (byte *) &p;
1786 // Save the registers in the g structure so that any pointers
1787 // held in registers will be seen by the garbage collector.
1788 getcontext(&g->gcregs);
1790 g->status = Gsyscall;
1794 if(g->isbackground) // do not consider blocked scavenger for deadlock detection
1798 // The goroutine g exited its system call.
1799 // Arrange for it to run on a cpu again.
1800 // This is called only from the go syscall library, not
1801 // from the low-level system calls used by the runtime.
1803 runtime_exitsyscall(void)
1808 // Check whether the profiler needs to be turned on.
1809 if(m->profilehz > 0)
1810 runtime_setprof(true);
1813 // Try to re-acquire the last P.
1814 if(m->p && m->p->status == Psyscall && runtime_cas(&m->p->status, Psyscall, Prunning)) {
1815 // There's a cpu for us, so we can run.
1816 m->mcache = m->p->mcache;
1819 gp->status = Grunning;
1820 // Garbage collector isn't running (since we are),
1821 // so okay to clear gcstack and gcsp.
1822 #ifdef USING_SPLIT_STACK
1825 gp->gcnext_sp = nil;
1826 runtime_memclr(&gp->gcregs, sizeof gp->gcregs);
1830 if(gp->isbackground) // do not consider blocked scavenger for deadlock detection
1832 // Try to get any other idle P.
1834 if(runtime_sched.pidle) {
1835 runtime_lock(&runtime_sched);
1837 runtime_unlock(&runtime_sched);
1840 #ifdef USING_SPLIT_STACK
1843 gp->gcnext_sp = nil;
1844 runtime_memclr(&gp->gcregs, sizeof gp->gcregs);
1849 // Call the scheduler.
1850 runtime_mcall(exitsyscall0);
1852 // Scheduler returned, so we're allowed to run now.
1853 // Delete the gcstack information that we left for
1854 // the garbage collector during the system call.
1855 // Must wait until now because until gosched returns
1856 // we don't know for sure that the garbage collector
1858 #ifdef USING_SPLIT_STACK
1861 gp->gcnext_sp = nil;
1862 runtime_memclr(&gp->gcregs, sizeof gp->gcregs);
1865 // runtime_exitsyscall slow path on g0.
1866 // Failed to acquire P, enqueue gp as runnable.
1872 gp->status = Grunnable;
1875 runtime_lock(&runtime_sched);
1879 runtime_unlock(&runtime_sched);
1882 execute(gp); // Never returns.
1885 // Wait until another thread schedules gp and so m again.
1887 execute(gp); // Never returns.
1890 schedule(); // Never returns.
1893 // Allocate a new g, with a stack big enough for stacksize bytes.
1895 runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize)
1899 newg = runtime_malloc(sizeof(G));
1900 if(stacksize >= 0) {
1901 #ifdef USING_SPLIT_STACK
1902 int dont_block_signals = 0;
1904 *ret_stack = __splitstack_makecontext(stacksize,
1905 &newg->stack_context[0],
1907 __splitstack_block_signals_context(&newg->stack_context[0],
1908 &dont_block_signals, nil);
1910 *ret_stack = runtime_mallocgc(stacksize, FlagNoProfiling|FlagNoGC, 0, 0);
1911 *ret_stacksize = stacksize;
1912 newg->gcinitial_sp = *ret_stack;
1913 newg->gcstack_size = stacksize;
1914 runtime_xadd(&runtime_stacks_sys, stacksize);
1920 /* For runtime package testing. */
1922 void runtime_testing_entersyscall(void)
1923 __asm__ (GOSYM_PREFIX "runtime.entersyscall");
1926 runtime_testing_entersyscall()
1928 runtime_entersyscall();
1931 void runtime_testing_exitsyscall(void)
1932 __asm__ (GOSYM_PREFIX "runtime.exitsyscall");
1935 runtime_testing_exitsyscall()
1937 runtime_exitsyscall();
1941 __go_go(void (*fn)(void*), void* arg)
1947 m->locks++; // disable preemption because it can be holding p in a local var
1949 if((newg = gfget(m->p)) != nil) {
1950 #ifdef USING_SPLIT_STACK
1951 int dont_block_signals = 0;
1953 sp = __splitstack_resetcontext(&newg->stack_context[0],
1955 __splitstack_block_signals_context(&newg->stack_context[0],
1956 &dont_block_signals, nil);
1958 sp = newg->gcinitial_sp;
1959 spsize = newg->gcstack_size;
1961 runtime_throw("bad spsize in __go_go");
1962 newg->gcnext_sp = sp;
1965 newg = runtime_malg(StackMin, &sp, &spsize);
1966 runtime_lock(&runtime_sched);
1967 if(runtime_lastg == nil)
1968 runtime_allg = newg;
1970 runtime_lastg->alllink = newg;
1971 runtime_lastg = newg;
1972 runtime_unlock(&runtime_sched);
1975 newg->entry = (byte*)fn;
1977 newg->gopc = (uintptr)__builtin_return_address(0);
1978 newg->status = Grunnable;
1979 newg->goid = runtime_xadd64(&runtime_sched.goidgen, 1);
1982 // Avoid warnings about variables clobbered by
1984 byte * volatile vsp = sp;
1985 size_t volatile vspsize = spsize;
1986 G * volatile vnewg = newg;
1988 getcontext(&vnewg->context);
1989 vnewg->context.uc_stack.ss_sp = vsp;
1990 #ifdef MAKECONTEXT_STACK_TOP
1991 vnewg->context.uc_stack.ss_sp += vspsize;
1993 vnewg->context.uc_stack.ss_size = vspsize;
1994 makecontext(&vnewg->context, kickoff, 0);
1996 runqput(m->p, vnewg);
1998 if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0 && fn != runtime_main) // TODO: fast atomic
2005 // Put on gfree list.
2006 // If local list is too long, transfer a batch to the global list.
2010 gp->schedlink = p->gfree;
2013 if(p->gfreecnt >= 64) {
2014 runtime_lock(&runtime_sched.gflock);
2015 while(p->gfreecnt >= 32) {
2018 p->gfree = gp->schedlink;
2019 gp->schedlink = runtime_sched.gfree;
2020 runtime_sched.gfree = gp;
2022 runtime_unlock(&runtime_sched.gflock);
2026 // Get from gfree list.
2027 // If local list is empty, grab a batch from global list.
2035 if(gp == nil && runtime_sched.gfree) {
2036 runtime_lock(&runtime_sched.gflock);
2037 while(p->gfreecnt < 32 && runtime_sched.gfree) {
2039 gp = runtime_sched.gfree;
2040 runtime_sched.gfree = gp->schedlink;
2041 gp->schedlink = p->gfree;
2044 runtime_unlock(&runtime_sched.gflock);
2048 p->gfree = gp->schedlink;
2054 // Purge all cached G's from gfree list to the global list.
2060 runtime_lock(&runtime_sched.gflock);
2061 while(p->gfreecnt) {
2064 p->gfree = gp->schedlink;
2065 gp->schedlink = runtime_sched.gfree;
2066 runtime_sched.gfree = gp;
2068 runtime_unlock(&runtime_sched.gflock);
2072 runtime_Breakpoint(void)
2074 runtime_breakpoint();
2077 void runtime_Gosched (void) __asm__ (GOSYM_PREFIX "runtime.Gosched");
2080 runtime_Gosched(void)
2085 // Implementation of runtime.GOMAXPROCS.
2086 // delete when scheduler is even stronger
2088 runtime_gomaxprocsfunc(int32 n)
2092 if(n > MaxGomaxprocs)
2094 runtime_lock(&runtime_sched);
2095 ret = runtime_gomaxprocs;
2096 if(n <= 0 || n == ret) {
2097 runtime_unlock(&runtime_sched);
2100 runtime_unlock(&runtime_sched);
2102 runtime_semacquire(&runtime_worldsema);
2104 runtime_stoptheworld();
2107 runtime_semrelease(&runtime_worldsema);
2108 runtime_starttheworld();
2120 void runtime_LockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.LockOSThread");
2122 runtime_LockOSThread(void)
2124 m->locked |= LockExternal;
2129 runtime_lockOSThread(void)
2131 m->locked += LockInternal;
2136 UnlockOSThread(void)
2144 void runtime_UnlockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.UnlockOSThread");
2147 runtime_UnlockOSThread(void)
2149 m->locked &= ~LockExternal;
2154 runtime_unlockOSThread(void)
2156 if(m->locked < LockInternal)
2157 runtime_throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
2158 m->locked -= LockInternal;
2163 runtime_lockedOSThread(void)
2165 return g->lockedm != nil && m->lockedg != nil;
2168 // for testing of callbacks
2170 _Bool runtime_golockedOSThread(void)
2171 __asm__ (GOSYM_PREFIX "runtime.golockedOSThread");
2174 runtime_golockedOSThread(void)
2176 return runtime_lockedOSThread();
2179 // for testing of wire, unwire
2186 intgo runtime_NumGoroutine (void)
2187 __asm__ (GOSYM_PREFIX "runtime.NumGoroutine");
2190 runtime_NumGoroutine()
2192 return runtime_gcount();
2196 runtime_gcount(void)
2202 runtime_lock(&runtime_sched);
2203 // TODO(dvyukov): runtime.NumGoroutine() is O(N).
2204 // We do not want to increment/decrement a centralized counter in newproc/goexit
2205 // just to make runtime.NumGoroutine() faster.
2206 // A compromise solution is to introduce per-P counters of active goroutines.
2207 for(gp = runtime_allg; gp; gp = gp->alllink) {
2209 if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting)
2212 runtime_unlock(&runtime_sched);
2217 runtime_mcount(void)
2219 return runtime_sched.mcount;
2224 void (*fn)(uintptr*, int32);
2227 Location locbuf[100];
2230 // Called if we receive a SIGPROF signal.
2237 // Windows does profiling in a dedicated thread without an m.
2237 if(!Windows && (m == nil || m->mcache == nil))
2239 if(prof.fn == nil || prof.hz == 0)
2242 runtime_lock(&prof);
2243 if(prof.fn == nil) {
2244 runtime_unlock(&prof);
2247 n = runtime_callers(0, prof.locbuf, nelem(prof.locbuf));
2248 for(i = 0; i < n; i++)
2249 prof.pcbuf[i] = prof.locbuf[i].pc;
2251 prof.fn(prof.pcbuf, n);
2252 runtime_unlock(&prof);
2255 // Arrange to call fn with a traceback hz times a second.
2257 runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
2259 // Force sane arguments.
2267 // Stop profiler on this cpu so that it is safe to lock prof.
2268 // if a profiling signal came in while we had prof locked,
2269 // it would deadlock.
2270 runtime_resetcpuprofiler(0);
2272 runtime_lock(&prof);
2275 runtime_unlock(&prof);
2276 runtime_lock(&runtime_sched);
2277 runtime_sched.profilehz = hz;
2278 runtime_unlock(&runtime_sched);
2281 runtime_resetcpuprofiler(hz);
2284 // Change number of processors. The world is stopped, sched is locked.
2286 procresize(int32 new)
2292 old = runtime_gomaxprocs;
2293 if(old < 0 || old > MaxGomaxprocs || new <= 0 || new >MaxGomaxprocs)
2294 runtime_throw("procresize: invalid arg");
2295 // initialize new P's
2296 for(i = 0; i < new; i++) {
2297 p = runtime_allp[i];
2299 p = (P*)runtime_mallocgc(sizeof(*p), 0, 0, 1);
2300 p->status = Pgcstop;
2301 runtime_atomicstorep(&runtime_allp[i], p);
2303 if(p->mcache == nil) {
2305 p->mcache = m->mcache; // bootstrap
2307 p->mcache = runtime_allocmcache();
2309 if(p->runq == nil) {
2311 p->runq = (G**)runtime_mallocgc(p->runqsize*sizeof(G*), 0, 0, 1);
2315 // redistribute runnable G's evenly
2316 for(i = 0; i < old; i++) {
2317 p = runtime_allp[i];
2318 while((gp = runqget(p)) != nil)
2321 // start at 1 because current M already executes some G and will acquire allp[0] below,
2322 // so if we have a spare G we want to put it into allp[1].
2323 for(i = 1; runtime_sched.runqhead; i++) {
2324 gp = runtime_sched.runqhead;
2325 runtime_sched.runqhead = gp->schedlink;
2326 runqput(runtime_allp[i%new], gp);
2328 runtime_sched.runqtail = nil;
2329 runtime_sched.runqsize = 0;
2332 for(i = new; i < old; i++) {
2333 p = runtime_allp[i];
2334 runtime_freemcache(p->mcache);
2338 // can't free P itself because it can be referenced by an M in syscall
2345 p = runtime_allp[0];
2349 for(i = new-1; i > 0; i--) {
2350 p = runtime_allp[i];
2354 runtime_singleproc = new == 1;
2355 runtime_atomicstore((uint32*)&runtime_gomaxprocs, new);
2358 // Associate p and the current m.
2362 if(m->p || m->mcache)
2363 runtime_throw("acquirep: already in go");
2364 if(p->m || p->status != Pidle) {
2365 runtime_printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? p->m->id : 0, p->status);
2366 runtime_throw("acquirep: invalid p state");
2368 m->mcache = p->mcache;
2371 p->status = Prunning;
2374 // Disassociate p and the current m.
2380 if(m->p == nil || m->mcache == nil)
2381 runtime_throw("releasep: invalid arg");
2383 if(p->m != m || p->mcache != m->mcache || p->status != Prunning) {
2384 runtime_printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
2385 m, m->p, p->m, m->mcache, p->mcache, p->status);
2386 runtime_throw("releasep: invalid p state");
2398 runtime_lock(&runtime_sched);
2399 runtime_sched.mlocked += v;
2402 runtime_unlock(&runtime_sched);
2405 // Check for a deadlock situation.
2406 // The check is based on the number of running M's; if it is 0, we have a deadlock.
2411 int32 run, grunning, s;
2414 run = runtime_sched.mcount - runtime_sched.nmidle - runtime_sched.mlocked - 1 - countextra();
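// mcount counts every m ever created; subtracting the idle m's, the locked
// m's waiting for work, the current m (the -1) and the extra m's parked for
// cgo callbacks (countextra()) leaves the m's actually running Go code.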
2418 runtime_printf("checkdead: nmidle=%d mlocked=%d mcount=%d\n",
2419 runtime_sched.nmidle, runtime_sched.mlocked, runtime_sched.mcount);
2420 runtime_throw("checkdead: inconsistent counts");
2423 for(gp = runtime_allg; gp; gp = gp->alllink) {
2424 if(gp->isbackground)
2429 else if(s == Grunnable || s == Grunning || s == Gsyscall) {
2430 runtime_printf("checkdead: find g %D in status %d\n", gp->goid, s);
2431 runtime_throw("checkdead: runnable g");
2434 if(grunning == 0) // possible if main goroutine calls runtime_Goexit()
2436 m->throwing = -1; // do not dump full stacks
2437 runtime_throw("all goroutines are asleep - deadlock!");
2444 int64 now, lastpoll;
2446 uint32 ticks[MaxGomaxprocs];
2448 idle = 0; // how many cycles in a row we have not woken anyone up
2451 if(idle == 0) // start with 20us sleep...
2453 else if(idle > 50) // start doubling the sleep after 1ms...
2455 if(delay > 10*1000) // up to 10ms
2457 runtime_usleep(delay);
2458 if(runtime_gcwaiting || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs) { // TODO: fast atomic
2459 runtime_lock(&runtime_sched);
2460 if(runtime_atomicload(&runtime_gcwaiting) || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs) {
2461 runtime_atomicstore(&runtime_sched.sysmonwait, 1);
2462 runtime_unlock(&runtime_sched);
2463 runtime_notesleep(&runtime_sched.sysmonnote);
2464 runtime_noteclear(&runtime_sched.sysmonnote);
2468 runtime_unlock(&runtime_sched);
2470 // poll network if not polled for more than 10ms
2471 lastpoll = runtime_atomicload64(&runtime_sched.lastpoll);
2472 now = runtime_nanotime();
2473 if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
2474 gp = runtime_netpoll(false); // non-blocking
2477 // retake P's blocked in syscalls
2486 retake(uint32 *ticks)
2493 for(i = 0; i < (uint32)runtime_gomaxprocs; i++) {
2494 p = runtime_allp[i];
2505 if(p->runqhead == p->runqtail && runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) > 0) // TODO: fast atomic
2507 // Need to increment number of locked M's before the CAS.
2508 // Otherwise the M from which we retake can exit the syscall,
2509 // increment nmidle and report deadlock.
2511 if(runtime_cas(&p->status, s, Pidle)) {
2520 // Put mp on midle list.
2521 // Sched must be locked.
2525 mp->schedlink = runtime_sched.midle;
2526 runtime_sched.midle = mp;
2527 runtime_sched.nmidle++;
2531 // Try to get an m from midle list.
2532 // Sched must be locked.
2538 if((mp = runtime_sched.midle) != nil){
2539 runtime_sched.midle = mp->schedlink;
2540 runtime_sched.nmidle--;
2545 // Put gp on the global runnable queue.
2546 // Sched must be locked.
2550 gp->schedlink = nil;
2551 if(runtime_sched.runqtail)
2552 runtime_sched.runqtail->schedlink = gp;
2554 runtime_sched.runqhead = gp;
2555 runtime_sched.runqtail = gp;
2556 runtime_sched.runqsize++;
2559 // Try to get a batch of G's from the global runnable queue.
2560 // Sched must be locked.
2567 if(runtime_sched.runqsize == 0)
2569 n = runtime_sched.runqsize/runtime_gomaxprocs+1;
2570 if(n > runtime_sched.runqsize)
2571 n = runtime_sched.runqsize;
2572 runtime_sched.runqsize -= n;
2573 if(runtime_sched.runqsize == 0)
2574 runtime_sched.runqtail = nil;
2575 gp = runtime_sched.runqhead;
2576 runtime_sched.runqhead = gp->schedlink;
2579 gp1 = runtime_sched.runqhead;
2580 runtime_sched.runqhead = gp1->schedlink;
2586 // Put p on the pidle list.
2587 // Sched must be locked.
2591 p->link = runtime_sched.pidle;
2592 runtime_sched.pidle = p;
2593 runtime_xadd(&runtime_sched.npidle, 1); // TODO: fast atomic
2596 // Try to get a p from the pidle list.
2597 // Sched must be locked.
2603 p = runtime_sched.pidle;
2605 runtime_sched.pidle = p->link;
2606 runtime_xadd(&runtime_sched.npidle, -1); // TODO: fast atomic
2611 // Put g on local runnable queue.
2612 // TODO(dvyukov): consider using lock-free queue.
2614 runqput(P *p, G *gp)
2623 if(t == h-1 || (h == 0 && t == s-1)) {
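// The run queue is a circular buffer of size s: it is full when the tail
// is immediately behind the head (with wraparound), keeping one slot free
// so that a full queue can be distinguished from an empty one.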
2634 // Get g from local runnable queue.
2641 if(p->runqhead == p->runqtail)
2659 // Grow local runnable queue.
2660 // TODO(dvyukov): consider using fixed-size array
2661 // and transfer excess to the global list (local queue can grow way too big).
2672 q = runtime_malloc(2*s*sizeof(*q));
2674 q[t2++] = p->runq[h++];
2678 runtime_free(p->runq);
2685 // Steal half of the elements from the local runnable queue of p2
2686 // and put them onto the local runnable queue of p.
2687 // Returns one of the stolen elements (or nil if failed).
2689 runqsteal(P *p, P *p2)
2692 int32 t, h, s, t2, h2, s2, c, i;
2694 if(p2->runqhead == p2->runqtail)
2696 // sort locks to prevent deadlocks
2700 if(p2->runqhead == p2->runqtail) {
2708 // now we've locked both queues and know the victim is not empty
2715 gp = p2->runq[h2++]; // return value
2718 // steal roughly half
2722 c = (s2 - h2 + t2) / 2;
2724 for(i = 0; i != c; i++) {
2725 // the target queue is full?
2726 if(t == h-1 || (h == 0 && t == s-1))
2728 // the victim queue is empty?
2731 gp1 = p2->runq[h2++];
2745 void runtime_testSchedLocalQueue(void)
2746 __asm__("runtime.testSchedLocalQueue");
2749 runtime_testSchedLocalQueue(void)
2755 runtime_memclr((byte*)&p, sizeof(p));
2759 p.runq = runtime_malloc(p.runqsize*sizeof(*p.runq));
2761 for(i = 0; i < (int32)nelem(gs); i++) {
2762 if(runqget(&p) != nil)
2763 runtime_throw("runq is not empty initially");
2764 for(j = 0; j < i; j++)
2765 runqput(&p, &gs[i]);
2766 for(j = 0; j < i; j++) {
2767 if(runqget(&p) != &gs[i]) {
2768 runtime_printf("bad element at iter %d/%d\n", i, j);
2769 runtime_throw("bad element");
2772 if(runqget(&p) != nil)
2773 runtime_throw("runq is not empty afterwards");
2777 void runtime_testSchedLocalQueueSteal(void)
2778 __asm__("runtime.testSchedLocalQueueSteal");
2781 runtime_testSchedLocalQueueSteal(void)
2787 runtime_memclr((byte*)&p1, sizeof(p1));
2791 p1.runq = runtime_malloc(p1.runqsize*sizeof(*p1.runq));
2793 runtime_memclr((byte*)&p2, sizeof(p2));
2794 p2.runqsize = nelem(gs);
2797 p2.runq = runtime_malloc(p2.runqsize*sizeof(*p2.runq));
2799 for(i = 0; i < (int32)nelem(gs); i++) {
2800 for(j = 0; j < i; j++) {
2802 runqput(&p1, &gs[j]);
2804 gp = runqsteal(&p2, &p1);
2810 while((gp = runqget(&p2)) != nil) {
2814 while((gp = runqget(&p1)) != nil)
2816 for(j = 0; j < i; j++) {
2817 if(gs[j].sig != 1) {
2818 runtime_printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
2819 runtime_throw("bad element");
2822 if(s != i/2 && s != i/2+1) {
2823 runtime_printf("bad steal %d, want %d or %d, iter %d\n",
2825 runtime_throw("bad steal");
2831 runtime_proc_scan(void (*addroot)(Obj))
2833 addroot((Obj){(byte*)&runtime_sched, sizeof runtime_sched, 0});
2836 // When a function calls a closure, it passes the closure value to
2837 // __go_set_closure immediately before the function call. When a
2838 // function uses a closure, it calls __go_get_closure immediately on
2839 // function entry. This is a hack, but it will work on any system.
2840 // It would be better to use the static chain register when there is
2841 // one. It is also worth considering expanding these functions
2842 // directly in the compiler.
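// An illustrative sketch (not compiled) of the convention described above;
// the closure layout and the caller/callee names are hypothetical.
#if 0
struct example_closure {
	void (*fn)(void);
	int captured;
};

static void
example_callee(void)
{
	// A closure-using function recovers its closure value on entry.
	struct example_closure *c =
		(struct example_closure *) __go_get_closure();
	(void) c->captured;
}

static void
example_caller(struct example_closure *c)
{
	// The caller publishes the closure value immediately before the call.
	__go_set_closure(c);
	c->fn();
}
#endif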
2845 __go_set_closure(void* v)
2851 __go_get_closure(void)