1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
16 // Map gccgo field names to gc field names.
17 // Slice aka __go_open_array.
18 #define array __values
19 #define cap __capacity
20 // Iface aka __go_interface
22 // Eface aka __go_empty_interface.
23 #define type __type_descriptor
25 typedef struct __go_map Hmap;
26 // Type aka __go_type_descriptor
28 #define string __reflection
29 #define KindPtr GO_PTR
30 #define KindNoPointers GO_NO_POINTERS
31 // PtrType aka __go_ptr_type
32 #define elem __element_type
34 #ifdef USING_SPLIT_STACK
36 extern void * __splitstack_find (void *, void *, size_t *, void **, void **,
39 extern void * __splitstack_find_context (void *context[10], size_t *, void **,
46 DebugMark = 0, // run second pass to check mark
48 ScanStackByFrames = 0,
51 // Four bits per word (see #defines below).
52 wordsPerBitmapWord = sizeof(void*)*8/4,
53 bitShift = sizeof(void*)*8/4,
56 IntermediateBufferCapacity = 64,
58 // Bits in type information
61 PC_BITS = PRECISE | LOOP,
64 // Bits in per-word bitmap.
65 // #defines because enum might not be able to hold the values.
67 // Each word in the bitmap describes wordsPerBitmapWord words
68 // of heap memory. There are 4 bitmap bits dedicated to each heap word,
69 // so on a 64-bit system there is one bitmap word per 16 heap words.
70 // The bits in the word are packed together by type first, then by
71 // heap location, so each 64-bit bitmap word consists of, from top to bottom,
72 // the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits,
73 // then the 16 bitNoPointers/bitBlockBoundary bits, then the 16 bitAllocated bits.
74 // This layout makes it easier to iterate over the bits of a given type.
76 // The bitmap starts at mheap.arena_start and extends *backward* from
77 // there. On a 64-bit system the off'th word in the arena is tracked by
78 // the off/16+1'th word before mheap.arena_start. (On a 32-bit system,
79 // the only difference is that the divisor is 8.)
81 // To pull out the bits corresponding to a given pointer p, we use:
83 // off = p - (uintptr*)mheap.arena_start; // word offset
84 // b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1;
85 // shift = off % wordsPerBitmapWord
86 // bits = *b >> shift;
87 // /* then test bits & bitAllocated, bits & bitMarked, etc. */
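// For illustration only (a sketch of what markonly and runtime_markallocated
// below actually do), the converse operation reuses the same bitp/shift pair:
//
//	*bitp |= bitMarked<<shift;                       // single-threaded case
//	runtime_casp((void**)bitp, (void*)xbits,
//		(void*)(xbits | (bitMarked<<shift)));    // when other procs are running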
89 #define bitAllocated ((uintptr)1<<(bitShift*0))
90 #define bitNoPointers ((uintptr)1<<(bitShift*1)) /* when bitAllocated is set */
91 #define bitMarked ((uintptr)1<<(bitShift*2)) /* when bitAllocated is set */
92 #define bitSpecial ((uintptr)1<<(bitShift*3)) /* when bitAllocated is set - has finalizer or being profiled */
93 #define bitBlockBoundary ((uintptr)1<<(bitShift*1)) /* when bitAllocated is NOT set */
95 #define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
97 // Holding worldsema grants an M the right to try to stop the world.
100 // runtime_semacquire(&runtime_worldsema);
102 // runtime_stoptheworld();
107 // runtime_semrelease(&runtime_worldsema);
108 // runtime_starttheworld();
110 uint32 runtime_worldsema = 1;
112 static int32 gctrace;
114 // The size of Workbuf is N*PageSize.
115 typedef struct Workbuf Workbuf;
118 #define SIZE (2*PageSize-sizeof(LFNode)-sizeof(uintptr))
119 LFNode node; // must be first
121 Obj obj[SIZE/sizeof(Obj) - 1];
122 uint8 _padding[SIZE%sizeof(Obj) + sizeof(Obj)];
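// (Illustrative size arithmetic, assuming no extra alignment padding: obj[]
// occupies SIZE - SIZE%sizeof(Obj) - sizeof(Obj) bytes and _padding occupies
// SIZE%sizeof(Obj) + sizeof(Obj) bytes, so together they fill SIZE exactly and
// the whole struct comes to 2*PageSize, which scanblock checks below.)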
126 typedef struct Finalizer Finalizer;
131 const struct __go_func_type *ft;
134 typedef struct FinBlock FinBlock;
145 static FinBlock *finq; // list of finalizers that are to be executed
146 static FinBlock *finc; // cache of free blocks
147 static FinBlock *allfin; // list of all blocks
149 static int32 fingwait;
151 static void runfinq(void*);
152 static Workbuf* getempty(Workbuf*);
153 static Workbuf* getfull(Workbuf*);
154 static void putempty(Workbuf*);
155 static Workbuf* handoff(Workbuf*);
156 static void gchelperstart(void);
159 uint64 full; // lock-free list of full blocks
160 uint64 empty; // lock-free list of empty blocks
161 byte pad0[CacheLineSize]; // prevents false-sharing between full/empty and nproc/nwait
163 volatile uint32 nwait;
164 volatile uint32 ndone;
165 volatile uint32 debugmarkdone;
180 GC_DEFAULT_PTR = GC_NUM_INSTR,
201 uint64 instr[GC_NUM_INSTR2];
206 // markonly marks an object. It returns true if the object
207 // has been marked by this function, false otherwise.
208 // This function doesn't append the object to any buffer.
213 uintptr *bitp, bits, shift, x, xbits, off;
217 // Words outside the arena cannot be pointers.
218 if((byte*)obj < runtime_mheap->arena_start || (byte*)obj >= runtime_mheap->arena_used)
221 // obj may be a pointer to a live object.
222 // Try to find the beginning of the object.
224 // Round down to word boundary.
225 obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
227 // Find bits for this word.
228 off = (uintptr*)obj - (uintptr*)runtime_mheap->arena_start;
229 bitp = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
230 shift = off % wordsPerBitmapWord;
232 bits = xbits >> shift;
234 // Pointing at the beginning of a block?
235 if((bits & (bitAllocated|bitBlockBoundary)) != 0)
238 // Otherwise consult span table to find beginning.
239 // (Manually inlined copy of MHeap_LookupMaybe.)
240 k = (uintptr)obj>>PageShift;
242 if(sizeof(void*) == 8)
243 x -= (uintptr)runtime_mheap->arena_start>>PageShift;
244 s = runtime_mheap->map[x];
245 if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
247 p = (byte*)((uintptr)s->start<<PageShift);
248 if(s->sizeclass == 0) {
251 if((byte*)obj >= (byte*)s->limit)
253 uintptr size = s->elemsize;
254 int32 i = ((byte*)obj - p)/size;
258 // Now that we know the object header, reload bits.
259 off = (uintptr*)obj - (uintptr*)runtime_mheap->arena_start;
260 bitp = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
261 shift = off % wordsPerBitmapWord;
263 bits = xbits >> shift;
266 // Now we have bits, bitp, and shift correct for
267 // obj pointing at the base of the object.
268 // Only care about allocated and not marked.
269 if((bits & (bitAllocated|bitMarked)) != bitAllocated)
272 *bitp |= bitMarked<<shift;
276 if(x & (bitMarked<<shift))
278 if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
283 // The object is now marked
287 // PtrTarget is a structure used by intermediate buffers.
288 // The intermediate buffers hold GC data before it
289 // is moved/flushed to the work buffer (Workbuf).
290 // The size of an intermediate buffer is very small,
291 // such as 32 or 64 elements.
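// For illustration, the fill-then-flush pattern used throughout scanblock
// below (a sketch of existing behavior, not a new API):
//
//	*ptrbufpos++ = (struct PtrTarget){obj, objti};
//	if(ptrbufpos == ptrbuf_end)
//		flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);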
292 typedef struct PtrTarget PtrTarget;
299 typedef struct BufferList BufferList;
302 PtrTarget ptrtarget[IntermediateBufferCapacity];
303 Obj obj[IntermediateBufferCapacity];
305 byte pad[CacheLineSize];
307 static BufferList bufferList[MaxGcproc];
309 static Type *itabtype;
311 static void enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj);
313 // flushptrbuf moves data from the PtrTarget buffer to the work buffer.
314 // The PtrTarget buffer contains blocks irrespective of whether the blocks have been marked or scanned,
315 // while the work buffer contains blocks which have been marked
316 // and are prepared to be scanned by the garbage collector.
318 // _wp, _wbuf, _nobj are input/output parameters; they specify the work buffer.
320 // A simplified drawing explaining how the todo-list moves from one structure to another:
324 // Obj ------> PtrTarget (pointer targets)
329 // (find block start, mark and enqueue)
331 flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj)
333 byte *p, *arena_start, *obj;
334 uintptr size, *bitp, bits, shift, j, x, xbits, off, nobj, ti, n;
339 PtrTarget *ptrbuf_end;
341 arena_start = runtime_mheap->arena_start;
347 ptrbuf_end = *ptrbufpos;
348 n = ptrbuf_end - ptrbuf;
352 runtime_xadd64(&gcstats.ptr.sum, n);
353 runtime_xadd64(&gcstats.ptr.cnt, 1);
356 // If buffer is nearly full, get a new one.
357 if(wbuf == nil || nobj+n >= nelem(wbuf->obj)) {
360 wbuf = getempty(wbuf);
364 if(n >= nelem(wbuf->obj))
365 runtime_throw("ptrbuf has to be smaller than WorkBuf");
368 // TODO(atom): This block is a branch of an if-then-else statement.
369 // The single-threaded branch may be added in a later CL.
371 // Multi-threaded version.
373 while(ptrbuf < ptrbuf_end) {
378 // obj belongs to interval [mheap.arena_start, mheap.arena_used).
380 if(obj < runtime_mheap->arena_start || obj >= runtime_mheap->arena_used)
381 runtime_throw("object is outside of mheap");
384 // obj may be a pointer to a live object.
385 // Try to find the beginning of the object.
387 // Round down to word boundary.
388 if(((uintptr)obj & ((uintptr)PtrSize-1)) != 0) {
389 obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
393 // Find bits for this word.
394 off = (uintptr*)obj - (uintptr*)arena_start;
395 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
396 shift = off % wordsPerBitmapWord;
398 bits = xbits >> shift;
400 // Pointing at the beginning of a block?
401 if((bits & (bitAllocated|bitBlockBoundary)) != 0)
406 // Pointing just past the beginning?
407 // Scan backward a little to find a block boundary.
408 for(j=shift; j-->0; ) {
409 if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
410 obj = (byte*)obj - (shift-j)*PtrSize;
417 // Otherwise consult span table to find beginning.
418 // (Manually inlined copy of MHeap_LookupMaybe.)
419 k = (uintptr)obj>>PageShift;
421 if(sizeof(void*) == 8)
422 x -= (uintptr)arena_start>>PageShift;
423 s = runtime_mheap->map[x];
424 if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
426 p = (byte*)((uintptr)s->start<<PageShift);
427 if(s->sizeclass == 0) {
430 if((byte*)obj >= (byte*)s->limit)
433 int32 i = ((byte*)obj - p)/size;
437 // Now that we know the object header, reload bits.
438 off = (uintptr*)obj - (uintptr*)arena_start;
439 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
440 shift = off % wordsPerBitmapWord;
442 bits = xbits >> shift;
445 // Now we have bits, bitp, and shift correct for
446 // obj pointing at the base of the object.
447 // Only care about allocated and not marked.
448 if((bits & (bitAllocated|bitMarked)) != bitAllocated)
451 *bitp |= bitMarked<<shift;
455 if(x & (bitMarked<<shift))
457 if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
462 // If object has no pointers, don't need to scan further.
463 if((bits & bitNoPointers) != 0)
466 // Ask span about size class.
467 // (Manually inlined copy of MHeap_Lookup.)
468 x = (uintptr)obj >> PageShift;
469 if(sizeof(void*) == 8)
470 x -= (uintptr)arena_start>>PageShift;
471 s = runtime_mheap->map[x];
475 *wp = (Obj){obj, s->elemsize, ti};
481 // If another proc wants a pointer, give it some.
482 if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
484 wbuf = handoff(wbuf);
486 wp = wbuf->obj + nobj;
496 flushobjbuf(Obj *objbuf, Obj **objbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj)
507 objbuf_end = *objbufpos;
510 while(objbuf < objbuf_end) {
513 // Align obj.p to a word boundary.
514 off = (uintptr)obj.p & (PtrSize-1);
516 obj.p += PtrSize - off;
517 obj.n -= PtrSize - off;
521 if(obj.p == nil || obj.n == 0)
524 // If buffer is full, get a new one.
525 if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
528 wbuf = getempty(wbuf);
538 // If another proc wants a pointer, give it some.
539 if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
541 wbuf = handoff(wbuf);
543 wp = wbuf->obj + nobj;
551 // Program that scans the whole block and treats every block element as a potential pointer
552 static uintptr defaultProg[2] = {PtrSize, GC_DEFAULT_PTR};
555 // Hashmap iterator program
556 static uintptr mapProg[2] = {0, GC_MAP_NEXT};
559 static uintptr chanProg[2] = {0, GC_CHAN};
562 // Local variables of a program fragment or loop
563 typedef struct Frame Frame;
565 uintptr count, elemsize, b;
566 uintptr *loop_or_ret;
569 // Sanity check for the derived type info objti.
571 checkptr(void *obj, uintptr objti)
573 uintptr type, tisize, i, x;
579 runtime_throw("checkptr is debug only");
581 if((byte*)obj < runtime_mheap->arena_start || (byte*)obj >= runtime_mheap->arena_used)
583 type = runtime_gettype(obj);
584 t = (Type*)(type & ~(uintptr)(PtrSize-1));
587 x = (uintptr)obj >> PageShift;
588 if(sizeof(void*) == 8)
589 x -= (uintptr)(runtime_mheap->arena_start)>>PageShift;
590 s = runtime_mheap->map[x];
591 objstart = (byte*)((uintptr)s->start<<PageShift);
592 if(s->sizeclass != 0) {
593 i = ((byte*)obj - objstart)/s->elemsize;
594 objstart += i*s->elemsize;
596 tisize = *(uintptr*)objti;
597 // Sanity check for object size: it should fit into the memory block.
598 if((byte*)obj + tisize > objstart + s->elemsize)
599 runtime_throw("invalid gc type info");
602 // If obj points to the beginning of the memory block,
603 // check type info as well.
604 if(t->string == nil ||
605 // Gob allocates unsafe pointers for indirection.
606 (runtime_strcmp((const char *)t->string->str, (const char*)"unsafe.Pointer") &&
607 // Runtime and gc think differently about closures.
608 runtime_strstr((const char *)t->string->str, (const char*)"struct { F uintptr") != (const char *)t->string->str)) {
610 pc1 = (uintptr*)objti;
611 pc2 = (uintptr*)t->gc;
612 // A simple best-effort check until first GC_END.
613 for(j = 1; pc1[j] != GC_END && pc2[j] != GC_END; j++) {
614 if(pc1[j] != pc2[j]) {
615 runtime_printf("invalid gc type info for '%s' at %p, type info %p, block info %p\n",
616 t->string ? (const int8*)t->string->str : (const int8*)"?", j, pc1[j], pc2[j]);
617 runtime_throw("invalid gc type info");
624 // scanblock scans a block of n bytes starting at pointer b for references
625 // to other objects, scanning any it finds recursively until there are no
626 // unscanned objects left. Instead of using an explicit recursion, it keeps
627 // a work list in the Workbuf* structures and loops in the main function
628 // body. Keeping an explicit work list is easier on the stack allocator and more efficient.
631 // wbuf: current work buffer
632 // wp: storage for next queued pointer (write pointer)
633 // nobj: number of queued objects
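// (Informal invariant, stated here for clarity: whenever wbuf != nil, wp
// points at the next free slot, i.e. wp == wbuf->obj + nobj; the flush,
// enqueue and handoff paths below all re-establish it with
// wp = wbuf->obj + nobj.)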
635 scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
637 byte *b, *arena_start, *arena_used;
638 uintptr n, i, end_b, elemsize, size, ti, objti, count /* , type */;
639 uintptr *pc, precise_type, nominal_size;
641 uintptr *map_ret, mapkey_size, mapval_size, mapkey_ti, mapval_ti, *chan_ret, chancap;
646 Frame *stack_ptr, stack_top, stack[GC_STACK_CAPACITY+4];
647 BufferList *scanbuffers;
648 PtrTarget *ptrbuf, *ptrbuf_end, *ptrbufpos;
649 Obj *objbuf, *objbuf_end, *objbufpos;
655 bool mapkey_kind, mapval_kind;
656 struct hash_gciter map_iter;
657 struct hash_gciter_data d;
662 if(sizeof(Workbuf) % PageSize != 0)
663 runtime_throw("scanblock: size of Workbuf is suboptimal");
665 // Memory arena parameters.
666 arena_start = runtime_mheap->arena_start;
667 arena_used = runtime_mheap->arena_used;
669 stack_ptr = stack+nelem(stack)-1;
671 precise_type = false;
676 scanbuffers = &bufferList[runtime_m()->helpgc];
677 ptrbuf = &scanbuffers->ptrtarget[0];
678 ptrbuf_end = &scanbuffers->ptrtarget[0] + nelem(scanbuffers->ptrtarget);
679 objbuf = &scanbuffers->obj[0];
680 objbuf_end = &scanbuffers->obj[0] + nelem(scanbuffers->obj);
686 // (Silence the compiler)
689 mapkey_size = mapval_size = 0;
690 mapkey_kind = mapval_kind = false;
691 mapkey_ti = mapval_ti = 0;
700 // Each iteration scans the block b of length n, queueing pointers in the work buffer.
703 runtime_printf("scanblock %p %D\n", b, (int64)n);
707 runtime_xadd64(&gcstats.nbytes, n);
708 runtime_xadd64(&gcstats.obj.sum, nobj);
709 runtime_xadd64(&gcstats.obj.cnt, 1);
712 if(ti != 0 && false) {
713 pc = (uintptr*)(ti & ~(uintptr)PC_BITS);
714 precise_type = (ti & PRECISE);
715 stack_top.elemsize = pc[0];
717 nominal_size = pc[0];
719 stack_top.count = 0; // 0 means an infinite number of iterations
720 stack_top.loop_or_ret = pc+1;
725 // Simple sanity check for provided type info ti:
726 // The declared size of the object must not be larger than the actual size
727 // (it can be smaller due to inferior pointers).
728 // It's difficult to make a comprehensive check due to inferior pointers,
729 // reflection, gob, etc.
731 runtime_printf("invalid gc type info: type info size %p, block size %p\n", pc[0], n);
732 runtime_throw("invalid gc type info");
735 } else if(UseSpanType && false) {
737 runtime_xadd64(&gcstats.obj.notype, 1);
740 type = runtime_gettype(b);
743 runtime_xadd64(&gcstats.obj.typelookup, 1);
745 t = (Type*)(type & ~(uintptr)(PtrSize-1));
746 switch(type & (PtrSize-1)) {
747 case TypeInfo_SingleObject:
748 pc = (uintptr*)t->gc;
749 precise_type = true; // type information about 'b' is precise
751 stack_top.elemsize = pc[0];
754 pc = (uintptr*)t->gc;
757 precise_type = true; // type information about 'b' is precise
758 stack_top.count = 0; // 0 means an infinite number of iterations
759 stack_top.elemsize = pc[0];
760 stack_top.loop_or_ret = pc+1;
764 maptype = (MapType*)t;
765 if(hash_gciter_init(hmap, &map_iter)) {
766 mapkey_size = maptype->key->size;
767 mapkey_kind = maptype->key->kind;
768 mapkey_ti = (uintptr)maptype->key->gc | PRECISE;
769 mapval_size = maptype->elem->size;
770 mapval_kind = maptype->elem->kind;
771 mapval_ti = (uintptr)maptype->elem->gc | PRECISE;
781 chantype = (ChanType*)t;
786 runtime_throw("scanblock: invalid type");
801 stack_top.b = (uintptr)b;
803 end_b = (uintptr)b + n - PtrSize;
807 runtime_xadd64(&gcstats.instr[pc[0]], 1);
813 obj = *(void**)(stack_top.b + pc[1]);
817 checkptr(obj, objti);
821 sliceptr = (Slice*)(stack_top.b + pc[1]);
822 if(sliceptr->cap != 0) {
823 obj = sliceptr->array;
824 // Can't use slice element type for scanning,
825 // because if it points to an array embedded
826 // in the beginning of a struct,
827 // we will scan the whole struct as the slice.
828 // So just obtain type info from heap.
834 obj = *(void**)(stack_top.b + pc[1]);
839 obj = *(void**)(stack_top.b + pc[1]);
845 eface = (Eface*)(stack_top.b + pc[1]);
847 if(eface->type == nil)
852 if((const byte*)t >= arena_start && (const byte*)t < arena_used) {
853 union { const Type *tc; Type *tr; } u;
855 *ptrbufpos++ = (struct PtrTarget){(void*)u.tr, 0};
856 if(ptrbufpos == ptrbuf_end)
857 flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
861 if((byte*)eface->__object >= arena_start && (byte*)eface->__object < arena_used) {
862 if(t->__size <= sizeof(void*)) {
863 if((t->kind & KindNoPointers))
866 obj = eface->__object;
867 if((t->kind & ~KindNoPointers) == KindPtr)
868 // objti = (uintptr)((PtrType*)t)->elem->gc;
871 obj = eface->__object;
872 // objti = (uintptr)t->gc;
879 iface = (Iface*)(stack_top.b + pc[1]);
881 if(iface->tab == nil)
885 if((byte*)iface->tab >= arena_start && (byte*)iface->tab < arena_used) {
886 // *ptrbufpos++ = (struct PtrTarget){iface->tab, (uintptr)itabtype->gc};
887 *ptrbufpos++ = (struct PtrTarget){iface->tab, 0};
888 if(ptrbufpos == ptrbuf_end)
889 flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
893 if((byte*)iface->__object >= arena_start && (byte*)iface->__object < arena_used) {
894 // t = iface->tab->type;
896 if(t->__size <= sizeof(void*)) {
897 if((t->kind & KindNoPointers))
900 obj = iface->__object;
901 if((t->kind & ~KindNoPointers) == KindPtr)
902 // objti = (uintptr)((const PtrType*)t)->elem->gc;
905 obj = iface->__object;
906 // objti = (uintptr)t->gc;
913 while(stack_top.b <= end_b) {
914 obj = *(byte**)stack_top.b;
915 stack_top.b += PtrSize;
916 if((byte*)obj >= arena_start && (byte*)obj < arena_used) {
917 *ptrbufpos++ = (struct PtrTarget){obj, 0};
918 if(ptrbufpos == ptrbuf_end)
919 flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
925 if(--stack_top.count != 0) {
926 // Next iteration of a loop if possible.
927 stack_top.b += stack_top.elemsize;
928 if(stack_top.b + stack_top.elemsize <= end_b+PtrSize) {
929 pc = stack_top.loop_or_ret;
934 // Stack pop if possible.
935 if(stack_ptr+1 < stack+nelem(stack)) {
936 pc = stack_top.loop_or_ret;
937 stack_top = *(++stack_ptr);
940 i = (uintptr)b + nominal_size;
943 // Quickly scan [b+i,b+n) for possible pointers.
944 for(; i<=end_b; i+=PtrSize) {
945 if(*(byte**)i != nil) {
946 // Found a value that may be a pointer.
947 // Do a rescan of the entire block.
948 enqueue((Obj){b, n, 0}, &wbuf, &wp, &nobj);
950 runtime_xadd64(&gcstats.rescan, 1);
951 runtime_xadd64(&gcstats.rescanbytes, n);
960 i = stack_top.b + pc[1];
966 *stack_ptr-- = stack_top;
967 stack_top = (Frame){count, elemsize, i, pc};
971 if(--stack_top.count != 0) {
972 stack_top.b += stack_top.elemsize;
973 pc = stack_top.loop_or_ret;
976 stack_top = *(++stack_ptr);
983 *stack_ptr-- = stack_top;
984 stack_top = (Frame){1, 0, stack_top.b + pc[1], pc+3 /*return address*/};
985 pc = (uintptr*)((byte*)pc + *(int32*)(pc+2)); // target of the CALL instruction
990 hmap = *(Hmap**)(stack_top.b + pc[1]);
996 maptype = (MapType*)pc[2];
997 if(hash_gciter_init(hmap, &map_iter)) {
998 mapkey_size = maptype->key->size;
999 mapkey_kind = maptype->key->kind;
1000 mapkey_ti = (uintptr)maptype->key->gc | PRECISE;
1001 mapval_size = maptype->elem->size;
1002 mapval_kind = maptype->elem->kind;
1003 mapval_ti = (uintptr)maptype->elem->gc | PRECISE;
1017 // Add all keys and values to buffers, mark all subtables.
1018 while(hash_gciter_next(&map_iter, &d)) {
1019 // buffers: reserve space for 2 objects.
1020 if(ptrbufpos+2 >= ptrbuf_end)
1021 flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
1022 if(objbufpos+2 >= objbuf_end)
1023 flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
1028 if(d.key_data != nil) {
1029 if(!(mapkey_kind & KindNoPointers) || d.indirectkey) {
1031 *objbufpos++ = (Obj){d.key_data, mapkey_size, mapkey_ti};
1034 obj = *(void**)d.key_data;
1035 if(!(arena_start <= obj && obj < arena_used))
1036 runtime_throw("scanblock: inconsistent hashmap");
1038 *ptrbufpos++ = (struct PtrTarget){*(void**)d.key_data, mapkey_ti};
1041 if(!(mapval_kind & KindNoPointers) || d.indirectval) {
1043 *objbufpos++ = (Obj){d.val_data, mapval_size, mapval_ti};
1046 obj = *(void**)d.val_data;
1047 if(!(arena_start <= obj && obj < arena_used))
1048 runtime_throw("scanblock: inconsistent hashmap");
1050 *ptrbufpos++ = (struct PtrTarget){*(void**)d.val_data, mapval_ti};
1062 obj = (void*)(stack_top.b + pc[1]);
1067 *objbufpos++ = (Obj){obj, size, objti};
1068 if(objbufpos == objbuf_end)
1069 flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
1074 // Similar to GC_MAP_PTR
1075 chan = *(Hchan**)(stack_top.b + pc[1]);
1080 if(markonly(chan)) {
1081 chantype = (ChanType*)pc[2];
1082 if(!(chantype->elem->kind & KindNoPointers)) {
1093 // There are no heap pointers in struct Hchan,
1094 // so we can ignore the leading sizeof(Hchan) bytes.
1095 if(!(chantype->elem->kind & KindNoPointers)) {
1096 // Channel's buffer follows Hchan immediately in memory.
1097 // Size of buffer (cap(c)) is second int in the chan struct.
1098 chancap = ((uintgo*)chan)[1];
1100 // TODO(atom): split into two chunks so that only the
1101 // in-use part of the circular buffer is scanned.
1102 // (Channel routines zero the unused part, so the current
1103 // code does not lead to leaks, it's just a little inefficient.)
1104 *objbufpos++ = (Obj){(byte*)chan+runtime_Hchansize, chancap*chantype->elem->size,
1105 (uintptr)chantype->elem->gc | PRECISE | LOOP};
1106 if(objbufpos == objbuf_end)
1107 flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
1117 runtime_throw("scanblock: invalid GC instruction");
1121 if((byte*)obj >= arena_start && (byte*)obj < arena_used) {
1122 *ptrbufpos++ = (struct PtrTarget){obj, objti};
1123 if(ptrbufpos == ptrbuf_end)
1124 flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
1129 // Done scanning [b, b+n). Prepare for the next iteration of
1130 // the loop by setting b, n, ti to the parameters for the next block.
1133 flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
1134 flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
1142 // Emptied our buffer: refill.
1143 wbuf = getfull(wbuf);
1147 wp = wbuf->obj + wbuf->nobj;
1151 // Fetch b from the work buffer.
1162 // debug_scanblock is the debug copy of scanblock.
1163 // It is simpler, slower, single-threaded, recursive,
1164 // and uses bitSpecial as the mark bit.
1166 debug_scanblock(byte *b, uintptr n)
1170 uintptr size, *bitp, bits, shift, i, xbits, off;
1174 runtime_throw("debug_scanblock without DebugMark");
1177 runtime_printf("debug_scanblock %p %D\n", b, (int64)n);
1178 runtime_throw("debug_scanblock");
1181 // Align b to a word boundary.
1182 off = (uintptr)b & (PtrSize-1);
1190 for(i=0; i<(uintptr)n; i++) {
1193 // Words outside the arena cannot be pointers.
1194 if((byte*)obj < runtime_mheap->arena_start || (byte*)obj >= runtime_mheap->arena_used)
1197 // Round down to word boundary.
1198 obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
1200 // Consult span table to find beginning.
1201 s = runtime_MHeap_LookupMaybe(runtime_mheap, obj);
1205 p = (byte*)((uintptr)s->start<<PageShift);
1207 if(s->sizeclass == 0) {
1210 if((byte*)obj >= (byte*)s->limit)
1212 int32 i = ((byte*)obj - p)/size;
1216 // Now that we know the object header, reload bits.
1217 off = (uintptr*)obj - (uintptr*)runtime_mheap->arena_start;
1218 bitp = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
1219 shift = off % wordsPerBitmapWord;
1221 bits = xbits >> shift;
1223 // Now we have bits, bitp, and shift correct for
1224 // obj pointing at the base of the object.
1225 // If not allocated or already marked, done.
1226 if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0) // NOTE: bitSpecial not bitMarked
1228 *bitp |= bitSpecial<<shift;
1229 if(!(bits & bitMarked))
1230 runtime_printf("found unmarked block %p in %p\n", obj, vp+i);
1232 // If object has no pointers, don't need to scan further.
1233 if((bits & bitNoPointers) != 0)
1236 debug_scanblock(obj, size);
1240 // Append obj to the work buffer.
1241 // _wbuf, _wp, _nobj are input/output parameters; they specify the work buffer.
1243 enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj)
1250 runtime_printf("append obj(%p %D %p)\n", obj.p, (int64)obj.n, obj.ti);
1252 // Align obj.p to a word boundary.
1253 off = (uintptr)obj.p & (PtrSize-1);
1255 obj.p += PtrSize - off;
1256 obj.n -= PtrSize - off;
1260 if(obj.p == nil || obj.n == 0)
1263 // Load work buffer state
1268 // If another proc wants a pointer, give it some.
1269 if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
1271 wbuf = handoff(wbuf);
1273 wp = wbuf->obj + nobj;
1276 // If buffer is full, get a new one.
1277 if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
1280 wbuf = getempty(wbuf);
1289 // Save work buffer state
1296 markroot(ParFor *desc, uint32 i)
1306 enqueue(work.roots[i], &wbuf, &wp, &nobj);
1307 scanblock(wbuf, wp, nobj, false);
1310 // Get an empty work buffer off the work.empty list,
1311 // allocating new buffers as needed.
1313 getempty(Workbuf *b)
1316 runtime_lfstackpush(&work.full, &b->node);
1317 b = (Workbuf*)runtime_lfstackpop(&work.empty);
1319 // Need to allocate.
1320 runtime_lock(&work);
1321 if(work.nchunk < sizeof *b) {
1322 work.nchunk = 1<<20;
1323 work.chunk = runtime_SysAlloc(work.nchunk);
1324 if(work.chunk == nil)
1325 runtime_throw("runtime: cannot allocate memory");
1327 b = (Workbuf*)work.chunk;
1328 work.chunk += sizeof *b;
1329 work.nchunk -= sizeof *b;
1330 runtime_unlock(&work);
1337 putempty(Workbuf *b)
1340 runtime_xadd64(&gcstats.putempty, 1);
1342 runtime_lfstackpush(&work.empty, &b->node);
1345 // Get a full work buffer off the work.full list, or return nil.
1353 runtime_xadd64(&gcstats.getfull, 1);
1356 runtime_lfstackpush(&work.empty, &b->node);
1357 b = (Workbuf*)runtime_lfstackpop(&work.full);
1358 if(b != nil || work.nproc == 1)
1362 runtime_xadd(&work.nwait, +1);
1364 if(work.full != 0) {
1365 runtime_xadd(&work.nwait, -1);
1366 b = (Workbuf*)runtime_lfstackpop(&work.full);
1369 runtime_xadd(&work.nwait, +1);
1371 if(work.nwait == work.nproc)
1374 m->gcstats.nprocyield++;
1375 runtime_procyield(20);
1377 m->gcstats.nosyield++;
1380 m->gcstats.nsleep++;
1381 runtime_usleep(100);
1395 // Make new buffer with half of b's pointers.
1400 runtime_memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
1401 m->gcstats.nhandoff++;
1402 m->gcstats.nhandoffcnt += n;
1404 // Put b on full list - let first half of b get stolen.
1405 runtime_lfstackpush(&work.full, &b->node);
1415 if(work.nroot >= work.rootcap) {
1416 cap = PageSize/sizeof(Obj);
1417 if(cap < 2*work.rootcap)
1418 cap = 2*work.rootcap;
1419 new = (Obj*)runtime_SysAlloc(cap*sizeof(Obj));
1421 runtime_throw("runtime: cannot allocate memory");
1422 if(work.roots != nil) {
1423 runtime_memmove(new, work.roots, work.rootcap*sizeof(Obj));
1424 runtime_SysFree(work.roots, work.rootcap*sizeof(Obj));
1429 work.roots[work.nroot] = obj;
1434 addstackroots(G *gp)
1436 #ifdef USING_SPLIT_STACK
1444 if(gp == runtime_g()) {
1445 // Scanning our own stack.
1446 sp = __splitstack_find(nil, nil, &spsize, &next_segment,
1447 &next_sp, &initial_sp);
1448 } else if((mp = gp->m) != nil && mp->helpgc) {
1449 // gchelper's stack is in active use and has no interesting pointers.
1452 // Scanning another goroutine's stack.
1453 // The goroutine is usually asleep (the world is stopped).
1455 // The exception is that if the goroutine is about to enter or might
1456 // have just exited a system call, it may be executing code such
1457 // as schedlock and may have needed to start a new stack segment.
1458 // Use the stack segment and stack pointer at the time of
1459 // the system call instead, since that won't change underfoot.
1460 if(gp->gcstack != nil) {
1462 spsize = gp->gcstack_size;
1463 next_segment = gp->gcnext_segment;
1464 next_sp = gp->gcnext_sp;
1465 initial_sp = gp->gcinitial_sp;
1467 sp = __splitstack_find_context(&gp->stack_context[0],
1468 &spsize, &next_segment,
1469 &next_sp, &initial_sp);
1473 addroot((Obj){sp, spsize, 0});
1474 while((sp = __splitstack_find(next_segment, next_sp,
1475 &spsize, &next_segment,
1476 &next_sp, &initial_sp)) != nil)
1477 addroot((Obj){sp, spsize, 0});
1484 if(gp == runtime_g()) {
1485 // Scanning our own stack.
1486 bottom = (byte*)&gp;
1487 } else if((mp = gp->m) != nil && mp->helpgc) {
1488 // gchelper's stack is in active use and has no interesting pointers.
1491 // Scanning another goroutine's stack.
1492 // The goroutine is usually asleep (the world is stopped).
1493 bottom = (byte*)gp->gcnext_sp;
1497 top = (byte*)gp->gcinitial_sp + gp->gcstack_size;
1499 addroot((Obj){bottom, top - bottom, 0});
1501 addroot((Obj){top, bottom - top, 0});
1506 addfinroots(void *v)
1512 if(!runtime_mlookup(v, (byte**)&base, &size, nil) || !runtime_blockspecial(base))
1513 runtime_throw("mark - finalizer inconsistency");
1515 // Do not mark the finalizer block itself; just mark the things it points at.
1516 addroot((Obj){base, size, 0});
1519 static struct root_list* roots;
1522 __go_register_gc_roots (struct root_list* r)
1524 // FIXME: This needs locking if multiple goroutines can call
1525 // dlopen simultaneously.
1533 struct root_list *pl;
1536 MSpan *s, **allspans;
1542 for(pl = roots; pl != nil; pl = pl->next) {
1543 struct root* pr = &pl->roots[0];
1545 void *decl = pr->decl;
1548 addroot((Obj){decl, pr->size, 0});
1553 addroot((Obj){(byte*)&runtime_m0, sizeof runtime_m0, 0});
1554 addroot((Obj){(byte*)&runtime_g0, sizeof runtime_g0, 0});
1555 addroot((Obj){(byte*)&runtime_allg, sizeof runtime_allg, 0});
1556 addroot((Obj){(byte*)&runtime_allm, sizeof runtime_allm, 0});
1557 addroot((Obj){(byte*)&runtime_allp, sizeof runtime_allp, 0});
1558 runtime_proc_scan(addroot);
1559 runtime_MProf_Mark(addroot);
1560 runtime_time_scan(addroot);
1563 allspans = runtime_mheap->allspans;
1564 for(spanidx=0; spanidx<runtime_mheap->nspan; spanidx++) {
1565 s = allspans[spanidx];
1566 if(s->state == MSpanInUse) {
1567 // The garbage collector ignores type pointers stored in MSpan.types:
1568 // - Compiler-generated types are stored outside of heap.
1569 // - The reflect package has runtime-generated types cached in its data structures.
1570 // The garbage collector relies on finding the references via that cache.
1571 switch(s->types.compression) {
1577 markonly((byte*)s->types.data);
1584 for(gp=runtime_allg; gp!=nil; gp=gp->alllink) {
1587 runtime_printf("unexpected G.status %d\n", gp->status);
1588 runtime_throw("mark - bad status");
1592 if(gp != runtime_g())
1593 runtime_throw("mark - world not stopped");
1604 runtime_walkfintab(addfinroots, addroot);
1606 for(fb=allfin; fb; fb=fb->alllink)
1607 addroot((Obj){(byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), 0});
1609 addroot((Obj){(byte*)&work, sizeof work, 0});
1613 handlespecial(byte *p, uintptr size)
1616 const struct __go_func_type *ft;
1620 if(!runtime_getfinalizer(p, true, &fn, &ft)) {
1621 runtime_setblockspecial(p, false);
1622 runtime_MProf_Free(p, size);
1626 runtime_lock(&finlock);
1627 if(finq == nil || finq->cnt == finq->cap) {
1629 finc = runtime_SysAlloc(PageSize);
1631 runtime_throw("runtime: cannot allocate memory");
1632 finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
1633 finc->alllink = allfin;
1641 f = &finq->fin[finq->cnt];
1646 runtime_unlock(&finlock);
1650 // Sweep frees or collects finalizers for blocks not marked in the mark phase.
1651 // It clears the mark bits in preparation for the next GC round.
1653 sweepspan(ParFor *desc, uint32 idx)
1656 int32 cl, n, npages;
1665 uintptr type_data_inc;
1671 s = runtime_mheap->allspans[idx];
1672 if(s->state != MSpanInUse)
1674 arena_start = runtime_mheap->arena_start;
1675 p = (byte*)(s->start << PageShift);
1681 // Chunk full of small blocks.
1682 npages = runtime_class_to_allocnpages[cl];
1683 n = (npages << PageShift) / size;
1689 type_data = (byte*)s->types.data;
1690 type_data_inc = sizeof(uintptr);
1691 compression = s->types.compression;
1692 switch(compression) {
1694 type_data += 8*sizeof(uintptr);
1699 // Sweep through n objects of given size starting at p.
1700 // This thread owns the span now, so it can manipulate
1701 // the block bitmap without atomic operations.
1702 for(; n > 0; n--, p += size, type_data+=type_data_inc) {
1703 uintptr off, *bitp, shift, bits;
1705 off = (uintptr*)p - (uintptr*)arena_start;
1706 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
1707 shift = off % wordsPerBitmapWord;
1708 bits = *bitp>>shift;
1710 if((bits & bitAllocated) == 0)
1713 if((bits & bitMarked) != 0) {
1715 if(!(bits & bitSpecial))
1716 runtime_printf("found spurious mark on %p\n", p);
1717 *bitp &= ~(bitSpecial<<shift);
1719 *bitp &= ~(bitMarked<<shift);
1723 // Special means it has a finalizer or is being profiled.
1724 // In DebugMark mode, the bit has been coopted so
1725 // we have to assume all blocks are special.
1726 if(DebugMark || (bits & bitSpecial) != 0) {
1727 if(handlespecial(p, size))
1731 // Mark freed; restore block boundary bit.
1732 *bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
1736 runtime_unmarkspan(p, 1<<PageShift);
1737 *(uintptr*)p = (uintptr)0xdeaddeaddeaddeadll; // needs zeroing
1738 runtime_MHeap_Free(runtime_mheap, s, 1);
1739 c->local_alloc -= size;
1742 // Free small object.
1743 switch(compression) {
1745 *(uintptr*)type_data = 0;
1748 *(byte*)type_data = 0;
1751 if(size > sizeof(uintptr))
1752 ((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll; // mark as "needs to be zeroed"
1754 end->next = (MLink*)p;
1761 c->local_by_size[cl].nfree += nfree;
1762 c->local_alloc -= size * nfree;
1763 c->local_nfree += nfree;
1764 c->local_cachealloc -= nfree * size;
1765 c->local_objects -= nfree;
1766 runtime_MCentral_FreeSpan(&runtime_mheap->central[cl], s, nfree, head.next, end);
1771 dumpspan(uint32 idx)
1773 int32 sizeclass, n, npages, i, column;
1778 bool allocated, special;
1780 s = runtime_mheap->allspans[idx];
1781 if(s->state != MSpanInUse)
1783 arena_start = runtime_mheap->arena_start;
1784 p = (byte*)(s->start << PageShift);
1785 sizeclass = s->sizeclass;
1787 if(sizeclass == 0) {
1790 npages = runtime_class_to_allocnpages[sizeclass];
1791 n = (npages << PageShift) / size;
1794 runtime_printf("%p .. %p:\n", p, p+n*size);
1796 for(; n>0; n--, p+=size) {
1797 uintptr off, *bitp, shift, bits;
1799 off = (uintptr*)p - (uintptr*)arena_start;
1800 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
1801 shift = off % wordsPerBitmapWord;
1802 bits = *bitp>>shift;
1804 allocated = ((bits & bitAllocated) != 0);
1805 special = ((bits & bitSpecial) != 0);
1807 for(i=0; (uint32)i<size; i+=sizeof(void*)) {
1809 runtime_printf("\t");
1812 runtime_printf(allocated ? "(" : "[");
1813 runtime_printf(special ? "@" : "");
1814 runtime_printf("%p: ", p+i);
1816 runtime_printf(" ");
1819 runtime_printf("%p", *(void**)(p+i));
1821 if(i+sizeof(void*) >= size) {
1822 runtime_printf(allocated ? ") " : "] ");
1827 runtime_printf("\n");
1832 runtime_printf("\n");
1835 // A debugging function to dump the contents of memory
1837 runtime_memorydump(void)
1841 for(spanidx=0; spanidx<runtime_mheap->nspan; spanidx++) {
1847 runtime_gchelper(void)
1851 // parallel mark over the gc roots
1852 runtime_parfordo(work.markfor);
1854 // help other threads scan secondary blocks
1855 scanblock(nil, nil, 0, true);
1858 // wait while the main thread executes mark(debug_scanblock)
1859 while(runtime_atomicload(&work.debugmarkdone) == 0)
1863 runtime_parfordo(work.sweepfor);
1864 bufferList[runtime_m()->helpgc].busy = 0;
1865 if(runtime_xadd(&work.ndone, +1) == work.nproc-1)
1866 runtime_notewakeup(&work.alldone);
1869 #define GcpercentUnknown (-2)
1871 // Initialized from $GOGC. GOGC=off means no gc.
1873 // Next gc is after we've allocated an extra amount of
1874 // memory proportional to the amount already in use.
1875 // If gcpercent=100 and we're using 4M, we'll gc again
1876 // when we get to 8M. This keeps the gc cost in linear
1877 // proportion to the allocation cost. Adjusting gcpercent
1878 // just changes the linear constant (and also the amount of
1879 // extra memory used).
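// For example, with the formula applied after sweeping below (and ignoring
// runtime_stacks_sys for simplicity):
//
//	next_gc = heap_alloc + heap_alloc*gcpercent/100
//	        = 4M + 4M*100/100 = 8M   when gcpercent=100 and 4M is in use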
1880 static int32 gcpercent = GcpercentUnknown;
1883 cachestats(GCStats *stats)
1889 uint64 stacks_inuse;
1893 runtime_memclr((byte*)stats, sizeof(*stats));
1895 for(mp=runtime_allm; mp; mp=mp->alllink) {
1896 //stacks_inuse += mp->stackinuse*FixedStack;
1898 src = (uint64*)&mp->gcstats;
1899 dst = (uint64*)stats;
1900 for(i=0; i<sizeof(*stats)/sizeof(uint64); i++)
1902 runtime_memclr((byte*)&mp->gcstats, sizeof(mp->gcstats));
1905 for(pp=runtime_allp; (p=*pp) != nil; pp++) {
1909 runtime_purgecachedstats(c);
1910 for(i=0; i<nelem(c->local_by_size); i++) {
1911 mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
1912 c->local_by_size[i].nmalloc = 0;
1913 mstats.by_size[i].nfree += c->local_by_size[i].nfree;
1914 c->local_by_size[i].nfree = 0;
1917 mstats.stacks_inuse = stacks_inuse;
1920 // Structure of arguments passed to function gc().
1921 // This allows the arguments to be passed via reflect_call.
1927 static void gc(struct gc_args *args);
1934 p = runtime_getenv("GOGC");
1935 if(p == nil || p[0] == '\0')
1937 if(runtime_strcmp((const char *)p, "off") == 0)
1939 return runtime_atoi(p);
1943 runtime_gc(int32 force)
1947 struct gc_args a, *ap;
1949 // The atomic operations are not atomic if the uint64s
1950 // are not aligned on uint64 boundaries. This has been
1951 // a problem in the past.
1952 if((((uintptr)&work.empty) & 7) != 0)
1953 runtime_throw("runtime: gc work buffer is misaligned");
1954 if((((uintptr)&work.full) & 7) != 0)
1955 runtime_throw("runtime: gc work buffer is misaligned");
1957 // Make sure all registers are saved on stack so that
1958 // scanstack sees them.
1959 __builtin_unwind_init();
1961 // The gc is turned off (via enablegc) until
1962 // the bootstrap has completed.
1963 // Also, malloc gets called in the guts
1964 // of a number of libraries that might be
1965 // holding locks. To avoid priority inversion
1966 // problems, don't bother trying to run gc
1967 // while holding a lock. The next mallocgc
1968 // without a lock will do the gc instead.
1970 if(!mstats.enablegc || m->locks > 0 || runtime_panicking)
1973 if(gcpercent == GcpercentUnknown) { // first time through
1974 gcpercent = readgogc();
1976 p = runtime_getenv("GOGCTRACE");
1978 gctrace = runtime_atoi(p);
1983 // Run gc on a bigger stack to eliminate
1984 // a potentially large number of calls to runtime_morestack.
1985 // But not when using gccgo.
1990 if(gctrace > 1 && !force) {
1997 gc(struct gc_args *args)
2000 int64 t0, t1, t2, t3, t4;
2001 uint64 heap0, heap1, obj0, obj1, ninstr;
2007 runtime_semacquire(&runtime_worldsema);
2008 if(!args->force && mstats.heap_alloc < mstats.next_gc) {
2009 runtime_semrelease(&runtime_worldsema);
2015 t0 = runtime_nanotime();
2018 runtime_stoptheworld();
2021 runtime_memclr((byte*)&gcstats, sizeof(gcstats));
2023 for(mp=runtime_allm; mp; mp=mp->alllink)
2024 runtime_settype_flush(mp, false);
2030 heap0 = mstats.heap_alloc;
2031 obj0 = mstats.nmalloc - mstats.nfree;
2034 m->locks++; // disable gc during mallocs in parforalloc
2035 if(work.markfor == nil)
2036 work.markfor = runtime_parforalloc(MaxGcproc);
2037 if(work.sweepfor == nil)
2038 work.sweepfor = runtime_parforalloc(MaxGcproc);
2041 if(itabtype == nil) {
2042 // get C pointer to the Go type "itab"
2043 // runtime_gc_itab_ptr(&eface);
2044 // itabtype = ((PtrType*)eface.type)->elem;
2049 work.debugmarkdone = 0;
2050 work.nproc = runtime_gcprocs();
2052 runtime_parforsetup(work.markfor, work.nproc, work.nroot, nil, false, markroot);
2053 runtime_parforsetup(work.sweepfor, work.nproc, runtime_mheap->nspan, nil, true, sweepspan);
2054 if(work.nproc > 1) {
2055 runtime_noteclear(&work.alldone);
2056 runtime_helpgc(work.nproc);
2059 t1 = runtime_nanotime();
2062 runtime_parfordo(work.markfor);
2063 scanblock(nil, nil, 0, true);
2066 for(i=0; i<work.nroot; i++)
2067 debug_scanblock(work.roots[i].p, work.roots[i].n);
2068 runtime_atomicstore(&work.debugmarkdone, 1);
2070 t2 = runtime_nanotime();
2072 runtime_parfordo(work.sweepfor);
2073 bufferList[m->helpgc].busy = 0;
2074 t3 = runtime_nanotime();
2077 runtime_notesleep(&work.alldone);
2081 stats.nprocyield += work.sweepfor->nprocyield;
2082 stats.nosyield += work.sweepfor->nosyield;
2083 stats.nsleep += work.sweepfor->nsleep;
2085 mstats.next_gc = mstats.heap_alloc+(mstats.heap_alloc-runtime_stacks_sys)*gcpercent/100;
2089 m->locks++; // disable gc during the mallocs in newproc
2090 // kick off or wake up goroutine to run queued finalizers
2092 fing = __go_go(runfinq, nil);
2095 runtime_ready(fing);
2100 heap1 = mstats.heap_alloc;
2101 obj1 = mstats.nmalloc - mstats.nfree;
2103 t4 = runtime_nanotime();
2104 mstats.last_gc = t4;
2105 mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t4 - t0;
2106 mstats.pause_total_ns += t4 - t0;
2109 runtime_printf("pause %D\n", t4-t0);
2112 runtime_printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects,"
2113 " %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n",
2114 mstats.numgc, work.nproc, (t2-t1)/1000000, (t3-t2)/1000000, (t1-t0+t4-t3)/1000000,
2115 heap0>>20, heap1>>20, obj0, obj1,
2116 mstats.nmalloc, mstats.nfree,
2117 stats.nhandoff, stats.nhandoffcnt,
2118 work.sweepfor->nsteal, work.sweepfor->nstealcnt,
2119 stats.nprocyield, stats.nosyield, stats.nsleep);
2121 runtime_printf("scan: %D bytes, %D objects, %D untyped, %D types from MSpan\n",
2122 gcstats.nbytes, gcstats.obj.cnt, gcstats.obj.notype, gcstats.obj.typelookup);
2123 if(gcstats.ptr.cnt != 0)
2124 runtime_printf("avg ptrbufsize: %D (%D/%D)\n",
2125 gcstats.ptr.sum/gcstats.ptr.cnt, gcstats.ptr.sum, gcstats.ptr.cnt);
2126 if(gcstats.obj.cnt != 0)
2127 runtime_printf("avg nobj: %D (%D/%D)\n",
2128 gcstats.obj.sum/gcstats.obj.cnt, gcstats.obj.sum, gcstats.obj.cnt);
2129 runtime_printf("rescans: %D, %D bytes\n", gcstats.rescan, gcstats.rescanbytes);
2131 runtime_printf("instruction counts:\n");
2133 for(i=0; i<nelem(gcstats.instr); i++) {
2134 runtime_printf("\t%d:\t%D\n", i, gcstats.instr[i]);
2135 ninstr += gcstats.instr[i];
2137 runtime_printf("\ttotal:\t%D\n", ninstr);
2139 runtime_printf("putempty: %D, getfull: %D\n", gcstats.putempty, gcstats.getfull);
2144 runtime_semrelease(&runtime_worldsema);
2145 runtime_starttheworld();
2147 // give the queued finalizers, if any, a chance to run
2152 void runtime_ReadMemStats(MStats *)
2153 __asm__ (GOSYM_PREFIX "runtime.ReadMemStats");
2156 runtime_ReadMemStats(MStats *stats)
2160 // Have to acquire worldsema to stop the world,
2161 // because stoptheworld can only be used by
2162 // one goroutine at a time, and there might be
2163 // a pending garbage collection already calling it.
2164 runtime_semacquire(&runtime_worldsema);
2167 runtime_stoptheworld();
2171 runtime_semrelease(&runtime_worldsema);
2172 runtime_starttheworld();
2175 void runtime_debug_readGCStats(Slice*)
2176 __asm__("runtime_debug.readGCStats");
2179 runtime_debug_readGCStats(Slice *pauses)
2184 // Calling code in runtime/debug should make the slice large enough.
2185 if((size_t)pauses->cap < nelem(mstats.pause_ns)+3)
2186 runtime_throw("runtime: short slice passed to readGCStats");
2188 // Pass back: pauses, last gc (absolute time), number of gc, total pause ns.
2189 p = (uint64*)pauses->array;
2190 runtime_lock(runtime_mheap);
2192 if(n > nelem(mstats.pause_ns))
2193 n = nelem(mstats.pause_ns);
2195 // The pause buffer is circular. The most recent pause is at
2196 // pause_ns[(numgc-1)%nelem(pause_ns)], and then backward
2197 // from there to go back farther in time. We deliver the times
2198 // most recent first (in p[0]).
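// For example, assuming a 256-entry pause_ns buffer and numgc=300 (illustrative
// numbers only): p[0] = pause_ns[299%256] = pause_ns[43], p[1] = pause_ns[42],
// and so on, wrapping around past index 0.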
2200 p[i] = mstats.pause_ns[(mstats.numgc-1-i)%nelem(mstats.pause_ns)];
2202 p[n] = mstats.last_gc;
2203 p[n+1] = mstats.numgc;
2204 p[n+2] = mstats.pause_total_ns;
2205 runtime_unlock(runtime_mheap);
2206 pauses->__count = n+3;
2209 intgo runtime_debug_setGCPercent(intgo)
2210 __asm__("runtime_debug.setGCPercent");
2213 runtime_debug_setGCPercent(intgo in)
2217 runtime_lock(runtime_mheap);
2218 if(gcpercent == GcpercentUnknown)
2219 gcpercent = readgogc();
2224 runtime_unlock(runtime_mheap);
2234 if(m->helpgc < 0 || m->helpgc >= MaxGcproc)
2235 runtime_throw("gchelperstart: bad m->helpgc");
2236 if(runtime_xchg(&bufferList[m->helpgc].busy, 1))
2237 runtime_throw("gchelperstart: already busy");
2241 runfinq(void* dummy __attribute__ ((unused)))
2244 FinBlock *fb, *next;
2248 // There's no need for a lock in this section
2249 // because it only conflicts with the garbage
2250 // collector, and the garbage collector only
2251 // runs when everyone else is stopped, and
2252 // runfinq only stops at the gosched() or
2253 // during the calls in the for loop.
2258 runtime_park(nil, nil, "finalizer wait");
2262 runtime_racefingo();
2263 for(; fb; fb=next) {
2265 for(i=0; i<(uint32)fb->cnt; i++) {
2270 reflect_call(f->ft, f->fn, 0, 0, ¶m, nil);
2278 runtime_gc(1); // trigger another gc to clean up the finalized objects, if possible
2282 // mark the block at v of size n as allocated.
2283 // If noptr is true, mark it as having no pointers.
2285 runtime_markallocated(void *v, uintptr n, bool noptr)
2287 uintptr *b, obits, bits, off, shift;
2290 runtime_printf("markallocated %p+%p\n", v, n);
2292 if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start)
2293 runtime_throw("markallocated: bad pointer");
2295 off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start; // word offset
2296 b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
2297 shift = off % wordsPerBitmapWord;
2301 bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift);
2303 bits |= bitNoPointers<<shift;
2304 if(runtime_singleproc) {
2308 // more than one goroutine is potentially running: use atomic op
2309 if(runtime_casp((void**)b, (void*)obits, (void*)bits))
2315 // mark the block at v of size n as freed.
2317 runtime_markfreed(void *v, uintptr n)
2319 uintptr *b, obits, bits, off, shift;
2322 runtime_printf("markfreed %p+%p\n", v, n);
2324 if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start)
2325 runtime_throw("markfreed: bad pointer");
2327 off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start; // word offset
2328 b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
2329 shift = off % wordsPerBitmapWord;
2333 bits = (obits & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
2334 if(runtime_singleproc) {
2338 // more than one goroutine is potentially running: use atomic op
2339 if(runtime_casp((void**)b, (void*)obits, (void*)bits))
2345 // check that the block at v of size n is marked freed.
2347 runtime_checkfreed(void *v, uintptr n)
2349 uintptr *b, bits, off, shift;
2351 if(!runtime_checking)
2354 if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start)
2355 return; // not allocated, so okay
2357 off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start; // word offset
2358 b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
2359 shift = off % wordsPerBitmapWord;
2362 if((bits & bitAllocated) != 0) {
2363 runtime_printf("checkfreed %p+%p: off=%p have=%p\n",
2364 v, n, off, bits & bitMask);
2365 runtime_throw("checkfreed: not freed");
2369 // mark the span of memory at v as having n blocks of the given size.
2370 // if leftover is true, there is left over space at the end of the span.
2372 runtime_markspan(void *v, uintptr size, uintptr n, bool leftover)
2374 uintptr *b, off, shift;
2377 if((byte*)v+size*n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start)
2378 runtime_throw("markspan: bad pointer");
2381 if(leftover) // mark a boundary just past end of last block too
2383 for(; n-- > 0; p += size) {
2384 // Okay to use non-atomic ops here, because we control
2385 // the entire span, and each bitmap word has bits for only
2386 // one span, so no other goroutines are changing these bitmap words.
2388 off = (uintptr*)p - (uintptr*)runtime_mheap->arena_start; // word offset
2389 b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
2390 shift = off % wordsPerBitmapWord;
2391 *b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
2395 // unmark the span of memory at v of length n bytes.
2397 runtime_unmarkspan(void *v, uintptr n)
2399 uintptr *p, *b, off;
2401 if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start)
2402 runtime_throw("unmarkspan: bad pointer");
2405 off = p - (uintptr*)runtime_mheap->arena_start; // word offset
2406 if(off % wordsPerBitmapWord != 0)
2407 runtime_throw("unmarkspan: unaligned pointer");
2408 b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
2410 if(n%wordsPerBitmapWord != 0)
2411 runtime_throw("unmarkspan: unaligned length");
2412 // Okay to use non-atomic ops here, because we control
2413 // the entire span, and each bitmap word has bits for only
2414 // one span, so no other goroutines are changing these bitmap words.
2416 n /= wordsPerBitmapWord;
2422 runtime_blockspecial(void *v)
2424 uintptr *b, off, shift;
2429 off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start;
2430 b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
2431 shift = off % wordsPerBitmapWord;
2433 return (*b & (bitSpecial<<shift)) != 0;
2437 runtime_setblockspecial(void *v, bool s)
2439 uintptr *b, off, shift, bits, obits;
2444 off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start;
2445 b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
2446 shift = off % wordsPerBitmapWord;
2451 bits = obits | (bitSpecial<<shift);
2453 bits = obits & ~(bitSpecial<<shift);
2454 if(runtime_singleproc) {
2458 // more than one goroutine is potentially running: use atomic op
2459 if(runtime_casp((void**)b, (void*)obits, (void*)bits))
2466 runtime_MHeap_MapBits(MHeap *h)
2470 // Caller has added extra mappings to the arena.
2471 // Add extra mappings of bitmap words as needed.
2472 // We allocate extra bitmap pieces in chunks of bitmapChunk.
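// (Illustration of the rounding below: n is rounded up first to a multiple of
// bitmapChunk and then to a multiple of the OS page size, so the mapped bitmap
// always grows in page-aligned steps.)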
2478 n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
2479 n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
2480 if(h->bitmap_mapped >= n)
2483 page_size = getpagesize();
2484 n = (n+page_size-1) & ~(page_size-1);
2486 runtime_SysMap(h->arena_start - n, n - h->bitmap_mapped);
2487 h->bitmap_mapped = n;