libgo/runtime/mprof.goc (platform/upstream/gcc48.git, "Importing Upstream version 4.8.2")
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.

package runtime
#include "runtime.h"
#include "arch.h"
#include "malloc.h"
#include "defs.h"
#include "go-type.h"
#include "go-string.h"

// NOTE(rsc): Everything here could use cas if contention became an issue.
static Lock proflock, alloclock;

// All memory allocations are local and do not escape outside of the profiler.
// The profiler is forbidden from referring to garbage-collected memory.

static byte *pool;        // memory allocation pool
static uintptr poolfree;  // number of bytes left in the pool
enum {
        Chunk = 32*PageSize,  // initial size of the pool
};

// Memory allocation local to this file.
// There is no way to return the allocated memory back to the OS.
static void*
allocate(uintptr size)
{
        void *v;

        if(size == 0)
                return nil;

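        // Requests of Chunk/2 or more are handed straight to the OS below,
        // presumably so that one large request cannot strand most of a
        // freshly allocated pool chunk.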
        if(size >= Chunk/2)
                return runtime_SysAlloc(size);

        runtime_lock(&alloclock);
        if(size > poolfree) {
                pool = runtime_SysAlloc(Chunk);
                if(pool == nil)
                        runtime_throw("runtime: cannot allocate memory");
                poolfree = Chunk;
        }
        v = pool;
        pool += size;
        poolfree -= size;
        runtime_unlock(&alloclock);
        return v;
}

enum { MProf, BProf };  // profile types

// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
typedef struct Bucket Bucket;
struct Bucket
{
        Bucket  *next;  // next in hash list
        Bucket  *allnext;       // next in list of all mbuckets/bbuckets
        int32   typ;
        // Generally unions can break precise GC,
        // this one is fine because it does not contain pointers.
        union
        {
                struct  // typ == MProf
                {
                        uintptr allocs;
                        uintptr frees;
                        uintptr alloc_bytes;
                        uintptr free_bytes;
                        uintptr recent_allocs;  // since last gc
                        uintptr recent_frees;
                        uintptr recent_alloc_bytes;
                        uintptr recent_free_bytes;
                };
                struct  // typ == BProf
                {
                        int64   count;
                        int64   cycles;
                };
        };
        uintptr hash;
        uintptr nstk;
        Location stk[1];
};
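// Note: stk[1] is really a variable-length trailing array. stkbucket below
// allocates each Bucket with sizeof *b + nstk*sizeof stk[0] bytes so the
// call stack is stored inline after the fixed fields.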
enum {
        BuckHashSize = 179999,
};
static Bucket **buckhash;
static Bucket *mbuckets;  // memory profile buckets
static Bucket *bbuckets;  // blocking profile buckets
static uintptr bucketmem;

// Return the bucket for stk[0:nstk], allocating new bucket if needed.
static Bucket*
stkbucket(int32 typ, Location *stk, int32 nstk, bool alloc)
{
        int32 i, j;
        uintptr h;
        Bucket *b;

        if(buckhash == nil) {
                buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0]);
                if(buckhash == nil)
                        runtime_throw("runtime: cannot allocate memory");
                mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0];
        }

        // Hash stack.
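        // The mixing steps below (h += h<<10; h ^= h>>6; ... h += h<<3;
        // h ^= h>>11) resemble Jenkins' one-at-a-time hash applied to each
        // PC; a cheap way to spread clustered return addresses across the
        // BuckHashSize chains.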
        h = 0;
        for(i=0; i<nstk; i++) {
                h += stk[i].pc;
                h += h<<10;
                h ^= h>>6;
        }
        h += h<<3;
        h ^= h>>11;

        i = h%BuckHashSize;
        for(b = buckhash[i]; b; b=b->next) {
                if(b->typ == typ && b->hash == h && b->nstk == (uintptr)nstk) {
                        for(j = 0; j < nstk; j++) {
                                if(b->stk[j].pc != stk[j].pc ||
                                   b->stk[j].lineno != stk[j].lineno ||
                                   !__go_strings_equal(b->stk[j].filename, stk[j].filename))
                                        break;
                        }
                        if (j == nstk)
                                return b;
                }
        }

        if(!alloc)
                return nil;

        b = allocate(sizeof *b + nstk*sizeof stk[0]);
        if(b == nil)
                runtime_throw("runtime: cannot allocate memory");
        bucketmem += sizeof *b + nstk*sizeof stk[0];
        runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
        b->typ = typ;
        b->hash = h;
        b->nstk = nstk;
        b->next = buckhash[i];
        buckhash[i] = b;
        if(typ == MProf) {
                b->allnext = mbuckets;
                mbuckets = b;
        } else {
                b->allnext = bbuckets;
                bbuckets = b;
        }
        return b;
}

static void
MProf_GC(void)
{
        Bucket *b;

        for(b=mbuckets; b; b=b->allnext) {
                b->allocs += b->recent_allocs;
                b->frees += b->recent_frees;
                b->alloc_bytes += b->recent_alloc_bytes;
                b->free_bytes += b->recent_free_bytes;
                b->recent_allocs = 0;
                b->recent_frees = 0;
                b->recent_alloc_bytes = 0;
                b->recent_free_bytes = 0;
        }
}

// Record that a gc just happened: all the 'recent' statistics are now real.
void
runtime_MProf_GC(void)
{
        runtime_lock(&proflock);
        MProf_GC();
        runtime_unlock(&proflock);
}

// Map from pointer to Bucket* that allocated it.
// Three levels:
//      Linked-list hash table for top N-AddrHashShift bits.
//      Array index for next AddrDenseBits bits.
//      Linked list for next AddrHashShift-AddrDenseBits bits.
// This is more efficient than using a general map,
// because of the typical clustering of the pointer keys.

typedef struct AddrHash AddrHash;
typedef struct AddrEntry AddrEntry;

enum {
        AddrHashBits = 12,      // good for 4GB of used address space
        AddrHashShift = 20,     // each AddrHash knows about 1MB of address space
        AddrDenseBits = 8,      // good for a profiling rate of 4096 bytes
};
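// How the constants above fit together (simple arithmetic, not new policy):
// each AddrHash covers 2^AddrHashShift = 1MB of address space, and a table of
// 2^AddrHashBits = 4096 chains therefore covers about 4GB. Splitting each 1MB
// region into 2^AddrDenseBits = 256 dense slots gives 4096 bytes per slot, so
// with roughly one sampled allocation per 4096 bytes each slot holds about one
// entry.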

struct AddrHash
{
        AddrHash *next; // next in top-level hash table linked list
        uintptr addr;   // addr>>20
        AddrEntry *dense[1<<AddrDenseBits];
};

struct AddrEntry
{
        AddrEntry *next;        // next in bottom-level linked list
        uint32 addr;
        Bucket *b;
};

static AddrHash **addrhash;     // points to (AddrHash*)[1<<AddrHashBits]
static AddrEntry *addrfree;
static uintptr addrmem;

// Multiplicative hash function:
// hashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
// This is a good multiplier as suggested in CLR, Knuth.  The hash
// value is taken to be the top AddrHashBits bits of the bottom 32 bits
// of the multiplied value.
enum {
        HashMultiplier = 2654435769U
};
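// Worked example of the lookup path used by setaddrbucket/getaddrbucket below,
// for an illustrative (made-up) address addr = 0x7f3a12345678:
//      addr>>AddrHashShift = 0x7f3a123 identifies the AddrHash node;
//      the top-level chain is h = (uint32)(0x7f3a123*2654435769U) >> (32-12);
//      the dense slot is bits 19..12 of addr, here 0x45;
//      e->addr stores the complemented low 20 bits, (uint32)~0x45678.
// 2654435769 is floor(2^32 * (sqrt(5)-1)/2), the golden-ratio multiplier.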

// Set the bucket associated with addr to b.
static void
setaddrbucket(uintptr addr, Bucket *b)
{
        int32 i;
        uint32 h;
        AddrHash *ah;
        AddrEntry *e;

        h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
        for(ah=addrhash[h]; ah; ah=ah->next)
                if(ah->addr == (addr>>AddrHashShift))
                        goto found;

        ah = allocate(sizeof *ah);
        addrmem += sizeof *ah;
        ah->next = addrhash[h];
        ah->addr = addr>>AddrHashShift;
        addrhash[h] = ah;

found:
        if((e = addrfree) == nil) {
                e = allocate(64*sizeof *e);
                addrmem += 64*sizeof *e;
                for(i=0; i+1<64; i++)
                        e[i].next = &e[i+1];
                e[63].next = nil;
        }
        addrfree = e->next;
        e->addr = (uint32)~(addr & ((1<<AddrHashShift)-1));
        e->b = b;
        h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1); // entry in dense is top 8 bits of low 20.
        e->next = ah->dense[h];
        ah->dense[h] = e;
}

// Get the bucket associated with addr and clear the association.
static Bucket*
getaddrbucket(uintptr addr)
{
        uint32 h;
        AddrHash *ah;
        AddrEntry *e, **l;
        Bucket *b;

        h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
        for(ah=addrhash[h]; ah; ah=ah->next)
                if(ah->addr == (addr>>AddrHashShift))
                        goto found;
        return nil;

found:
        h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1); // entry in dense is top 8 bits of low 20.
        for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
                if(e->addr == (uint32)~(addr & ((1<<AddrHashShift)-1))) {
                        *l = e->next;
                        b = e->b;
                        e->next = addrfree;
                        addrfree = e;
                        return b;
                }
        }
        return nil;
}

// Called by malloc to record a profiled block.
void
runtime_MProf_Malloc(void *p, uintptr size)
{
        M *m;
        int32 nstk;
        Location stk[32];
        Bucket *b;

        m = runtime_m();
        if(m->nomemprof > 0)
                return;

        m->nomemprof++;
        nstk = runtime_callers(1, stk, 32);
        runtime_lock(&proflock);
        b = stkbucket(MProf, stk, nstk, true);
        b->recent_allocs++;
        b->recent_alloc_bytes += size;
        setaddrbucket((uintptr)p, b);
        runtime_unlock(&proflock);
        m = runtime_m();
        m->nomemprof--;
}

// Called when freeing a profiled block.
void
runtime_MProf_Free(void *p, uintptr size)
{
        M *m;
        Bucket *b;

        m = runtime_m();
        if(m->nomemprof > 0)
                return;

        m->nomemprof++;
        runtime_lock(&proflock);
        b = getaddrbucket((uintptr)p);
        if(b != nil) {
                b->recent_frees++;
                b->recent_free_bytes += size;
        }
        runtime_unlock(&proflock);
        m = runtime_m();
        m->nomemprof--;
}

int64 runtime_blockprofilerate;  // in CPU ticks

void runtime_SetBlockProfileRate(intgo) __asm__ (GOSYM_PREFIX "runtime.SetBlockProfileRate");

void
runtime_SetBlockProfileRate(intgo rate)
{
        runtime_atomicstore64((uint64*)&runtime_blockprofilerate, rate * runtime_tickspersecond() / (1000*1000*1000));
}
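// Unit note: the Go-level rate argument is nanoseconds of blocking per sampled
// event (see runtime.SetBlockProfileRate), while runtime_blockprofilerate is
// kept in CPU ticks, hence the rate*tickspersecond/1e9 conversion above. As an
// illustration only, with runtime_tickspersecond() == 2500000000, rate = 10000
// is stored as 25000 ticks.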

void
runtime_blockevent(int64 cycles, int32 skip)
{
        int32 nstk;
        int64 rate;
        Location stk[32];
        Bucket *b;

        if(cycles <= 0)
                return;
        rate = runtime_atomicload64((uint64*)&runtime_blockprofilerate);
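        // Sampling policy implied by the check below: events that blocked for
        // at least rate ticks are always recorded; shorter events are recorded
        // with probability roughly cycles/rate.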
        if(rate <= 0 || (rate > cycles && runtime_fastrand1()%rate > cycles))
                return;

        nstk = runtime_callers(skip, stk, 32);
        runtime_lock(&proflock);
        b = stkbucket(BProf, stk, nstk, true);
        b->count++;
        b->cycles += cycles;
        runtime_unlock(&proflock);
}

// Go interface to profile data.  (Declared in debug.go)

// Must match MemProfileRecord in debug.go.
typedef struct Record Record;
struct Record {
        int64 alloc_bytes, free_bytes;
        int64 alloc_objects, free_objects;
        uintptr stk[32];
};

// Write b's data to r.
static void
record(Record *r, Bucket *b)
{
        uint32 i;

        r->alloc_bytes = b->alloc_bytes;
        r->free_bytes = b->free_bytes;
        r->alloc_objects = b->allocs;
        r->free_objects = b->frees;
        for(i=0; i<b->nstk && i<nelem(r->stk); i++)
                r->stk[i] = b->stk[i].pc;
        for(; i<nelem(r->stk); i++)
                r->stk[i] = 0;
}

func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
        Bucket *b;
        Record *r;
        bool clear;

        runtime_lock(&proflock);
        n = 0;
        clear = true;
        for(b=mbuckets; b; b=b->allnext) {
                if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
                        n++;
                if(b->allocs != 0 || b->frees != 0)
                        clear = false;
        }
        if(clear) {
                // Absolutely no data, suggesting that a garbage collection
                // has not yet happened. In order to allow profiling when
                // garbage collection is disabled from the beginning of execution,
                // accumulate stats as if a GC just happened, and recount buckets.
                MProf_GC();
                n = 0;
                for(b=mbuckets; b; b=b->allnext)
                        if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
                                n++;
        }
        ok = false;
        if(n <= p.__count) {
                ok = true;
                r = (Record*)p.__values;
                for(b=mbuckets; b; b=b->allnext)
                        if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
                                record(r++, b);
        }
        runtime_unlock(&proflock);
}
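// MemProfile mirrors the documented runtime.MemProfile contract: n is always
// the number of qualifying records, and ok reports whether they all fit in p.
// Go callers typically call once with a short slice to learn n, grow the slice
// (plus some slack for records created in the meantime), and call again.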

void
runtime_MProf_Mark(void (*addroot)(Obj))
{
        // buckhash is not allocated via mallocgc.
        addroot((Obj){(byte*)&mbuckets, sizeof mbuckets, 0});
        addroot((Obj){(byte*)&bbuckets, sizeof bbuckets, 0});
        addroot((Obj){(byte*)&addrhash, sizeof addrhash, 0});
        addroot((Obj){(byte*)&addrfree, sizeof addrfree, 0});
}

// Must match BlockProfileRecord in debug.go.
typedef struct BRecord BRecord;
struct BRecord {
        int64 count;
        int64 cycles;
        uintptr stk[32];
};

func BlockProfile(p Slice) (n int, ok bool) {
        Bucket *b;
        BRecord *r;
        int32 i;

        runtime_lock(&proflock);
        n = 0;
        for(b=bbuckets; b; b=b->allnext)
                n++;
        ok = false;
        if(n <= p.__count) {
                ok = true;
                r = (BRecord*)p.__values;
                for(b=bbuckets; b; b=b->allnext, r++) {
                        r->count = b->count;
                        r->cycles = b->cycles;
                        for(i=0; (uintptr)i<b->nstk && (uintptr)i<nelem(r->stk); i++)
                                r->stk[i] = b->stk[i].pc;
                        for(; (uintptr)i<nelem(r->stk); i++)
                                r->stk[i] = 0;
                }
        }
        runtime_unlock(&proflock);
}

// Must match StackRecord in debug.go.
typedef struct TRecord TRecord;
struct TRecord {
        uintptr stk[32];
};

func ThreadCreateProfile(p Slice) (n int, ok bool) {
        TRecord *r;
        M *first, *mp;
        int32 i;

        first = runtime_atomicloadp(&runtime_allm);
        n = 0;
        for(mp=first; mp; mp=mp->alllink)
                n++;
        ok = false;
        if(n <= p.__count) {
                ok = true;
                r = (TRecord*)p.__values;
                for(mp=first; mp; mp=mp->alllink) {
                        for(i = 0; (uintptr)i < nelem(r->stk); i++) {
                                r->stk[i] = mp->createstack[i].pc;
                        }
                        r++;
                }
        }
}

func Stack(b Slice, all bool) (n int) {
        byte *pc, *sp;
        bool enablegc;

        sp = runtime_getcallersp(&b);
        pc = runtime_getcallerpc(&b);

        if(all) {
                runtime_semacquire(&runtime_worldsema);
                runtime_m()->gcing = 1;
                runtime_stoptheworld();
                enablegc = mstats.enablegc;
                mstats.enablegc = false;
        }

        if(b.__count == 0)
                n = 0;
        else{
                G* g = runtime_g();
                g->writebuf = (byte*)b.__values;
                g->writenbuf = b.__count;
                USED(pc);
                USED(sp);
                runtime_goroutineheader(g);
                runtime_traceback();
                runtime_goroutinetrailer(g);
                if(all)
                        runtime_tracebackothers(g);
                n = b.__count - g->writenbuf;
                g->writebuf = nil;
                g->writenbuf = 0;
        }

        if(all) {
                runtime_m()->gcing = 0;
                mstats.enablegc = enablegc;
                runtime_semrelease(&runtime_worldsema);
                runtime_starttheworld();
        }
}

static void
saveg(G *gp, TRecord *r)
{
        int32 n, i;
        Location locstk[nelem(r->stk)];

        if(gp == runtime_g()) {
                n = runtime_callers(0, locstk, nelem(r->stk));
                for(i = 0; i < n; i++)
                        r->stk[i] = locstk[i].pc;
        }
        else {
                // FIXME: Not implemented.
                n = 0;
        }
        if((size_t)n < nelem(r->stk))
                r->stk[n] = 0;
}

func GoroutineProfile(b Slice) (n int, ok bool) {
        TRecord *r;
        G *gp;

        ok = false;
        n = runtime_gcount();
        if(n <= b.__count) {
                runtime_semacquire(&runtime_worldsema);
                runtime_m()->gcing = 1;
                runtime_stoptheworld();

                n = runtime_gcount();
                if(n <= b.__count) {
                        G* g = runtime_g();
                        ok = true;
                        r = (TRecord*)b.__values;
                        saveg(g, r++);
                        for(gp = runtime_allg; gp != nil; gp = gp->alllink) {
                                if(gp == g || gp->status == Gdead)
                                        continue;
                                saveg(gp, r++);
                        }
                }

                runtime_m()->gcing = 0;
                runtime_semrelease(&runtime_worldsema);
                runtime_starttheworld();
        }
}

void
runtime_mprofinit(void)
{
        addrhash = allocate((1<<AddrHashBits)*sizeof *addrhash);
}