author     Ben Cheng <bccheng@google.com>  2013-03-28 11:14:20 -0700
committer  Ben Cheng <bccheng@google.com>  2013-03-28 12:40:33 -0700
commit     af0c51ac87ab2a87caa03fa108f0d164987a2764 (patch)
tree       4b8b470f7c5b69642fdab8d0aa1fbc148d02196b /gcc-4.8/libgo/runtime/mprof.goc
parent     d87cae247d39ebf4f5a6bf25c932a14d2fdb9384 (diff)
[GCC 4.8] Initial check-in of GCC 4.8.0
Change-Id: I0719d8a6d0f69b367a6ab6f10eb75622dbf12771
Diffstat (limited to 'gcc-4.8/libgo/runtime/mprof.goc')
-rw-r--r--  gcc-4.8/libgo/runtime/mprof.goc  533
1 file changed, 533 insertions, 0 deletions
diff --git a/gcc-4.8/libgo/runtime/mprof.goc b/gcc-4.8/libgo/runtime/mprof.goc
new file mode 100644
index 000000000..c1b09bea7
--- /dev/null
+++ b/gcc-4.8/libgo/runtime/mprof.goc
@@ -0,0 +1,533 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Malloc profiling.
+// Patterned after tcmalloc's algorithms; shorter code.
+
+package runtime
+#include "runtime.h"
+#include "arch.h"
+#include "malloc.h"
+#include "defs.h"
+#include "go-type.h"
+#include "go-string.h"
+
+// NOTE(rsc): Everything here could use cas if contention became an issue.
+static Lock proflock;
+
+enum { MProf, BProf };  // profile types
+
+// Per-call-stack profiling information.
+// Lookup by hashing call stack into a linked-list hash table.
+typedef struct Bucket Bucket;
+struct Bucket
+{
+	Bucket	*next;	// next in hash list
+	Bucket	*allnext;	// next in list of all mbuckets/bbuckets
+	int32	typ;
+	union
+	{
+		struct  // typ == MProf
+		{
+			uintptr	allocs;
+			uintptr	frees;
+			uintptr	alloc_bytes;
+			uintptr	free_bytes;
+			uintptr	recent_allocs;  // since last gc
+			uintptr	recent_frees;
+			uintptr	recent_alloc_bytes;
+			uintptr	recent_free_bytes;
+		};
+		struct  // typ == BProf
+		{
+			int64	count;
+			int64	cycles;
+		};
+	};
+	uintptr	hash;
+	uintptr	nstk;
+	Location stk[1];
+};
+enum {
+	BuckHashSize = 179999,
+};
+static Bucket **buckhash;
+static Bucket *mbuckets;  // memory profile buckets
+static Bucket *bbuckets;  // blocking profile buckets
+static uintptr bucketmem;
+
+// Return the bucket for stk[0:nstk], allocating new bucket if needed.
+static Bucket*
+stkbucket(int32 typ, Location *stk, int32 nstk, bool alloc)
+{
+	int32 i, j;
+	uintptr h;
+	Bucket *b;
+
+	if(buckhash == nil) {
+		buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0]);
+		mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0];
+	}
+
+	// Hash stack.
+	h = 0;
+	for(i=0; i<nstk; i++) {
+		h += stk[i].pc;
+		h += h<<10;
+		h ^= h>>6;
+	}
+	h += h<<3;
+	h ^= h>>11;
+
+	i = h%BuckHashSize;
+	for(b = buckhash[i]; b; b=b->next) {
+		if(b->typ == typ && b->hash == h && b->nstk == (uintptr)nstk) {
+			for(j = 0; j < nstk; j++) {
+				if(b->stk[j].pc != stk[j].pc ||
+				   b->stk[j].lineno != stk[j].lineno ||
+				   !__go_strings_equal(b->stk[j].filename, stk[j].filename))
+					break;
+			}
+			if (j == nstk)
+				return b;
+		}
+	}
+
+	if(!alloc)
+		return nil;
+
+	b = runtime_mallocgc(sizeof *b + nstk*sizeof stk[0], FlagNoProfiling, 0, 1);
+	bucketmem += sizeof *b + nstk*sizeof stk[0];
+	runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
+	b->typ = typ;
+	b->hash = h;
+	b->nstk = nstk;
+	b->next = buckhash[i];
+	buckhash[i] = b;
+	if(typ == MProf) {
+		b->allnext = mbuckets;
+		mbuckets = b;
+	} else {
+		b->allnext = bbuckets;
+		bbuckets = b;
+	}
+	return b;
+}
+
+// Record that a gc just happened: all the 'recent' statistics are now real.
+void
+runtime_MProf_GC(void)
+{
+	Bucket *b;
+
+	runtime_lock(&proflock);
+	for(b=mbuckets; b; b=b->allnext) {
+		b->allocs += b->recent_allocs;
+		b->frees += b->recent_frees;
+		b->alloc_bytes += b->recent_alloc_bytes;
+		b->free_bytes += b->recent_free_bytes;
+		b->recent_allocs = 0;
+		b->recent_frees = 0;
+		b->recent_alloc_bytes = 0;
+		b->recent_free_bytes = 0;
+	}
+	runtime_unlock(&proflock);
+}
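The bucket lookup in stkbucket() above hashes the call stack with a Jenkins-style one-at-a-time mix over the frame PCs, then reduces modulo BuckHashSize and chains buckets off a fixed-size table. Below is a minimal standalone sketch of just the hash step; hash_stack() and the variable names are illustrative, not part of the runtime.

#include <stdint.h>
#include <stdio.h>

/* Sketch of the stack hash used by stkbucket(): a one-at-a-time mix
 * over the call-site PCs, reduced modulo the table size. */
enum { BUCK_HASH_SIZE = 179999 };

static uintptr_t
hash_stack(const uintptr_t *pc, int n)
{
	uintptr_t h = 0;
	int i;

	for (i = 0; i < n; i++) {
		h += pc[i];	/* fold in each frame's PC */
		h += h << 10;
		h ^= h >> 6;
	}
	h += h << 3;	/* final avalanche, as in the file */
	h ^= h >> 11;
	return h;
}

int
main(void)
{
	uintptr_t stk[] = { 0x401234, 0x402468, 0x40abcd };
	printf("slot %lu\n",
	       (unsigned long)(hash_stack(stk, 3) % BUCK_HASH_SIZE));
	return 0;
}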
+
+// Map from pointer to Bucket* that allocated it.
+// Three levels:
+//	Linked-list hash table for top N-AddrHashShift bits.
+//	Array index for next AddrDenseBits bits.
+//	Linked list for next AddrHashShift-AddrDenseBits bits.
+// This is more efficient than using a general map,
+// because of the typical clustering of the pointer keys.
+
+typedef struct AddrHash AddrHash;
+typedef struct AddrEntry AddrEntry;
+
+enum {
+	AddrHashBits = 12,	// good for 4GB of used address space
+	AddrHashShift = 20,	// each AddrHash knows about 1MB of address space
+	AddrDenseBits = 8,	// good for a profiling rate of 4096 bytes
+};
+
+struct AddrHash
+{
+	AddrHash *next;	// next in top-level hash table linked list
+	uintptr addr;	// addr>>20
+	AddrEntry *dense[1<<AddrDenseBits];
+};
+
+struct AddrEntry
+{
+	AddrEntry *next;	// next in bottom-level linked list
+	uint32 addr;
+	Bucket *b;
+};
+
+static AddrHash *addrhash[1<<AddrHashBits];
+static AddrEntry *addrfree;
+static uintptr addrmem;
+
+// Multiplicative hash function:
+// hashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
+// This is a good multiplier as suggested in CLR, Knuth. The hash
+// value is taken to be the top AddrHashBits bits of the bottom 32 bits
+// of the multiplied value.
+enum {
+	HashMultiplier = 2654435769U
+};
+
+// Set the bucket associated with addr to b.
+static void
+setaddrbucket(uintptr addr, Bucket *b)
+{
+	int32 i;
+	uint32 h;
+	AddrHash *ah;
+	AddrEntry *e;
+
+	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
+	for(ah=addrhash[h]; ah; ah=ah->next)
+		if(ah->addr == (addr>>AddrHashShift))
+			goto found;
+
+	ah = runtime_mallocgc(sizeof *ah, FlagNoProfiling, 0, 1);
+	addrmem += sizeof *ah;
+	ah->next = addrhash[h];
+	ah->addr = addr>>AddrHashShift;
+	addrhash[h] = ah;
+
+found:
+	if((e = addrfree) == nil) {
+		e = runtime_mallocgc(64*sizeof *e, FlagNoProfiling, 0, 0);
+		addrmem += 64*sizeof *e;
+		for(i=0; i+1<64; i++)
+			e[i].next = &e[i+1];
+		e[63].next = nil;
+	}
+	addrfree = e->next;
+	e->addr = (uint32)~(addr & ((1<<AddrHashShift)-1));
+	e->b = b;
+	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
+	e->next = ah->dense[h];
+	ah->dense[h] = e;
+}
+
+// Get the bucket associated with addr and clear the association.
+static Bucket*
+getaddrbucket(uintptr addr)
+{
+	uint32 h;
+	AddrHash *ah;
+	AddrEntry *e, **l;
+	Bucket *b;
+
+	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
+	for(ah=addrhash[h]; ah; ah=ah->next)
+		if(ah->addr == (addr>>AddrHashShift))
+			goto found;
+	return nil;
+
+found:
+	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
+	for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
+		if(e->addr == (uint32)~(addr & ((1<<AddrHashShift)-1))) {
+			*l = e->next;
+			b = e->b;
+			e->next = addrfree;
+			addrfree = e;
+			return b;
+		}
+	}
+	return nil;
+}
+
+// Called by malloc to record a profiled block.
+void
+runtime_MProf_Malloc(void *p, uintptr size)
+{
+	M *m;
+	int32 nstk;
+	Location stk[32];
+	Bucket *b;
+
+	m = runtime_m();
+	if(m->nomemprof > 0)
+		return;
+
+	m->nomemprof++;
+	nstk = runtime_callers(1, stk, 32);
+	runtime_lock(&proflock);
+	b = stkbucket(MProf, stk, nstk, true);
+	b->recent_allocs++;
+	b->recent_alloc_bytes += size;
+	setaddrbucket((uintptr)p, b);
+	runtime_unlock(&proflock);
+	m = runtime_m();
+	m->nomemprof--;
+}
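Each profiled pointer is thus split into three fields: the bits above AddrHashShift select an AddrHash chain through the multiplicative hash, the top AddrDenseBits of the low AddrHashShift bits index the dense array, and the complemented remainder is the per-entry key. Below is a standalone sketch of that decomposition, using the constants from the file; the helper names are illustrative, not part of the runtime.

#include <stdint.h>
#include <stdio.h>

/* How setaddrbucket()/getaddrbucket() split a pointer across levels. */
enum {
	ADDR_HASH_BITS  = 12,
	ADDR_HASH_SHIFT = 20,
	ADDR_DENSE_BITS = 8,
};
static const uint32_t HASH_MULTIPLIER = 2654435769U;

/* Top level: multiplicative hash of the high bits picks the chain. */
static uint32_t
top_slot(uintptr_t addr)
{
	return (uint32_t)((addr >> ADDR_HASH_SHIFT) * HASH_MULTIPLIER)
		>> (32 - ADDR_HASH_BITS);
}

/* Middle level: the top 8 of the low 20 bits index the dense array. */
static uint32_t
dense_slot(uintptr_t addr)
{
	return (addr >> (ADDR_HASH_SHIFT - ADDR_DENSE_BITS))
		& ((1 << ADDR_DENSE_BITS) - 1);
}

/* Bottom level: the remaining low bits, stored complemented so that a
 * zeroed AddrEntry can never match a real address. */
static uint32_t
entry_key(uintptr_t addr)
{
	return (uint32_t)~(addr & (((uintptr_t)1 << ADDR_HASH_SHIFT) - 1));
}

int
main(void)
{
	uintptr_t p = 0x7f1234567;
	printf("%u %u %u\n", top_slot(p), dense_slot(p), entry_key(p));
	return 0;
}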
+
+// Called when freeing a profiled block.
+void
+runtime_MProf_Free(void *p, uintptr size)
+{
+	M *m;
+	Bucket *b;
+
+	m = runtime_m();
+	if(m->nomemprof > 0)
+		return;
+
+	m->nomemprof++;
+	runtime_lock(&proflock);
+	b = getaddrbucket((uintptr)p);
+	if(b != nil) {
+		b->recent_frees++;
+		b->recent_free_bytes += size;
+	}
+	runtime_unlock(&proflock);
+	m = runtime_m();
+	m->nomemprof--;
+}
+
+int64 runtime_blockprofilerate;  // in CPU ticks
+
+void runtime_SetBlockProfileRate(intgo) __asm__ (GOSYM_PREFIX "runtime.SetBlockProfileRate");
+
+void
+runtime_SetBlockProfileRate(intgo rate)
+{
+	runtime_atomicstore64((uint64*)&runtime_blockprofilerate, rate * runtime_tickspersecond() / (1000*1000*1000));
+}
+
+void
+runtime_blockevent(int64 cycles, int32 skip)
+{
+	int32 nstk;
+	int64 rate;
+	Location stk[32];
+	Bucket *b;
+
+	if(cycles <= 0)
+		return;
+	rate = runtime_atomicload64((uint64*)&runtime_blockprofilerate);
+	if(rate <= 0 || (rate > cycles && runtime_fastrand1()%rate > cycles))
+		return;
+
+	nstk = runtime_callers(skip, stk, 32);
+	runtime_lock(&proflock);
+	b = stkbucket(BProf, stk, nstk, true);
+	b->count++;
+	b->cycles += cycles;
+	runtime_unlock(&proflock);
+}
+
+// Go interface to profile data. (Declared in debug.go)
+
+// Must match MemProfileRecord in debug.go.
+typedef struct Record Record;
+struct Record {
+	int64 alloc_bytes, free_bytes;
+	int64 alloc_objects, free_objects;
+	uintptr stk[32];
+};
+
+// Write b's data to r.
+static void
+record(Record *r, Bucket *b)
+{
+	uint32 i;
+
+	r->alloc_bytes = b->alloc_bytes;
+	r->free_bytes = b->free_bytes;
+	r->alloc_objects = b->allocs;
+	r->free_objects = b->frees;
+	for(i=0; i<b->nstk && i<nelem(r->stk); i++)
+		r->stk[i] = b->stk[i].pc;
+	for(; i<nelem(r->stk); i++)
+		r->stk[i] = 0;
+}
+
+func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
+	Bucket *b;
+	Record *r;
+
+	runtime_lock(&proflock);
+	n = 0;
+	for(b=mbuckets; b; b=b->allnext)
+		if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
+			n++;
+	ok = false;
+	if(n <= p.__count) {
+		ok = true;
+		r = (Record*)p.__values;
+		for(b=mbuckets; b; b=b->allnext)
+			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
+				record(r++, b);
+	}
+	runtime_unlock(&proflock);
+}
+
+void
+runtime_MProf_Mark(void (*addroot)(Obj))
+{
+	// buckhash is not allocated via mallocgc.
+	addroot((Obj){(byte*)&mbuckets, sizeof mbuckets, 0});
+	addroot((Obj){(byte*)&bbuckets, sizeof bbuckets, 0});
+	addroot((Obj){(byte*)&addrhash, sizeof addrhash, 0});
+	addroot((Obj){(byte*)&addrfree, sizeof addrfree, 0});
+}
+
+// Must match BlockProfileRecord in debug.go.
+typedef struct BRecord BRecord;
+struct BRecord {
+	int64 count;
+	int64 cycles;
+	uintptr stk[32];
+};
+
+func BlockProfile(p Slice) (n int, ok bool) {
+	Bucket *b;
+	BRecord *r;
+	int32 i;
+
+	runtime_lock(&proflock);
+	n = 0;
+	for(b=bbuckets; b; b=b->allnext)
+		n++;
+	ok = false;
+	if(n <= p.__count) {
+		ok = true;
+		r = (BRecord*)p.__values;
+		for(b=bbuckets; b; b=b->allnext, r++) {
+			r->count = b->count;
+			r->cycles = b->cycles;
+			for(i=0; (uintptr)i<b->nstk && (uintptr)i<nelem(r->stk); i++)
+				r->stk[i] = b->stk[i].pc;
+			for(; (uintptr)i<nelem(r->stk); i++)
+				r->stk[i] = 0;
+		}
+	}
+	runtime_unlock(&proflock);
+}
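The sampling test in runtime_blockevent() above always records an event that blocked for at least `rate` ticks, and records a shorter one with probability roughly cycles/rate, so on average one record is kept per `rate` ticks of blocking. Below is a sketch of that decision in isolation; should_sample() is an illustrative name and rand_ticks() is a stand-in for runtime_fastrand1().

#include <stdint.h>
#include <stdlib.h>

static int64_t rand_ticks(void) { return rand(); }

static int
should_sample(int64_t cycles, int64_t rate)
{
	if (cycles <= 0)
		return 0;	/* nothing actually blocked */
	if (rate <= 0)
		return 0;	/* block profiling disabled */
	if (rate > cycles && rand_ticks() % rate > cycles)
		return 0;	/* short event lost the coin toss */
	return 1;		/* record: stack, count, cycles */
}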
+
+// Must match StackRecord in debug.go.
+typedef struct TRecord TRecord;
+struct TRecord {
+	uintptr stk[32];
+};
+
+func ThreadCreateProfile(p Slice) (n int, ok bool) {
+	TRecord *r;
+	M *first, *mp;
+	int32 i;
+
+	first = runtime_atomicloadp(&runtime_allm);
+	n = 0;
+	for(mp=first; mp; mp=mp->alllink)
+		n++;
+	ok = false;
+	if(n <= p.__count) {
+		ok = true;
+		r = (TRecord*)p.__values;
+		for(mp=first; mp; mp=mp->alllink) {
+			for(i = 0; (uintptr)i < nelem(r->stk); i++) {
+				r->stk[i] = mp->createstack[i].pc;
+			}
+			r++;
+		}
+	}
+}
+
+func Stack(b Slice, all bool) (n int) {
+	byte *pc, *sp;
+	bool enablegc;
+
+	sp = runtime_getcallersp(&b);
+	pc = runtime_getcallerpc(&b);
+
+	if(all) {
+		runtime_semacquire(&runtime_worldsema);
+		runtime_m()->gcing = 1;
+		runtime_stoptheworld();
+		enablegc = mstats.enablegc;
+		mstats.enablegc = false;
+	}
+
+	if(b.__count == 0)
+		n = 0;
+	else{
+		G* g = runtime_g();
+		g->writebuf = (byte*)b.__values;
+		g->writenbuf = b.__count;
+		USED(pc);
+		USED(sp);
+		runtime_goroutineheader(g);
+		runtime_traceback();
+		runtime_goroutinetrailer(g);
+		if(all)
+			runtime_tracebackothers(g);
+		n = b.__count - g->writenbuf;
+		g->writebuf = nil;
+		g->writenbuf = 0;
+	}
+
+	if(all) {
+		runtime_m()->gcing = 0;
+		mstats.enablegc = enablegc;
+		runtime_semrelease(&runtime_worldsema);
+		runtime_starttheworld();
+	}
+}
+
+static void
+saveg(G *gp, TRecord *r)
+{
+	int32 n, i;
+	Location locstk[nelem(r->stk)];
+
+	if(gp == runtime_g()) {
+		n = runtime_callers(0, locstk, nelem(r->stk));
+		for(i = 0; i < n; i++)
+			r->stk[i] = locstk[i].pc;
+	}
+	else {
+		// FIXME: Not implemented.
+		n = 0;
+	}
+	if((size_t)n < nelem(r->stk))
+		r->stk[n] = 0;
+}
+
+func GoroutineProfile(b Slice) (n int, ok bool) {
+	TRecord *r;
+	G *gp;
+
+	ok = false;
+	n = runtime_gcount();
+	if(n <= b.__count) {
+		runtime_semacquire(&runtime_worldsema);
+		runtime_m()->gcing = 1;
+		runtime_stoptheworld();
+
+		n = runtime_gcount();
+		if(n <= b.__count) {
+			G* g = runtime_g();
+			ok = true;
+			r = (TRecord*)b.__values;
+			saveg(g, r++);
+			for(gp = runtime_allg; gp != nil; gp = gp->alllink) {
+				if(gp == g || gp->status == Gdead)
+					continue;
+				saveg(gp, r++);
+			}
+		}
+
+		runtime_m()->gcing = 0;
+		runtime_semrelease(&runtime_worldsema);
+		runtime_starttheworld();
+	}
+}
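GoroutineProfile() above reads the goroutine count twice: once as a cheap check that the caller's slice is big enough, and again after stopping the world, since goroutines may have been created in between. Below is a toy model of that double-check pattern; every name is an illustrative stand-in for the runtime's machinery.

#include <stdio.h>

static int nitems = 3;		/* stands in for runtime_gcount() */
static int world_stopped;	/* stands in for runtime_stoptheworld() */

static int
toy_profile(int *buf, int cap)
{
	int i, n = nitems;

	if (n > cap)
		return -1;	/* buffer too small; caller grows it and retries */
	world_stopped = 1;	/* nothing can change past this point */
	n = nitems;		/* re-check the count under the stopped world */
	if (n <= cap)
		for (i = 0; i < n; i++)
			buf[i] = i;	/* stands in for saveg() */
	world_stopped = 0;
	return n;
}

int
main(void)
{
	int buf[8];
	printf("%d records\n", toy_profile(buf, 8));
	return 0;
}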