周游列国之仕子 2019-06-30
书接上文《修改golang源代码获取goroutine id实现ThreadLocal》。上文实现的版本由于map是多个goroutine共享的,存在竞争,影响了性能,实现思路类似java初期的ThreadLocal,今天我们借鉴现代版java的ThreadLocal来实现。
先看看java里面怎么实现的
可以看到每个线程实例都引用了一个map,map的key是ThreadLocal对象,value是实际存储的数据。下面我们也按照这个思路来实现,golang中g实例相当于java的Thread实例,我们可以修改g的结构来达到目的。
修改 $GOROOT/src/runtime/runtime2.go 文件,为g结构体添加 localMap *goroutineLocalMap 字段
// g holds the runtime's per-goroutine state: stack bounds, scheduling
// context, goroutine id, and per-G GC accounting. The trailing localMap
// field is the one this patch adds.
type g struct {
	// Stack parameters.
	// stack describes the actual stack memory: [stack.lo, stack.hi).
	// stackguard0 is the stack pointer compared in the Go stack growth prologue.
	// It is stack.lo+StackGuard normally, but can be StackPreempt to trigger a preemption.
	// stackguard1 is the stack pointer compared in the C stack growth prologue.
	// It is stack.lo+StackGuard on g0 and gsignal stacks.
	// It is ~0 on other goroutine stacks, to trigger a call to morestackc (and crash).
	stack       stack   // offset known to runtime/cgo
	stackguard0 uintptr // offset known to liblink
	stackguard1 uintptr // offset known to liblink

	_panic         *_panic // innermost panic - offset known to liblink
	_defer         *_defer // innermost defer
	m              *m      // current m; offset known to arm liblink
	sched          gobuf
	syscallsp      uintptr        // if status==Gsyscall, syscallsp = sched.sp to use during gc
	syscallpc      uintptr        // if status==Gsyscall, syscallpc = sched.pc to use during gc
	stktopsp       uintptr        // expected sp at top of stack, to check in traceback
	param          unsafe.Pointer // passed parameter on wakeup
	atomicstatus   uint32
	stackLock      uint32 // sigprof/scang lock; TODO: fold in to atomicstatus
	goid           int64
	schedlink      guintptr
	waitsince      int64      // approx time when the g become blocked
	waitreason     waitReason // if status==Gwaiting
	preempt        bool       // preemption signal, duplicates stackguard0 = stackpreempt
	paniconfault   bool       // panic (instead of crash) on unexpected fault address
	preemptscan    bool       // preempted g does scan for gc
	gcscandone     bool       // g has scanned stack; protected by _Gscan bit in status
	gcscanvalid    bool       // false at start of gc cycle, true if G has not run since last scan; TODO: remove?
	throwsplit     bool       // must not split stack
	raceignore     int8       // ignore race detection events
	sysblocktraced bool       // StartTrace has emitted EvGoInSyscall about this goroutine
	sysexitticks   int64      // cputicks when syscall has returned (for tracing)
	traceseq       uint64     // trace event sequencer
	tracelastp     puintptr   // last P emitted an event for this goroutine
	lockedm        muintptr
	sig            uint32
	writebuf       []byte
	sigcode0       uintptr
	sigcode1       uintptr
	sigpc          uintptr
	gopc           uintptr         // pc of go statement that created this goroutine
	ancestors      *[]ancestorInfo // ancestor information goroutine(s) that created this goroutine (only used if debug.tracebackancestors)
	startpc        uintptr         // pc of goroutine function
	racectx        uintptr
	waiting        *sudog         // sudog structures this g is waiting on (that have a valid elem ptr); in lock order
	cgoCtxt        []uintptr      // cgo traceback context
	labels         unsafe.Pointer // profiler labels
	timer          *timer         // cached timer for time.Sleep
	selectDone     uint32         // are we participating in a select and did someone win the race?

	// Per-G GC state

	// gcAssistBytes is this G's GC assist credit in terms of
	// bytes allocated. If this is positive, then the G has credit
	// to allocate gcAssistBytes bytes without assisting. If this
	// is negative, then the G must correct this by performing
	// scan work. We track this in bytes to make it fast to update
	// and check for debt in the malloc hot path. The assist ratio
	// determines how this corresponds to scan work debt.
	gcAssistBytes int64

	// localMap is the field added by this patch: the goroutine's own
	// goroutine-local storage. It is appended at the end of the struct
	// because the leading fields have offsets known to runtime/cgo and
	// liblink.
	localMap *goroutineLocalMap
}
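注意不要放在第一个字段,否则编译会出现 fatal: morestack on g0。原因是 g 结构体开头几个字段(stack、stackguard0、stackguard1 等)的偏移量是 runtime/cgo 和 liblink 已知的(见上面字段注释中的 offset known to ...),在它们前面插入字段会改变这些偏移量,所以新字段应加在结构体末尾。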
在 $GOROOT/src/runtime/ 目录下创建go源文件 goroutine_local.go
package runtime type goroutineLocalMap struct { m map[*goroutineLocal]interface{} } type goroutineLocal struct { initfun func() interface{} } func NewGoroutineLocal(initfun func() interface{}) *goroutineLocal { return &goroutineLocal{initfun} } func (gl *goroutineLocal)Get() interface{} { if getg().localMap == nil { getg().localMap = &goroutineLocalMap{make(map[*goroutineLocal]interface{})} } v, ok := getg().localMap.m[gl] if !ok && gl.initfun != nil{ v = gl.initfun() } return v } func (gl *goroutineLocal)Set(v interface{}) { if getg().localMap == nil { getg().localMap = &goroutineLocalMap{make(map[*goroutineLocal]interface{})} } getg().localMap.m[gl] = v } func (gl *goroutineLocal)Remove() { if getg().localMap != nil { delete(getg().localMap.m, gl) } }
# Rebuild the patched toolchain from the source tree, bootstrapping with an
# existing Go installation; all.bash only runs if the cd succeeded.
cd ~/go/src && GOROOT_BOOTSTRAP='/Users/qiuxudong/go1.9' ./all.bash
package main import ( "fmt" "time" "runtime" ) var gl = runtime.NewGoroutineLocal(func() interface{} { return "default" }) func main() { gl.Set("test0") fmt.Println(runtime.GetGoroutineId(), gl.Get()) go func() { gl.Set("test1") fmt.Println(runtime.GetGoroutineId(), gl.Get()) gl.Remove() fmt.Println(runtime.GetGoroutineId(), gl.Get()) }() time.Sleep(2 * time.Second) }
可以看到
1 test0 18 test1 18 default
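同样的,这个版本也可能会内存泄露,建议主动调用Remove清除数据。但是如果goroutine销毁了,对应的数据不再被引用,是可以被GC清理的,泄露的概率降低很多。需要注意:runtime 会把已退出 goroutine 的 g 结构体放入空闲列表复用(见 runtime/proc.go 中的 goexit0、gfput、gfget),而上面的修改没有在 goroutine 退出时把 localMap 置为 nil,因此旧数据要等到该 g 被复用并被覆盖、或者显式调用 Remove 之后才能被回收,复用该 g 的新 goroutine 还可能读到上一个 goroutine 留下的值;稳妥的做法是在 goexit0 中加上 gp.localMap = nil。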
package goroutine_local import ( "testing" "fmt" "time" "runtime" ) var gl = NewGoroutineLocal(func() interface{} { return make([]byte, 1024*1024) }) func TestGoroutineLocal(t *testing.T) { var stats runtime.MemStats runtime.ReadMemStats(&stats) go func() { for { runtime.GC() runtime.ReadMemStats(&stats) fmt.Printf("HeapAlloc = %d\n", stats.HeapAlloc) fmt.Printf("NumGoroutine = %d\n", runtime.NumGoroutine()) time.Sleep(1*time.Second) } }() startAlloc() time.Sleep(10000 * time.Second) } func startAlloc() { for i := 0; i < 1000; i++ { runtime.GC() go func() { gl.Set(make([]byte, 10*1024*1024)) fmt.Println(runtime.GetGoroutineId()) //gl.Remove() //故意不删除数据,观察是否泄露 time.Sleep(1 * time.Second) //模拟其它操作 }() time.Sleep(1 * time.Second) } fmt.Println("done") }
结果:
HeapAlloc = 98336 NumGoroutine = 2 HeapAlloc = 92280 NumGoroutine = 4 19 49 HeapAlloc = 21070408 NumGoroutine = 4 5 HeapAlloc = 31556568 NumGoroutine = 4 38 HeapAlloc = 42043600 NumGoroutine = 4 21 HeapAlloc = 52529512 NumGoroutine = 5 6 HeapAlloc = 63015760 NumGoroutine = 4 7 HeapAlloc = 73500784 NumGoroutine = 4 40 HeapAlloc = 83986616 NumGoroutine = 4 ...
可以看到 HeapAlloc 是持续上升的。
package main import ( "fmt" "time" "runtime" ) var gl = runtime.NewGoroutineLocal(func() interface{} { return make([]byte, 10*1024*1024) }) func main() { var stats runtime.MemStats go func() { for { runtime.GC() runtime.ReadMemStats(&stats) fmt.Printf("HeapAlloc = %d\n", stats.HeapAlloc) fmt.Printf("NumGoroutine = %d\n", runtime.NumGoroutine()) time.Sleep(1*time.Second) } }() startAlloc() time.Sleep(10000 * time.Second) } func startAlloc() { for i := 0; i < 1000; i++ { runtime.GC() go func() { gl.Set(make([]byte, 10*1024*1024)) fmt.Println(runtime.GetGoroutineId()) //gl.Remove() //故意不删除数据,观察是否泄露 time.Sleep(1 * time.Second) //模拟其它操作 }() time.Sleep(1 * time.Second) } fmt.Println("done") }
结果:
... HeapAlloc = 178351296 NumGoroutine = 3 114 HeapAlloc = 178351296 NumGoroutine = 3 112 HeapAlloc = 188837800 NumGoroutine = 3 131 HeapAlloc = 188837800 NumGoroutine = 3 145 HeapAlloc = 178351296 NumGoroutine = 3 146 HeapAlloc = 178351296 NumGoroutine = 3 HeapAlloc = 188837736 NumGoroutine = 3 132 58 HeapAlloc = 178351296 NumGoroutine = 3 ...
可以看到 HeapAlloc 不是一直上升的,中间会有GC使其下降