|
|
Created:
12 years ago by dvyukov Modified:
12 years ago Reviewers:
CC:
golang-dev, 0xe2.0x9a.0x9b_gmail.com, dave_cheney.net, adg, rsc, iant Visibility:
Public. |
Descriptionruntime: faster parallel GC
Use per-thread work buffers instead of global mutex-protected pool. This eliminates contention from parallel scan phase.
benchmark old ns/op new ns/op delta
garbage.BenchmarkTree2-8 97100768 71417553 -26.45%
garbage.BenchmarkTree2LastPause-8 970931485 714103692 -26.45%
garbage.BenchmarkTree2Pause-8 469127802 345029253 -26.45%
garbage.BenchmarkParser-8 2880950854 2715456901 -5.74%
garbage.BenchmarkParserLastPause-8 137047399 103336476 -24.60%
garbage.BenchmarkParserPause-8 80686028 58922680 -26.97%
Patch Set 1 #Patch Set 2 : diff -r e4e13824b6a3 https://dvyukov%40google.com@code.google.com/p/go/ #Patch Set 3 : diff -r e4e13824b6a3 https://dvyukov%40google.com@code.google.com/p/go/ #Patch Set 4 : diff -r e4e13824b6a3 https://dvyukov%40google.com@code.google.com/p/go/ #Patch Set 5 : diff -r e4e13824b6a3 https://dvyukov%40google.com@code.google.com/p/go/ #Patch Set 6 : diff -r ee1b8339ab04 https://dvyukov%40google.com@code.google.com/p/go/ #MessagesTotal messages: 14
Hello golang-dev@googlegroups.com, I'd like you to review this change to https://dvyukov%40google.com@code.google.com/p/go/
Sign in to reply to this message.
On 2013/03/17 09:19:32, dvyukov wrote: > Hello mailto:golang-dev@googlegroups.com, > > I'd like you to review this change to > https://dvyukov%2540google.com%40code.google.com/p/go/ SGTM
Sign in to reply to this message.
Thank you, can you share any numbers for the single proc case? On 17/03/2013, at 8:19 PM, dvyukov@google.com wrote: > Reviewers: golang-dev1, > > Message: > Hello golang-dev@googlegroups.com, > > I'd like you to review this change to > https://dvyukov%40google.com@code.google.com/p/go/ > > > Description: > runtime: faster parallel GC > benchmark old ns/op new ns/op delta > garbage.BenchmarkTree2-8 97100768 71417553 -26.45% > garbage.BenchmarkTree2LastPause-8 970931485 714103692 -26.45% > garbage.BenchmarkTree2Pause-8 469127802 345029253 -26.45% > garbage.BenchmarkParser-8 2880950854 2715456901 -5.74% > garbage.BenchmarkParserLastPause-8 137047399 103336476 -24.60% > garbage.BenchmarkParserPause-8 80686028 58922680 -26.97% > > Please review this at https://codereview.appspot.com/7816044/ > > Affected files: > M src/pkg/runtime/mgc0.c > M src/pkg/runtime/proc.c > > > Index: src/pkg/runtime/mgc0.c > =================================================================== > --- a/src/pkg/runtime/mgc0.c > +++ b/src/pkg/runtime/mgc0.c > @@ -140,6 +140,7 @@ > static Workbuf* getfull(Workbuf*); > static void putempty(Workbuf*); > static Workbuf* handoff(Workbuf*); > +static void gchelperstart(void); > > static struct { > uint64 full; // lock-free list of full blocks > @@ -287,11 +288,12 @@ > { > PtrTarget ptrtarget[IntermediateBufferCapacity]; > Obj obj[IntermediateBufferCapacity]; > - BufferList *next; > + uint32 busy; > + byte pad[CacheLineSize]; > }; > -static BufferList *bufferList; > +#pragma dataflag 16 // no pointers > +static BufferList bufferList[MaxGcproc]; > > -static Lock lock; > static Type *itabtype; > > static void enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj); > @@ -598,23 +600,11 @@ > > // Allocate ptrbuf > { > - runtime·lock(&lock); > - > - if(bufferList == nil) { > - bufferList = runtime·SysAlloc(sizeof(*bufferList)); > - if(bufferList == nil) > - runtime·throw("runtime: cannot allocate memory"); > - bufferList->next = nil; > - } > - scanbuffers = bufferList; > - bufferList = bufferList->next; > - > + scanbuffers = &bufferList[m->helpgc]; > ptrbuf = &scanbuffers->ptrtarget[0]; > ptrbuf_end = &scanbuffers->ptrtarget[0] + nelem(scanbuffers->ptrtarget); > objbuf = &scanbuffers->obj[0]; > objbuf_end = &scanbuffers->obj[0] + nelem(scanbuffers->obj); > - > - runtime·unlock(&lock); > } > > ptrbufpos = ptrbuf; > @@ -1020,11 +1010,7 @@ > nobj--; > } > > -endscan: > - runtime·lock(&lock); > - scanbuffers->next = bufferList; > - bufferList = scanbuffers; > - runtime·unlock(&lock); > +endscan:; > } > > // debug_scanblock is the debug copy of scanblock. > @@ -1649,6 +1635,8 @@ > void > runtime·gchelper(void) > { > + gchelperstart(); > + > // parallel mark for over gc roots > runtime·parfordo(work.markfor); > > @@ -1662,6 +1650,7 @@ > } > > runtime·parfordo(work.sweepfor); > + bufferList[m->helpgc].busy = 0; > if(runtime·xadd(&work.ndone, +1) == work.nproc-1) > runtime·notewakeup(&work.alldone); > } > @@ -1853,6 +1842,7 @@ > > t1 = runtime·nanotime(); > > + gchelperstart(); > runtime·parfordo(work.markfor); > scanblock(nil, nil, 0, true); > > @@ -1864,6 +1854,7 @@ > t2 = runtime·nanotime(); > > runtime·parfordo(work.sweepfor); > + bufferList[m->helpgc].busy = 0; > t3 = runtime·nanotime(); > > if(work.nproc > 1) > @@ -2005,6 +1996,15 @@ > } > > static void > +gchelperstart(void) > +{ > + if(m->helpgc < 0 || m->helpgc >= MaxGcproc) > + runtime·throw("gchelperstart: bad m->helpgc"); > + if(runtime·xchg(&bufferList[m->helpgc].busy, 1)) > + runtime·throw("gchelperstart: already busy"); > +} > + > +static void > runfinq(void) > { > Finalizer *f; > Index: src/pkg/runtime/proc.c > =================================================================== > --- a/src/pkg/runtime/proc.c > +++ b/src/pkg/runtime/proc.c > @@ -332,7 +332,7 @@ > mp = mget(); > if(mp == nil) > runtime·throw("runtime·gcprocs inconsistency"); > - mp->helpgc = 1; > + mp->helpgc = n; > mp->mcache = runtime·allp[pos]->mcache; > pos++; > runtime·notewakeup(&mp->park); > @@ -386,7 +386,7 @@ > static void > mhelpgc(void) > { > - m->helpgc = 1; > + m->helpgc = -1; > } > > void > @@ -485,7 +485,7 @@ > m->mstartfn(); > > if(m->helpgc) { > - m->helpgc = false; > + m->helpgc = 0; > stopm(); > } else if(m != &runtime·m0) { > acquirep(m->nextp); > @@ -794,8 +794,8 @@ > runtime·notesleep(&m->park); > runtime·noteclear(&m->park); > if(m->helpgc) { > + runtime·gchelper(); > m->helpgc = 0; > - runtime·gchelper(); > m->mcache = nil; > goto retry; > } > > > -- > > ---You received this message because you are subscribed to the Google Groups "golang-dev" group. > To unsubscribe from this group and stop receiving emails from it, send an email to golang-dev+unsubscribe@googlegroups.com. > For more options, visit https://groups.google.com/groups/opt_out. > >
Sign in to reply to this message.
On 2013/03/17 09:35:52, dfc wrote: > Thank you, can you share any numbers for the single proc case? benchmark old ns/op new ns/op delta garbage.BenchmarkTree2 130886989 130157981 -0.56% garbage.BenchmarkTree2LastPause 1308799713 1301522373 -0.56% garbage.BenchmarkTree2Pause 685015129 679179882 -0.85%
Sign in to reply to this message.
There is another interesting point. I've modified the test to create N blocked goroutines in the beginning: for i := 0; i < *gos; i++ { go func() { select{} }() } With 100MB heap and 100K goroutines the numbers are: benchmark old ns/op new ns/op delta garbage.BenchmarkTree2-8 15619722 9835418 -37.03% garbage.BenchmarkTree2LastPause-8 156136947 98301229 -37.04% garbage.BenchmarkTree2Pause-8 131758259 81782675 -37.93% Goroutines create additional GC roots, which results in additional contention and slowdown.
Sign in to reply to this message.
Excellent. I am pleased that there is no penalty for the single proc (*cough* arm *cough*) case. On Sun, Mar 17, 2013 at 8:57 PM, <dvyukov@google.com> wrote: > On 2013/03/17 09:35:52, dfc wrote: >> >> Thank you, can you share any numbers for the single proc case? > > > benchmark old ns/op new ns/op delta > garbage.BenchmarkTree2 130886989 130157981 -0.56% > garbage.BenchmarkTree2LastPause 1308799713 1301522373 -0.56% > garbage.BenchmarkTree2Pause 685015129 679179882 -0.85% > > > > > https://codereview.appspot.com/7816044/
Sign in to reply to this message.
On 2013/03/17 11:02:06, dfc wrote: > Excellent. I am pleased that there is no penalty for the single proc > (*cough* arm *cough*) case. In respect to the old GC? > On Sun, Mar 17, 2013 at 8:57 PM, <mailto:dvyukov@google.com> wrote: > > On 2013/03/17 09:35:52, dfc wrote: > >> > >> Thank you, can you share any numbers for the single proc case? > > > > > > benchmark old ns/op new ns/op delta > > garbage.BenchmarkTree2 130886989 130157981 -0.56% > > garbage.BenchmarkTree2LastPause 1308799713 1301522373 -0.56% > > garbage.BenchmarkTree2Pause 685015129 679179882 -0.85% > > > > > > > > > > https://codereview.appspot.com/7816044/
Sign in to reply to this message.
On Sun, Mar 17, 2013 at 3:02 PM, Dave Cheney <dave@cheney.net> wrote: > Excellent. I am pleased that there is no penalty for the single proc > (*cough* arm *cough*) case. If anything, single proc (and arm) is a dash faster. > On Sun, Mar 17, 2013 at 8:57 PM, <dvyukov@google.com> wrote: >> On 2013/03/17 09:35:52, dfc wrote: >>> >>> Thank you, can you share any numbers for the single proc case? >> >> >> benchmark old ns/op new ns/op delta >> garbage.BenchmarkTree2 130886989 130157981 -0.56% >> garbage.BenchmarkTree2LastPause 1308799713 1301522373 -0.56% >> garbage.BenchmarkTree2Pause 685015129 679179882 -0.85% >> >> >> >> >> https://codereview.appspot.com/7816044/
Sign in to reply to this message.
No, just before and after this CL. On 17/03/2013, at 10:05 PM, 0xE2.0x9A.0x9B@gmail.com wrote: > On 2013/03/17 11:02:06, dfc wrote: >> Excellent. I am pleased that there is no penalty for the single proc >> (*cough* arm *cough*) case. > > In respect to the old GC? > >> On Sun, Mar 17, 2013 at 8:57 PM, <mailto:dvyukov@google.com> wrote: >> > On 2013/03/17 09:35:52, dfc wrote: >> >> >> >> Thank you, can you share any numbers for the single proc case? >> > >> > >> > benchmark old ns/op new ns/op delta >> > garbage.BenchmarkTree2 130886989 130157981 -0.56% >> > garbage.BenchmarkTree2LastPause 1308799713 1301522373 -0.56% >> > garbage.BenchmarkTree2Pause 685015129 679179882 -0.85% >> > >> > >> > >> > >> > https://codereview.appspot.com/7816044/ > > > > https://codereview.appspot.com/7816044/
Sign in to reply to this message.
Could you please add a sentence or two to the change description, to explain what you've added here? Andrew On 17 March 2013 20:19, <dvyukov@google.com> wrote: > Reviewers: golang-dev1, > > Message: > Hello golang-dev@googlegroups.com, > > I'd like you to review this change to > https://dvyukov%40google.com@**code.google.com/p/go/<http://40google.com@code... > > > Description: > runtime: faster parallel GC > benchmark old ns/op new ns/op delta > garbage.BenchmarkTree2-8 97100768 71417553 -26.45% > garbage.**BenchmarkTree2LastPause-8 970931485 714103692 -26.45% > garbage.BenchmarkTree2Pause-8 469127802 345029253 -26.45% > garbage.BenchmarkParser-8 2880950854 2715456901 -5.74% > garbage.**BenchmarkParserLastPause-8 137047399 103336476 -24.60% > garbage.BenchmarkParserPause-8 80686028 58922680 -26.97% > > Please review this at https://codereview.appspot.**com/7816044/<https://codereview.appspot.com/7816... > > Affected files: > M src/pkg/runtime/mgc0.c > M src/pkg/runtime/proc.c > > > Index: src/pkg/runtime/mgc0.c > ==============================**==============================**======= > --- a/src/pkg/runtime/mgc0.c > +++ b/src/pkg/runtime/mgc0.c > @@ -140,6 +140,7 @@ > static Workbuf* getfull(Workbuf*); > static void putempty(Workbuf*); > static Workbuf* handoff(Workbuf*); > +static void gchelperstart(void); > > static struct { > uint64 full; // lock-free list of full blocks > @@ -287,11 +288,12 @@ > { > PtrTarget ptrtarget[**IntermediateBufferCapacity]; > Obj obj[**IntermediateBufferCapacity]; > - BufferList *next; > + uint32 busy; > + byte pad[CacheLineSize]; > }; > -static BufferList *bufferList; > +#pragma dataflag 16 // no pointers > +static BufferList bufferList[MaxGcproc]; > > -static Lock lock; > static Type *itabtype; > > static void enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj); > @@ -598,23 +600,11 @@ > > // Allocate ptrbuf > { > - runtime·lock(&lock); > - > - if(bufferList == nil) { > - bufferList = runtime·SysAlloc(sizeof(*** > bufferList)); > - if(bufferList == nil) > - runtime·throw("runtime: cannot allocate > memory"); > - bufferList->next = nil; > - } > - scanbuffers = bufferList; > - bufferList = bufferList->next; > - > + scanbuffers = &bufferList[m->helpgc]; > ptrbuf = &scanbuffers->ptrtarget[0]; > ptrbuf_end = &scanbuffers->ptrtarget[0] + > nelem(scanbuffers->ptrtarget); > objbuf = &scanbuffers->obj[0]; > objbuf_end = &scanbuffers->obj[0] + > nelem(scanbuffers->obj); > - > - runtime·unlock(&lock); > } > > ptrbufpos = ptrbuf; > @@ -1020,11 +1010,7 @@ > nobj--; > } > > -endscan: > - runtime·lock(&lock); > - scanbuffers->next = bufferList; > - bufferList = scanbuffers; > - runtime·unlock(&lock); > +endscan:; > } > > // debug_scanblock is the debug copy of scanblock. > @@ -1649,6 +1635,8 @@ > void > runtime·gchelper(void) > { > + gchelperstart(); > + > // parallel mark for over gc roots > runtime·parfordo(work.markfor)**; > > @@ -1662,6 +1650,7 @@ > } > > runtime·parfordo(work.**sweepfor); > + bufferList[m->helpgc].busy = 0; > if(runtime·xadd(&work.ndone, +1) == work.nproc-1) > runtime·notewakeup(&work.**alldone); > } > @@ -1853,6 +1842,7 @@ > > t1 = runtime·nanotime(); > > + gchelperstart(); > runtime·parfordo(work.markfor)**; > scanblock(nil, nil, 0, true); > > @@ -1864,6 +1854,7 @@ > t2 = runtime·nanotime(); > > runtime·parfordo(work.**sweepfor); > + bufferList[m->helpgc].busy = 0; > t3 = runtime·nanotime(); > > if(work.nproc > 1) > @@ -2005,6 +1996,15 @@ > } > > static void > +gchelperstart(void) > +{ > + if(m->helpgc < 0 || m->helpgc >= MaxGcproc) > + runtime·throw("gchelperstart: bad m->helpgc"); > + if(runtime·xchg(&bufferList[m-**>helpgc].busy, 1)) > + runtime·throw("gchelperstart: already busy"); > +} > + > +static void > runfinq(void) > { > Finalizer *f; > Index: src/pkg/runtime/proc.c > ==============================**==============================**======= > --- a/src/pkg/runtime/proc.c > +++ b/src/pkg/runtime/proc.c > @@ -332,7 +332,7 @@ > mp = mget(); > if(mp == nil) > runtime·throw("runtime·gcprocs inconsistency"); > - mp->helpgc = 1; > + mp->helpgc = n; > mp->mcache = runtime·allp[pos]->mcache; > pos++; > runtime·notewakeup(&mp->park); > @@ -386,7 +386,7 @@ > static void > mhelpgc(void) > { > - m->helpgc = 1; > + m->helpgc = -1; > } > > void > @@ -485,7 +485,7 @@ > m->mstartfn(); > > if(m->helpgc) { > - m->helpgc = false; > + m->helpgc = 0; > stopm(); > } else if(m != &runtime·m0) { > acquirep(m->nextp); > @@ -794,8 +794,8 @@ > runtime·notesleep(&m->park); > runtime·noteclear(&m->park); > if(m->helpgc) { > + runtime·gchelper(); > m->helpgc = 0; > - runtime·gchelper(); > m->mcache = nil; > goto retry; > } > > > -- > > ---You received this message because you are subscribed to the Google > Groups "golang-dev" group. > To unsubscribe from this group and stop receiving emails from it, send an > email to golang-dev+unsubscribe@**googlegroups.com<golang-dev%2Bunsubscribe@googlegrou... > . > For more options, visit https://groups.google.com/**groups/opt_out<https://groups.google.com/groups/o... > . > > >
Sign in to reply to this message.
On 2013/03/17 21:36:34, adg wrote: > Could you please add a sentence or two to the change description, to > explain what you've added here? Done
Sign in to reply to this message.
*** Submitted as https://code.google.com/p/go/source/detail?r=0d7891ca5e06 *** runtime: faster parallel GC Use per-thread work buffers instead of global mutex-protected pool. This eliminates contention from parallel scan phase. benchmark old ns/op new ns/op delta garbage.BenchmarkTree2-8 97100768 71417553 -26.45% garbage.BenchmarkTree2LastPause-8 970931485 714103692 -26.45% garbage.BenchmarkTree2Pause-8 469127802 345029253 -26.45% garbage.BenchmarkParser-8 2880950854 2715456901 -5.74% garbage.BenchmarkParserLastPause-8 137047399 103336476 -24.60% garbage.BenchmarkParserPause-8 80686028 58922680 -26.97% R=golang-dev, 0xe2.0x9a.0x9b, dave, adg, rsc, iant CC=golang-dev https://codereview.appspot.com/7816044
Sign in to reply to this message.
|