Run Format

Text file src/pkg/runtime/proc.c

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "runtime.h"
     6	#include "arch_GOARCH.h"
     7	#include "malloc.h"
     8	#include "stack.h"
     9	#include "race.h"
    10	#include "type.h"
    11	
    12	// Goroutine scheduler
    13	// The scheduler's job is to distribute ready-to-run goroutines over worker threads.
    14	//
    15	// The main concepts are:
    16	// G - goroutine.
    17	// M - worker thread, or machine.
    18	// P - processor, a resource that is required to execute Go code.
    19	//     M must have an associated P to execute Go code, however it can be
    20	//     blocked or in a syscall w/o an associated P.
    21	//
    22	// Design doc at http://golang.org/s/go11sched.
    23	
    24	typedef struct Sched Sched;
    25	struct Sched {
    26		Lock;
    27	
    28		uint64	goidgen;
    29	
    30		M*	midle;	 // idle m's waiting for work
    31		int32	nmidle;	 // number of idle m's waiting for work
    32		int32	mlocked; // number of locked m's waiting for work
    33		int32	mcount;	 // number of m's that have been created
    34	
    35		P*	pidle;  // idle P's
    36		uint32	npidle;
    37		uint32	nmspinning;
    38	
    39		// Global runnable queue.
    40		G*	runqhead;
    41		G*	runqtail;
    42		int32	runqsize;
    43	
    44		// Global cache of dead G's.
    45		Lock	gflock;
    46		G*	gfree;
    47	
    48		int32	stopwait;
    49		Note	stopnote;
    50		uint32	sysmonwait;
    51		Note	sysmonnote;
    52		uint64	lastpoll;
    53	
    54		int32	profilehz;	// cpu profiling rate
    55	};
    56	
    57	// The max value of GOMAXPROCS.
    58	// There are no fundamental restrictions on the value.
    59	enum { MaxGomaxprocs = 1<<8 };
    60	
    61	Sched	runtime·sched;
    62	int32	runtime·gomaxprocs;
    63	bool	runtime·singleproc;
    64	bool	runtime·iscgo;
    65	uint32	runtime·gcwaiting;
    66	M	runtime·m0;
    67	G	runtime·g0;	 // idle goroutine for m0
    68	G*	runtime·allg;
    69	G*	runtime·lastg;
    70	M*	runtime·allm;
    71	M*	runtime·extram;
    72	int8*	runtime·goos;
    73	int32	runtime·ncpu;
    74	static int32	newprocs;
    75	
    76	void runtime·mstart(void);
    77	static void runqput(P*, G*);
    78	static G* runqget(P*);
    79	static void runqgrow(P*);
    80	static G* runqsteal(P*, P*);
    81	static void mput(M*);
    82	static M* mget(void);
    83	static void mcommoninit(M*);
    84	static void schedule(void);
    85	static void procresize(int32);
    86	static void acquirep(P*);
    87	static P* releasep(void);
    88	static void newm(void(*)(void), P*);
    89	static void goidle(void);
    90	static void stopm(void);
    91	static void startm(P*, bool);
    92	static void handoffp(P*);
    93	static void wakep(void);
    94	static void stoplockedm(void);
    95	static void startlockedm(G*);
    96	static void sysmon(void);
    97	static uint32 retake(uint32*);
    98	static void inclocked(int32);
    99	static void checkdead(void);
   100	static void exitsyscall0(G*);
   101	static void park0(G*);
   102	static void gosched0(G*);
   103	static void goexit0(G*);
   104	static void gfput(P*, G*);
   105	static G* gfget(P*);
   106	static void gfpurge(P*);
   107	static void globrunqput(G*);
   108	static G* globrunqget(P*);
   109	static P* pidleget(void);
   110	static void pidleput(P*);
   111	static void injectglist(G*);
   112	
   113	// The bootstrap sequence is:
   114	//
   115	//	call osinit
   116	//	call schedinit
   117	//	make & queue new G
   118	//	call runtime·mstart
   119	//
   120	// The new G calls runtime·main.
   121	void
   122	runtime·schedinit(void)
   123	{
   124		int32 n, procs;
   125		byte *p;
   126	
   127		m->nomemprof++;
   128		runtime·mprofinit();
   129		runtime·mallocinit();
   130		mcommoninit(m);
   131	
   132		runtime·goargs();
   133		runtime·goenvs();
   134	
   135		// For debugging:
   136		// Allocate internal symbol table representation now,
   137		// so that we don't need to call malloc when we crash.
   138		// runtime·findfunc(0);
   139	
   140		runtime·sched.lastpoll = runtime·nanotime();
   141		procs = 1;
   142		p = runtime·getenv("GOMAXPROCS");
   143		if(p != nil && (n = runtime·atoi(p)) > 0) {
   144			if(n > MaxGomaxprocs)
   145				n = MaxGomaxprocs;
   146			procs = n;
   147		}
   148		runtime·allp = runtime·malloc((MaxGomaxprocs+1)*sizeof(runtime·allp[0]));
   149		procresize(procs);
   150	
   151		mstats.enablegc = 1;
   152		m->nomemprof--;
   153	
   154		if(raceenabled)
   155			g->racectx = runtime·raceinit();
   156	}
   157	
   158	extern void main·init(void);
   159	extern void main·main(void);
   160	
   161	static FuncVal scavenger = {runtime·MHeap_Scavenger};
   162	
   163	// The main goroutine.
   164	void
   165	runtime·main(void)
   166	{
   167		newm(sysmon, nil);
   168	
   169		// Lock the main goroutine onto this, the main OS thread,
   170		// during initialization.  Most programs won't care, but a few
   171		// do require certain calls to be made by the main thread.
   172		// Those can arrange for main.main to run in the main thread
   173		// by calling runtime.LockOSThread during initialization
   174		// to preserve the lock.
   175		runtime·lockOSThread();
   176		if(m != &runtime·m0)
   177			runtime·throw("runtime·main not on m0");
   178		runtime·newproc1(&scavenger, nil, 0, 0, runtime·main);
   179		main·init();
   180		runtime·unlockOSThread();
   181	
   182		main·main();
   183		if(raceenabled)
   184			runtime·racefini();
   185	
   186		// Make racy client program work: if panicking on
   187		// another goroutine at the same time as main returns,
   188		// let the other goroutine finish printing the panic trace.
   189		// Once it does, it will exit. See issue 3934.
   190		if(runtime·panicking)
   191			runtime·park(nil, nil, "panicwait");
   192	
   193		runtime·exit(0);
   194		for(;;)
   195			*(int32*)runtime·main = 0;
   196	}
   197	
   198	void
   199	runtime·goroutineheader(G *gp)
   200	{
   201		int8 *status;
   202	
   203		switch(gp->status) {
   204		case Gidle:
   205			status = "idle";
   206			break;
   207		case Grunnable:
   208			status = "runnable";
   209			break;
   210		case Grunning:
   211			status = "running";
   212			break;
   213		case Gsyscall:
   214			status = "syscall";
   215			break;
   216		case Gwaiting:
   217			if(gp->waitreason)
   218				status = gp->waitreason;
   219			else
   220				status = "waiting";
   221			break;
   222		default:
   223			status = "???";
   224			break;
   225		}
   226		runtime·printf("goroutine %D [%s]:\n", gp->goid, status);
   227	}
   228	
   229	void
   230	runtime·tracebackothers(G *me)
   231	{
   232		G *gp;
   233		int32 traceback;
   234	
   235		traceback = runtime·gotraceback(nil);
   236		for(gp = runtime·allg; gp != nil; gp = gp->alllink) {
   237			if(gp == me || gp->status == Gdead)
   238				continue;
   239			if(gp->issystem && traceback < 2)
   240				continue;
   241			runtime·printf("\n");
   242			runtime·goroutineheader(gp);
   243			runtime·traceback(gp->sched.pc, (byte*)gp->sched.sp, 0, gp);
   244		}
   245	}
   246	
   247	static void
   248	mcommoninit(M *mp)
   249	{
   250		// If there is no mcache runtime·callers() will crash,
   251		// and we are most likely in sysmon thread so the stack is senseless anyway.
   252		if(m->mcache)
   253			runtime·callers(1, mp->createstack, nelem(mp->createstack));
   254	
   255		mp->fastrand = 0x49f6428aUL + mp->id + runtime·cputicks();
   256	
   257		runtime·lock(&runtime·sched);
   258		mp->id = runtime·sched.mcount++;
   259	
   260		runtime·mpreinit(mp);
   261	
   262		// Add to runtime·allm so garbage collector doesn't free m
   263		// when it is just in a register or thread-local storage.
   264		mp->alllink = runtime·allm;
   265		// runtime·NumCgoCall() iterates over allm w/o schedlock,
   266		// so we need to publish it safely.
   267		runtime·atomicstorep(&runtime·allm, mp);
   268		runtime·unlock(&runtime·sched);
   269	}
   270	
   271	// Mark gp ready to run.
   272	void
   273	runtime·ready(G *gp)
   274	{
   275		// Mark runnable.
   276		if(gp->status != Gwaiting) {
   277			runtime·printf("goroutine %D has status %d\n", gp->goid, gp->status);
   278			runtime·throw("bad g->status in ready");
   279		}
   280		gp->status = Grunnable;
   281		runqput(m->p, gp);
   282		if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0)  // TODO: fast atomic
   283			wakep();
   284	}
   285	
   286	int32
   287	runtime·gcprocs(void)
   288	{
   289		int32 n;
   290	
   291		// Figure out how many CPUs to use during GC.
   292		// Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
   293		runtime·lock(&runtime·sched);
   294		n = runtime·gomaxprocs;
   295		if(n > runtime·ncpu)
   296			n = runtime·ncpu;
   297		if(n > MaxGcproc)
   298			n = MaxGcproc;
   299		if(n > runtime·sched.nmidle+1) // one M is currently running
   300			n = runtime·sched.nmidle+1;
   301		runtime·unlock(&runtime·sched);
   302		return n;
   303	}
   304	
   305	static bool
   306	needaddgcproc(void)
   307	{
   308		int32 n;
   309	
   310		runtime·lock(&runtime·sched);
   311		n = runtime·gomaxprocs;
   312		if(n > runtime·ncpu)
   313			n = runtime·ncpu;
   314		if(n > MaxGcproc)
   315			n = MaxGcproc;
   316		n -= runtime·sched.nmidle+1; // one M is currently running
   317		runtime·unlock(&runtime·sched);
   318		return n > 0;
   319	}
   320	
   321	void
   322	runtime·helpgc(int32 nproc)
   323	{
   324		M *mp;
   325		int32 n, pos;
   326	
   327		runtime·lock(&runtime·sched);
   328		pos = 0;
   329		for(n = 1; n < nproc; n++) {  // one M is currently running
   330			if(runtime·allp[pos]->mcache == m->mcache)
   331				pos++;
   332			mp = mget();
   333			if(mp == nil)
   334				runtime·throw("runtime·gcprocs inconsistency");
   335			mp->helpgc = n;
   336			mp->mcache = runtime·allp[pos]->mcache;
   337			pos++;
   338			runtime·notewakeup(&mp->park);
   339		}
   340		runtime·unlock(&runtime·sched);
   341	}
   342	
   343	void
   344	runtime·stoptheworld(void)
   345	{
   346		int32 i;
   347		uint32 s;
   348		P *p;
   349		bool wait;
   350	
   351		runtime·lock(&runtime·sched);
   352		runtime·sched.stopwait = runtime·gomaxprocs;
   353		runtime·atomicstore((uint32*)&runtime·gcwaiting, 1);
   354		// stop current P
   355		m->p->status = Pgcstop;
   356		runtime·sched.stopwait--;
   357		// try to retake all P's in Psyscall status
   358		for(i = 0; i < runtime·gomaxprocs; i++) {
   359			p = runtime·allp[i];
   360			s = p->status;
   361			if(s == Psyscall && runtime·cas(&p->status, s, Pgcstop))
   362				runtime·sched.stopwait--;
   363		}
   364		// stop idle P's
   365		while(p = pidleget()) {
   366			p->status = Pgcstop;
   367			runtime·sched.stopwait--;
   368		}
   369		wait = runtime·sched.stopwait > 0;
   370		runtime·unlock(&runtime·sched);
   371	
   372		// wait for remaining P's to stop voluntary
   373		if(wait) {
   374			runtime·notesleep(&runtime·sched.stopnote);
   375			runtime·noteclear(&runtime·sched.stopnote);
   376		}
   377		if(runtime·sched.stopwait)
   378			runtime·throw("stoptheworld: not stopped");
   379		for(i = 0; i < runtime·gomaxprocs; i++) {
   380			p = runtime·allp[i];
   381			if(p->status != Pgcstop)
   382				runtime·throw("stoptheworld: not stopped");
   383		}
   384	}
   385	
   386	static void
   387	mhelpgc(void)
   388	{
   389		m->helpgc = -1;
   390	}
   391	
   392	void
   393	runtime·starttheworld(void)
   394	{
   395		P *p, *p1;
   396		M *mp;
   397		G *gp;
   398		bool add;
   399	
   400		gp = runtime·netpoll(false);  // non-blocking
   401		injectglist(gp);
   402		add = needaddgcproc();
   403		runtime·lock(&runtime·sched);
   404		if(newprocs) {
   405			procresize(newprocs);
   406			newprocs = 0;
   407		} else
   408			procresize(runtime·gomaxprocs);
   409		runtime·gcwaiting = 0;
   410	
   411		p1 = nil;
   412		while(p = pidleget()) {
   413			// procresize() puts p's with work at the beginning of the list.
   414			// Once we reach a p without a run queue, the rest don't have one either.
   415			if(p->runqhead == p->runqtail) {
   416				pidleput(p);
   417				break;
   418			}
   419			mp = mget();
   420			if(mp == nil) {
   421				p->link = p1;
   422				p1 = p;
   423				continue;
   424			}
   425			if(mp->nextp)
   426				runtime·throw("starttheworld: inconsistent mp->nextp");
   427			mp->nextp = p;
   428			runtime·notewakeup(&mp->park);
   429		}
   430		if(runtime·sched.sysmonwait) {
   431			runtime·sched.sysmonwait = false;
   432			runtime·notewakeup(&runtime·sched.sysmonnote);
   433		}
   434		runtime·unlock(&runtime·sched);
   435	
   436		while(p1) {
   437			p = p1;
   438			p1 = p1->link;
   439			add = false;
   440			newm(nil, p);
   441		}
   442	
   443		if(add) {
   444			// If GC could have used another helper proc, start one now,
   445			// in the hope that it will be available next time.
   446			// It would have been even better to start it before the collection,
   447			// but doing so requires allocating memory, so it's tricky to
   448			// coordinate.  This lazy approach works out in practice:
   449			// we don't mind if the first couple gc rounds don't have quite
   450			// the maximum number of procs.
   451			newm(mhelpgc, nil);
   452		}
   453	}
   454	
   455	// Called to start an M.
   456	void
   457	runtime·mstart(void)
   458	{
   459		// It is used by windows-386 only. Unfortunately, seh needs
   460		// to be located on os stack, and mstart runs on os stack
   461		// for both m0 and m.
   462		SEH seh;
   463	
   464		if(g != m->g0)
   465			runtime·throw("bad runtime·mstart");
   466	
   467		// Record top of stack for use by mcall.
   468		// Once we call schedule we're never coming back,
   469		// so other calls can reuse this stack space.
   470		runtime·gosave(&m->g0->sched);
   471		m->g0->sched.pc = (void*)-1;  // make sure it is never used
   472		m->seh = &seh;
   473		runtime·asminit();
   474		runtime·minit();
   475	
   476		// Install signal handlers; after minit so that minit can
   477		// prepare the thread to be able to handle the signals.
   478		if(m == &runtime·m0) {
   479			runtime·initsig();
   480			if(runtime·iscgo)
   481				runtime·newextram();
   482		}
   483		
   484		if(m->mstartfn)
   485			m->mstartfn();
   486	
   487		if(m->helpgc) {
   488			m->helpgc = 0;
   489			stopm();
   490		} else if(m != &runtime·m0) {
   491			acquirep(m->nextp);
   492			m->nextp = nil;
   493		}
   494		schedule();
   495	
   496		// TODO(brainman): This point is never reached, because scheduler
   497		// does not release os threads at the moment. But once this path
   498		// is enabled, we must remove our seh here.
   499	}
   500	
   501	// When running with cgo, we call _cgo_thread_start
   502	// to start threads for us so that we can play nicely with
   503	// foreign code.
   504	void (*_cgo_thread_start)(void*);
   505	
   506	typedef struct CgoThreadStart CgoThreadStart;
   507	struct CgoThreadStart
   508	{
   509		M *m;
   510		G *g;
   511		void (*fn)(void);
   512	};
   513	
   514	// Allocate a new m unassociated with any thread.
   515	// Can use p for allocation context if needed.
   516	M*
   517	runtime·allocm(P *p)
   518	{
   519		M *mp;
   520		static Type *mtype;  // The Go type M
   521	
   522		m->locks++;  // disable GC because it can be called from sysmon
   523		if(m->p == nil)
   524			acquirep(p);  // temporarily borrow p for mallocs in this function
   525		if(mtype == nil) {
   526			Eface e;
   527			runtime·gc_m_ptr(&e);
   528			mtype = ((PtrType*)e.type)->elem;
   529		}
   530	
   531		mp = runtime·cnew(mtype);
   532		mcommoninit(mp);
   533	
   534		// In case of cgo, pthread_create will make us a stack.
   535		// Windows will layout sched stack on OS stack.
   536		if(runtime·iscgo || Windows)
   537			mp->g0 = runtime·malg(-1);
   538		else
   539			mp->g0 = runtime·malg(8192);
   540	
   541		if(p == m->p)
   542			releasep();
   543		m->locks--;
   544	
   545		return mp;
   546	}
   547	
   548	static M* lockextra(bool nilokay);
   549	static void unlockextra(M*);
   550	
   551	// needm is called when a cgo callback happens on a
   552	// thread without an m (a thread not created by Go).
   553	// In this case, needm is expected to find an m to use
   554	// and return with m, g initialized correctly.
   555	// Since m and g are not set now (likely nil, but see below)
   556	// needm is limited in what routines it can call. In particular
   557	// it can only call nosplit functions (textflag 7) and cannot
   558	// do any scheduling that requires an m.
   559	//
   560	// In order to avoid needing heavy lifting here, we adopt
   561	// the following strategy: there is a stack of available m's
   562	// that can be stolen. Using compare-and-swap
   563	// to pop from the stack has ABA races, so we simulate
   564	// a lock by doing an exchange (via casp) to steal the stack
   565	// head and replace the top pointer with MLOCKED (1).
   566	// This serves as a simple spin lock that we can use even
   567	// without an m. The thread that locks the stack in this way
   568	// unlocks the stack by storing a valid stack head pointer.
   569	//
   570	// In order to make sure that there is always an m structure
   571	// available to be stolen, we maintain the invariant that there
   572	// is always one more than needed. At the beginning of the
   573	// program (if cgo is in use) the list is seeded with a single m.
   574	// If needm finds that it has taken the last m off the list, its job
   575	// is - once it has installed its own m so that it can do things like
   576	// allocate memory - to create a spare m and put it on the list.
   577	//
   578	// Each of these extra m's also has a g0 and a curg that are
   579	// pressed into service as the scheduling stack and current
   580	// goroutine for the duration of the cgo callback.
   581	//
   582	// When the callback is done with the m, it calls dropm to
   583	// put the m back on the list.
   584	#pragma textflag 7
   585	void
   586	runtime·needm(byte x)
   587	{
   588		M *mp;
   589	
   590		// Lock extra list, take head, unlock popped list.
   591		// nilokay=false is safe here because of the invariant above,
   592		// that the extra list always contains or will soon contain
   593		// at least one m.
   594		mp = lockextra(false);
   595	
   596		// Set needextram when we've just emptied the list,
   597		// so that the eventual call into cgocallbackg will
   598		// allocate a new m for the extra list. We delay the
   599		// allocation until then so that it can be done
   600		// after exitsyscall makes sure it is okay to be
   601		// running at all (that is, there's no garbage collection
   602		// running right now).
   603		mp->needextram = mp->schedlink == nil;
   604		unlockextra(mp->schedlink);
   605	
   606		// Install m and g (= m->g0) and set the stack bounds
   607		// to match the current stack. We don't actually know
   608		// how big the stack is, like we don't know how big any
   609		// scheduling stack is, but we assume there's at least 32 kB,
   610		// which is more than enough for us.
   611		runtime·setmg(mp, mp->g0);
   612		g->stackbase = (uintptr)(&x + 1024);
   613		g->stackguard = (uintptr)(&x - 32*1024);
   614	
   615		// On windows/386, we need to put an SEH frame (two words)
   616		// somewhere on the current stack. We are called
   617		// from needm, and we know there is some available
   618		// space one word into the argument frame. Use that.
   619		m->seh = (SEH*)((uintptr*)&x + 1);
   620	
   621		// Initialize this thread to use the m.
   622		runtime·asminit();
   623		runtime·minit();
   624	}
   625	
   626	// newextram allocates an m and puts it on the extra list.
   627	// It is called with a working local m, so that it can do things
   628	// like call schedlock and allocate.
   629	void
   630	runtime·newextram(void)
   631	{
   632		M *mp, *mnext;
   633		G *gp;
   634	
   635		// Create extra goroutine locked to extra m.
   636		// The goroutine is the context in which the cgo callback will run.
   637		// The sched.pc will never be returned to, but setting it to
   638		// runtime.goexit makes clear to the traceback routines where
   639		// the goroutine stack ends.
   640		mp = runtime·allocm(nil);
   641		gp = runtime·malg(4096);
   642		gp->sched.pc = (void*)runtime·goexit;
   643		gp->sched.sp = gp->stackbase;
   644		gp->sched.g = gp;
   645		gp->status = Gsyscall;
   646		mp->curg = gp;
   647		mp->locked = LockInternal;
   648		mp->lockedg = gp;
   649		gp->lockedm = mp;
   650		// put on allg for garbage collector
   651		runtime·lock(&runtime·sched);
   652		if(runtime·lastg == nil)
   653			runtime·allg = gp;
   654		else
   655			runtime·lastg->alllink = gp;
   656		runtime·lastg = gp;
   657		runtime·unlock(&runtime·sched);
   658		gp->goid = runtime·xadd64(&runtime·sched.goidgen, 1);
   659		if(raceenabled)
   660			gp->racectx = runtime·racegostart(runtime·newextram);
   661	
   662		// Add m to the extra list.
   663		mnext = lockextra(true);
   664		mp->schedlink = mnext;
   665		unlockextra(mp);
   666	}
   667	
   668	// dropm is called when a cgo callback has called needm but is now
   669	// done with the callback and returning back into the non-Go thread.
   670	// It puts the current m back onto the extra list.
   671	//
   672	// The main expense here is the call to signalstack to release the
   673	// m's signal stack, and then the call to needm on the next callback
   674	// from this thread. It is tempting to try to save the m for next time,
   675	// which would eliminate both these costs, but there might not be
   676	// a next time: the current thread (which Go does not control) might exit.
   677	// If we saved the m for that thread, there would be an m leak each time
   678	// such a thread exited. Instead, we acquire and release an m on each
   679	// call. These should typically not be scheduling operations, just a few
   680	// atomics, so the cost should be small.
   681	//
   682	// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
   683	// variable using pthread_key_create. Unlike the pthread keys we already use
   684	// on OS X, this dummy key would never be read by Go code. It would exist
   685	// only so that we could register at thread-exit-time destructor.
   686	// That destructor would put the m back onto the extra list.
   687	// This is purely a performance optimization. The current version,
   688	// in which dropm happens on each cgo call, is still correct too.
   689	// We may have to keep the current version on systems with cgo
   690	// but without pthreads, like Windows.
   691	void
   692	runtime·dropm(void)
   693	{
   694		M *mp, *mnext;
   695	
   696		// Undo whatever initialization minit did during needm.
   697		runtime·unminit();
   698		m->seh = nil;  // reset dangling typed pointer
   699	
   700		// Clear m and g, and return m to the extra list.
   701		// After the call to setmg we can only call nosplit functions.
   702		mp = m;
   703		runtime·setmg(nil, nil);
   704	
   705		mnext = lockextra(true);
   706		mp->schedlink = mnext;
   707		unlockextra(mp);
   708	}
   709	
   710	#define MLOCKED ((M*)1)
   711	
   712	// lockextra locks the extra list and returns the list head.
   713	// The caller must unlock the list by storing a new list head
   714	// to runtime.extram. If nilokay is true, then lockextra will
   715	// return a nil list head if that's what it finds. If nilokay is false,
   716	// lockextra will keep waiting until the list head is no longer nil.
   717	#pragma textflag 7
   718	static M*
   719	lockextra(bool nilokay)
   720	{
   721		M *mp;
   722		void (*yield)(void);
   723	
   724		for(;;) {
   725			mp = runtime·atomicloadp(&runtime·extram);
   726			if(mp == MLOCKED) {
   727				yield = runtime·osyield;
   728				yield();
   729				continue;
   730			}
   731			if(mp == nil && !nilokay) {
   732				runtime·usleep(1);
   733				continue;
   734			}
   735			if(!runtime·casp(&runtime·extram, mp, MLOCKED)) {
   736				yield = runtime·osyield;
   737				yield();
   738				continue;
   739			}
   740			break;
   741		}
   742		return mp;
   743	}
   744	
   745	#pragma textflag 7
   746	static void
   747	unlockextra(M *mp)
   748	{
   749		runtime·atomicstorep(&runtime·extram, mp);
   750	}
   751	
   752	
   753	// Create a new m.  It will start off with a call to fn, or else the scheduler.
   754	static void
   755	newm(void(*fn)(void), P *p)
   756	{
   757		M *mp;
   758	
   759		mp = runtime·allocm(p);
   760		mp->nextp = p;
   761		mp->mstartfn = fn;
   762	
   763		if(runtime·iscgo) {
   764			CgoThreadStart ts;
   765	
   766			if(_cgo_thread_start == nil)
   767				runtime·throw("_cgo_thread_start missing");
   768			ts.m = mp;
   769			ts.g = mp->g0;
   770			ts.fn = runtime·mstart;
   771			runtime·asmcgocall(_cgo_thread_start, &ts);
   772			return;
   773		}
   774		runtime·newosproc(mp, (byte*)mp->g0->stackbase);
   775	}
   776	
   777	// Stops execution of the current m until new work is available.
   778	// Returns with acquired P.
   779	static void
   780	stopm(void)
   781	{
   782		if(m->locks)
   783			runtime·throw("stopm holding locks");
   784		if(m->p)
   785			runtime·throw("stopm holding p");
   786		if(m->spinning) {
   787			m->spinning = false;
   788			runtime·xadd(&runtime·sched.nmspinning, -1);
   789		}
   790	
   791	retry:
   792		runtime·lock(&runtime·sched);
   793		mput(m);
   794		runtime·unlock(&runtime·sched);
   795		runtime·notesleep(&m->park);
   796		runtime·noteclear(&m->park);
   797		if(m->helpgc) {
   798			runtime·gchelper();
   799			m->helpgc = 0;
   800			m->mcache = nil;
   801			goto retry;
   802		}
   803		acquirep(m->nextp);
   804		m->nextp = nil;
   805	}
   806	
   807	static void
   808	mspinning(void)
   809	{
   810		m->spinning = true;
   811	}
   812	
   813	// Schedules some M to run the p (creates an M if necessary).
   814	// If p==nil, tries to get an idle P, if no idle P's returns false.
   815	static void
   816	startm(P *p, bool spinning)
   817	{
   818		M *mp;
   819		void (*fn)(void);
   820	
   821		runtime·lock(&runtime·sched);
   822		if(p == nil) {
   823			p = pidleget();
   824			if(p == nil) {
   825				runtime·unlock(&runtime·sched);
   826				if(spinning)
   827					runtime·xadd(&runtime·sched.nmspinning, -1);
   828				return;
   829			}
   830		}
   831		mp = mget();
   832		runtime·unlock(&runtime·sched);
   833		if(mp == nil) {
   834			fn = nil;
   835			if(spinning)
   836				fn = mspinning;
   837			newm(fn, p);
   838			return;
   839		}
   840		if(mp->spinning)
   841			runtime·throw("startm: m is spinning");
   842		if(mp->nextp)
   843			runtime·throw("startm: m has p");
   844		mp->spinning = spinning;
   845		mp->nextp = p;
   846		runtime·notewakeup(&mp->park);
   847	}
   848	
   849	// Hands off P from syscall or locked M.
   850	static void
   851	handoffp(P *p)
   852	{
   853		// if it has local work, start it straight away
   854		if(p->runqhead != p->runqtail || runtime·sched.runqsize) {
   855			startm(p, false);
   856			return;
   857		}
   858		// no local work, check that there are no spinning/idle M's,
   859		// otherwise our help is not required
   860		if(runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) == 0 &&  // TODO: fast atomic
   861			runtime·cas(&runtime·sched.nmspinning, 0, 1)) {
   862			startm(p, true);
   863			return;
   864		}
   865		runtime·lock(&runtime·sched);
   866		if(runtime·gcwaiting) {
   867			p->status = Pgcstop;
   868			if(--runtime·sched.stopwait == 0)
   869				runtime·notewakeup(&runtime·sched.stopnote);
   870			runtime·unlock(&runtime·sched);
   871			return;
   872		}
   873		if(runtime·sched.runqsize) {
   874			runtime·unlock(&runtime·sched);
   875			startm(p, false);
   876			return;
   877		}
   878		// If this is the last running P and nobody is polling network,
   879		// need to wakeup another M to poll network.
   880		if(runtime·sched.npidle == runtime·gomaxprocs-1 && runtime·atomicload64(&runtime·sched.lastpoll) != 0) {
   881			runtime·unlock(&runtime·sched);
   882			startm(p, false);
   883			return;
   884		}
   885		pidleput(p);
   886		runtime·unlock(&runtime·sched);
   887	}
   888	
   889	// Tries to add one more P to execute G's.
   890	// Called when a G is made runnable (newproc, ready).
   891	static void
   892	wakep(void)
   893	{
   894		// be conservative about spinning threads
   895		if(!runtime·cas(&runtime·sched.nmspinning, 0, 1))
   896			return;
   897		startm(nil, true);
   898	}
   899	
   900	// Stops execution of the current m that is locked to a g until the g is runnable again.
   901	// Returns with acquired P.
   902	static void
   903	stoplockedm(void)
   904	{
   905		P *p;
   906	
   907		if(m->lockedg == nil || m->lockedg->lockedm != m)
   908			runtime·throw("stoplockedm: inconsistent locking");
   909		if(m->p) {
   910			// Schedule another M to run this p.
   911			p = releasep();
   912			handoffp(p);
   913		}
   914		inclocked(1);
   915		// Wait until another thread schedules lockedg again.
   916		runtime·notesleep(&m->park);
   917		runtime·noteclear(&m->park);
   918		if(m->lockedg->status != Grunnable)
   919			runtime·throw("stoplockedm: not runnable");
   920		acquirep(m->nextp);
   921		m->nextp = nil;
   922	}
   923	
   924	// Schedules the locked m to run the locked gp.
   925	static void
   926	startlockedm(G *gp)
   927	{
   928		M *mp;
   929		P *p;
   930	
   931		mp = gp->lockedm;
   932		if(mp == m)
   933			runtime·throw("startlockedm: locked to me");
   934		if(mp->nextp)
   935			runtime·throw("startlockedm: m has p");
   936		// directly handoff current P to the locked m
   937		inclocked(-1);
   938		p = releasep();
   939		mp->nextp = p;
   940		runtime·notewakeup(&mp->park);
   941		stopm();
   942	}
   943	
   944	// Stops the current m for stoptheworld.
   945	// Returns when the world is restarted.
   946	static void
   947	gcstopm(void)
   948	{
   949		P *p;
   950	
   951		if(!runtime·gcwaiting)
   952			runtime·throw("gcstopm: not waiting for gc");
   953		if(m->spinning) {
   954			m->spinning = false;
   955			runtime·xadd(&runtime·sched.nmspinning, -1);
   956		}
   957		p = releasep();
   958		runtime·lock(&runtime·sched);
   959		p->status = Pgcstop;
   960		if(--runtime·sched.stopwait == 0)
   961			runtime·notewakeup(&runtime·sched.stopnote);
   962		runtime·unlock(&runtime·sched);
   963		stopm();
   964	}
   965	
   966	// Schedules gp to run on the current M.
   967	// Never returns.
   968	static void
   969	execute(G *gp)
   970	{
   971		int32 hz;
   972	
   973		if(gp->status != Grunnable) {
   974			runtime·printf("execute: bad g status %d\n", gp->status);
   975			runtime·throw("execute: bad g status");
   976		}
   977		gp->status = Grunning;
   978		m->p->tick++;
   979		m->curg = gp;
   980		gp->m = m;
   981	
   982		// Check whether the profiler needs to be turned on or off.
   983		hz = runtime·sched.profilehz;
   984		if(m->profilehz != hz)
   985			runtime·resetcpuprofiler(hz);
   986	
   987		if(gp->sched.pc == (byte*)runtime·goexit)  // kickoff
   988			runtime·gogocallfn(&gp->sched, gp->fnstart);
   989		runtime·gogo(&gp->sched, 0);
   990	}
   991	
   992	// Finds a runnable goroutine to execute.
   993	// Tries to steal from other P's, get g from global queue, poll network.
   994	static G*
   995	findrunnable(void)
   996	{
   997		G *gp;
   998		P *p;
   999		int32 i;
  1000	
  1001	top:
  1002		if(runtime·gcwaiting) {
  1003			gcstopm();
  1004			goto top;
  1005		}
  1006		// local runq
  1007		gp = runqget(m->p);
  1008		if(gp)
  1009			return gp;
  1010		// global runq
  1011		if(runtime·sched.runqsize) {
  1012			runtime·lock(&runtime·sched);
  1013			gp = globrunqget(m->p);
  1014			runtime·unlock(&runtime·sched);
  1015			if(gp)
  1016				return gp;
  1017		}
  1018		// poll network
  1019		gp = runtime·netpoll(false);  // non-blocking
  1020		if(gp) {
  1021			injectglist(gp->schedlink);
  1022			gp->status = Grunnable;
  1023			return gp;
  1024		}
  1025		// If number of spinning M's >= number of busy P's, block.
  1026		// This is necessary to prevent excessive CPU consumption
  1027		// when GOMAXPROCS>>1 but the program parallelism is low.
  1028		if(!m->spinning && 2 * runtime·atomicload(&runtime·sched.nmspinning) >= runtime·gomaxprocs - runtime·atomicload(&runtime·sched.npidle))  // TODO: fast atomic
  1029			goto stop;
  1030		if(!m->spinning) {
  1031			m->spinning = true;
  1032			runtime·xadd(&runtime·sched.nmspinning, 1);
  1033		}
  1034		// random steal from other P's
  1035		for(i = 0; i < 2*runtime·gomaxprocs; i++) {
  1036			if(runtime·gcwaiting)
  1037				goto top;
  1038			p = runtime·allp[runtime·fastrand1()%runtime·gomaxprocs];
  1039			if(p == m->p)
  1040				gp = runqget(p);
  1041			else
  1042				gp = runqsteal(m->p, p);
  1043			if(gp)
  1044				return gp;
  1045		}
  1046	stop:
  1047		// return P and block
  1048		runtime·lock(&runtime·sched);
  1049		if(runtime·gcwaiting) {
  1050			runtime·unlock(&runtime·sched);
  1051			goto top;
  1052		}
  1053		if(runtime·sched.runqsize) {
  1054			gp = globrunqget(m->p);
  1055			runtime·unlock(&runtime·sched);
  1056			return gp;
  1057		}
  1058		p = releasep();
  1059		pidleput(p);
  1060		runtime·unlock(&runtime·sched);
  1061		if(m->spinning) {
  1062			m->spinning = false;
  1063			runtime·xadd(&runtime·sched.nmspinning, -1);
  1064		}
  1065		// check all runqueues once again
  1066		for(i = 0; i < runtime·gomaxprocs; i++) {
  1067			p = runtime·allp[i];
  1068			if(p && p->runqhead != p->runqtail) {
  1069				runtime·lock(&runtime·sched);
  1070				p = pidleget();
  1071				runtime·unlock(&runtime·sched);
  1072				if(p) {
  1073					acquirep(p);
  1074					goto top;
  1075				}
  1076				break;
  1077			}
  1078		}
  1079		// poll network
  1080		if(runtime·xchg64(&runtime·sched.lastpoll, 0) != 0) {
  1081			if(m->p)
  1082				runtime·throw("findrunnable: netpoll with p");
  1083			if(m->spinning)
  1084				runtime·throw("findrunnable: netpoll with spinning");
  1085			gp = runtime·netpoll(true);  // block until new work is available
  1086			runtime·atomicstore64(&runtime·sched.lastpoll, runtime·nanotime());
  1087			if(gp) {
  1088				runtime·lock(&runtime·sched);
  1089				p = pidleget();
  1090				runtime·unlock(&runtime·sched);
  1091				if(p) {
  1092					acquirep(p);
  1093					injectglist(gp->schedlink);
  1094					gp->status = Grunnable;
  1095					return gp;
  1096				}
  1097				injectglist(gp);
  1098			}
  1099		}
  1100		stopm();
  1101		goto top;
  1102	}
  1103	
  1104	// Injects the list of runnable G's into the scheduler.
  1105	// Can run concurrently with GC.
  1106	static void
  1107	injectglist(G *glist)
  1108	{
  1109		int32 n;
  1110		G *gp;
  1111	
  1112		if(glist == nil)
  1113			return;
  1114		runtime·lock(&runtime·sched);
  1115		for(n = 0; glist; n++) {
  1116			gp = glist;
  1117			glist = gp->schedlink;
  1118			gp->status = Grunnable;
  1119			globrunqput(gp);
  1120		}
  1121		runtime·unlock(&runtime·sched);
  1122	
  1123		for(; n && runtime·sched.npidle; n--)
  1124			startm(nil, false);
  1125	}
  1126	
  1127	// One round of scheduler: find a runnable goroutine and execute it.
  1128	// Never returns.
  1129	static void
  1130	schedule(void)
  1131	{
  1132		G *gp;
  1133	
  1134		if(m->locks)
  1135			runtime·throw("schedule: holding locks");
  1136	
  1137	top:
  1138		if(runtime·gcwaiting) {
  1139			gcstopm();
  1140			goto top;
  1141		}
  1142	
  1143		gp = runqget(m->p);
  1144		if(gp == nil)
  1145			gp = findrunnable();
  1146	
  1147		if(m->spinning) {
  1148			m->spinning = false;
  1149			runtime·xadd(&runtime·sched.nmspinning, -1);
  1150		}
  1151	
  1152		// M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
  1153		// so see if we need to wakeup another M here.
  1154		if (m->p->runqhead != m->p->runqtail &&
  1155			runtime·atomicload(&runtime·sched.nmspinning) == 0 &&
  1156			runtime·atomicload(&runtime·sched.npidle) > 0)  // TODO: fast atomic
  1157			wakep();
  1158	
  1159		if(gp->lockedm) {
  1160			startlockedm(gp);
  1161			goto top;
  1162		}
  1163	
  1164		execute(gp);
  1165	}
  1166	
  1167	// Puts the current goroutine into a waiting state and unlocks the lock.
  1168	// The goroutine can be made runnable again by calling runtime·ready(gp).
  1169	void
  1170	runtime·park(void(*unlockf)(Lock*), Lock *lock, int8 *reason)
  1171	{
  1172		m->waitlock = lock;
  1173		m->waitunlockf = unlockf;
  1174		g->waitreason = reason;
  1175		runtime·mcall(park0);
  1176	}
  1177	
  1178	// runtime·park continuation on g0.
  1179	static void
  1180	park0(G *gp)
  1181	{
  1182		gp->status = Gwaiting;
  1183		gp->m = nil;
  1184		m->curg = nil;
  1185		if(m->waitunlockf) {
  1186			m->waitunlockf(m->waitlock);
  1187			m->waitunlockf = nil;
  1188			m->waitlock = nil;
  1189		}
  1190		if(m->lockedg) {
  1191			stoplockedm();
  1192			execute(gp);  // Never returns.
  1193		}
  1194		schedule();
  1195	}
  1196	
  1197	// Scheduler yield.
  1198	void
  1199	runtime·gosched(void)
  1200	{
  1201		runtime·mcall(gosched0);
  1202	}
  1203	
  1204	// runtime·gosched continuation on g0.
  1205	static void
  1206	gosched0(G *gp)
  1207	{
  1208		gp->status = Grunnable;
  1209		gp->m = nil;
  1210		m->curg = nil;
  1211		runtime·lock(&runtime·sched);
  1212		globrunqput(gp);
  1213		runtime·unlock(&runtime·sched);
  1214		if(m->lockedg) {
  1215			stoplockedm();
  1216			execute(gp);  // Never returns.
  1217		}
  1218		schedule();
  1219	}
  1220	
  1221	// Finishes execution of the current goroutine.
  1222	void
  1223	runtime·goexit(void)
  1224	{
  1225		if(raceenabled)
  1226			runtime·racegoend();
  1227		runtime·mcall(goexit0);
  1228	}
  1229	
  1230	// runtime·goexit continuation on g0.
  1231	static void
  1232	goexit0(G *gp)
  1233	{
  1234		gp->status = Gdead;
  1235		gp->m = nil;
  1236		gp->lockedm = nil;
  1237		m->curg = nil;
  1238		m->lockedg = nil;
  1239		if(m->locked & ~LockExternal) {
  1240			runtime·printf("invalid m->locked = %d", m->locked);
  1241			runtime·throw("internal lockOSThread error");
  1242		}	
  1243		m->locked = 0;
  1244		runtime·unwindstack(gp, nil);
  1245		gfput(m->p, gp);
  1246		schedule();
  1247	}
  1248	
  1249	// The goroutine g is about to enter a system call.
  1250	// Record that it's not using the cpu anymore.
  1251	// This is called only from the go syscall library and cgocall,
  1252	// not from the low-level system calls used by the runtime.
  1253	//
  1254	// Entersyscall cannot split the stack: the runtime·gosave must
  1255	// make g->sched refer to the caller's stack segment, because
  1256	// entersyscall is going to return immediately after.
  1257	#pragma textflag 7
  1258	void
  1259	·entersyscall(int32 dummy)
  1260	{
  1261		if(m->profilehz > 0)
  1262			runtime·setprof(false);
  1263	
  1264		// Leave SP around for gc and traceback.
  1265		g->sched.sp = (uintptr)runtime·getcallersp(&dummy);
  1266		g->sched.pc = runtime·getcallerpc(&dummy);
  1267		g->sched.g = g;
  1268		g->gcsp = g->sched.sp;
  1269		g->gcpc = g->sched.pc;
  1270		g->gcstack = g->stackbase;
  1271		g->gcguard = g->stackguard;
  1272		g->status = Gsyscall;
  1273		if(g->gcsp < g->gcguard-StackGuard || g->gcstack < g->gcsp) {
  1274			// runtime·printf("entersyscall inconsistent %p [%p,%p]\n",
  1275			//	g->gcsp, g->gcguard-StackGuard, g->gcstack);
  1276			runtime·throw("entersyscall");
  1277		}
  1278	
  1279		if(runtime·atomicload(&runtime·sched.sysmonwait)) {  // TODO: fast atomic
  1280			runtime·lock(&runtime·sched);
  1281			if(runtime·atomicload(&runtime·sched.sysmonwait)) {
  1282				runtime·atomicstore(&runtime·sched.sysmonwait, 0);
  1283				runtime·notewakeup(&runtime·sched.sysmonnote);
  1284			}
  1285			runtime·unlock(&runtime·sched);
  1286			runtime·gosave(&g->sched);  // re-save for traceback
  1287		}
  1288	
  1289		m->mcache = nil;
  1290		m->p->tick++;
  1291		m->p->m = nil;
  1292		runtime·atomicstore(&m->p->status, Psyscall);
  1293		if(runtime·gcwaiting) {
  1294			runtime·lock(&runtime·sched);
  1295			if (runtime·sched.stopwait > 0 && runtime·cas(&m->p->status, Psyscall, Pgcstop)) {
  1296				if(--runtime·sched.stopwait == 0)
  1297					runtime·notewakeup(&runtime·sched.stopnote);
  1298			}
  1299			runtime·unlock(&runtime·sched);
  1300			runtime·gosave(&g->sched);  // re-save for traceback
  1301		}
  1302	}
  1303	
  1304	// The same as runtime·entersyscall(), but with a hint that the syscall is blocking.
  1305	#pragma textflag 7
  1306	void
  1307	·entersyscallblock(int32 dummy)
  1308	{
  1309		P *p;
  1310	
  1311		if(m->profilehz > 0)
  1312			runtime·setprof(false);
  1313	
  1314		// Leave SP around for gc and traceback.
  1315		g->sched.sp = (uintptr)runtime·getcallersp(&dummy);
  1316		g->sched.pc = runtime·getcallerpc(&dummy);
  1317		g->sched.g = g;
  1318		g->gcsp = g->sched.sp;
  1319		g->gcpc = g->sched.pc;
  1320		g->gcstack = g->stackbase;
  1321		g->gcguard = g->stackguard;
  1322		g->status = Gsyscall;
  1323		if(g->gcsp < g->gcguard-StackGuard || g->gcstack < g->gcsp) {
  1324			// runtime·printf("entersyscallblock inconsistent %p [%p,%p]\n",
  1325			//	g->gcsp, g->gcguard-StackGuard, g->gcstack);
  1326			runtime·throw("entersyscallblock");
  1327		}
  1328	
  1329		p = releasep();
  1330		handoffp(p);
  1331		if(g->isbackground)  // do not consider blocked scavenger for deadlock detection
  1332			inclocked(1);
  1333		runtime·gosave(&g->sched);  // re-save for traceback
  1334	}
  1335	
  1336	// The goroutine g exited its system call.
  1337	// Arrange for it to run on a cpu again.
  1338	// This is called only from the go syscall library, not
  1339	// from the low-level system calls used by the runtime.
  1340	void
  1341	runtime·exitsyscall(void)
  1342	{
  1343		P *p;
  1344	
  1345		// Check whether the profiler needs to be turned on.
  1346		if(m->profilehz > 0)
  1347			runtime·setprof(true);
  1348	
  1349		// Try to re-acquire the last P.
  1350		if(m->p && m->p->status == Psyscall && runtime·cas(&m->p->status, Psyscall, Prunning)) {
  1351			// There's a cpu for us, so we can run.
  1352			m->mcache = m->p->mcache;
  1353			m->p->m = m;
  1354			m->p->tick++;
  1355			g->status = Grunning;
  1356			// Garbage collector isn't running (since we are),
  1357			// so okay to clear gcstack and gcsp.
  1358			g->gcstack = (uintptr)nil;
  1359			g->gcsp = (uintptr)nil;
  1360			return;
  1361		}
  1362	
  1363		if(g->isbackground)  // do not consider blocked scavenger for deadlock detection
  1364			inclocked(-1);
  1365		// Try to get any other idle P.
  1366		m->p = nil;
  1367		if(runtime·sched.pidle) {
  1368			runtime·lock(&runtime·sched);
  1369			p = pidleget();
  1370			runtime·unlock(&runtime·sched);
  1371			if(p) {
  1372				acquirep(p);
  1373				g->gcstack = (uintptr)nil;
  1374				g->gcsp = (uintptr)nil;
  1375				return;
  1376			}
  1377		}
  1378	
  1379		// Call the scheduler.
  1380		runtime·mcall(exitsyscall0);
  1381	
  1382		// Scheduler returned, so we're allowed to run now.
  1383		// Delete the gcstack information that we left for
  1384		// the garbage collector during the system call.
  1385		// Must wait until now because until gosched returns
  1386		// we don't know for sure that the garbage collector
  1387		// is not running.
  1388		g->gcstack = (uintptr)nil;
  1389		g->gcsp = (uintptr)nil;
  1390	}
  1391	
  1392	// runtime·exitsyscall slow path on g0.
  1393	// Failed to acquire P, enqueue gp as runnable.
  1394	static void
  1395	exitsyscall0(G *gp)
  1396	{
  1397		P *p;
  1398	
  1399		gp->status = Grunnable;
  1400		gp->m = nil;
  1401		m->curg = nil;
  1402		runtime·lock(&runtime·sched);
  1403		p = pidleget();
  1404		if(p == nil)
  1405			globrunqput(gp);
  1406		runtime·unlock(&runtime·sched);
  1407		if(p) {
  1408			acquirep(p);
  1409			execute(gp);  // Never returns.
  1410		}
  1411		if(m->lockedg) {
  1412			// Wait until another thread schedules gp and so m again.
  1413			stoplockedm();
  1414			execute(gp);  // Never returns.
  1415		}
  1416		stopm();
  1417		schedule();  // Never returns.
  1418	}
  1419	
  1420	// Hook used by runtime·malg to call runtime·stackalloc on the
  1421	// scheduler stack.  This exists because runtime·stackalloc insists
  1422	// on being called on the scheduler stack, to avoid trying to grow
  1423	// the stack while allocating a new stack segment.
  1424	static void
  1425	mstackalloc(G *gp)
  1426	{
  1427		gp->param = runtime·stackalloc((uintptr)gp->param);
  1428		runtime·gogo(&gp->sched, 0);
  1429	}
  1430	
  1431	// Allocate a new g, with a stack big enough for stacksize bytes.
  1432	G*
  1433	runtime·malg(int32 stacksize)
  1434	{
  1435		G *newg;
  1436		byte *stk;
  1437	
  1438		if(StackTop < sizeof(Stktop)) {
  1439			runtime·printf("runtime: SizeofStktop=%d, should be >=%d\n", (int32)StackTop, (int32)sizeof(Stktop));
  1440			runtime·throw("runtime: bad stack.h");
  1441		}
  1442	
  1443		newg = runtime·malloc(sizeof(G));
  1444		if(stacksize >= 0) {
  1445			if(g == m->g0) {
  1446				// running on scheduler stack already.
  1447				stk = runtime·stackalloc(StackSystem + stacksize);
  1448			} else {
  1449				// have to call stackalloc on scheduler stack.
  1450				g->param = (void*)(StackSystem + stacksize);
  1451				runtime·mcall(mstackalloc);
  1452				stk = g->param;
  1453				g->param = nil;
  1454			}
  1455			newg->stack0 = (uintptr)stk;
  1456			newg->stackguard = (uintptr)stk + StackGuard;
  1457			newg->stackbase = (uintptr)stk + StackSystem + stacksize - sizeof(Stktop);
  1458			runtime·memclr((byte*)newg->stackbase, sizeof(Stktop));
  1459		}
  1460		return newg;
  1461	}
  1462	
  1463	// Create a new g running fn with siz bytes of arguments.
  1464	// Put it on the queue of g's waiting to run.
  1465	// The compiler turns a go statement into a call to this.
  1466	// Cannot split the stack because it assumes that the arguments
  1467	// are available sequentially after &fn; they would not be
  1468	// copied if a stack split occurred.  It's OK for this to call
  1469	// functions that split the stack.
  1470	#pragma textflag 7
  1471	void
  1472	runtime·newproc(int32 siz, FuncVal* fn, ...)
  1473	{
  1474		byte *argp;
  1475	
  1476		if(thechar == '5')
  1477			argp = (byte*)(&fn+2);  // skip caller's saved LR
  1478		else
  1479			argp = (byte*)(&fn+1);
  1480		runtime·newproc1(fn, argp, siz, 0, runtime·getcallerpc(&siz));
  1481	}
  1482	
  1483	// Create a new g running fn with narg bytes of arguments starting
  1484	// at argp and returning nret bytes of results.  callerpc is the
  1485	// address of the go statement that created this.  The new g is put
  1486	// on the queue of g's waiting to run.
  1487	G*
  1488	runtime·newproc1(FuncVal *fn, byte *argp, int32 narg, int32 nret, void *callerpc)
  1489	{
  1490		byte *sp;
  1491		G *newg;
  1492		int32 siz;
  1493	
  1494	//printf("newproc1 %p %p narg=%d nret=%d\n", fn, argp, narg, nret);
  1495		siz = narg + nret;
  1496		siz = (siz+7) & ~7;
  1497	
  1498		// We could instead create a secondary stack frame
  1499		// and make it look like goexit was on the original but
  1500		// the call to the actual goroutine function was split.
  1501		// Not worth it: this is almost always an error.
  1502		if(siz > StackMin - 1024)
  1503			runtime·throw("runtime.newproc: function arguments too large for new goroutine");
  1504	
  1505		if((newg = gfget(m->p)) != nil) {
  1506			if(newg->stackguard - StackGuard != newg->stack0)
  1507				runtime·throw("invalid stack in newg");
  1508		} else {
  1509			newg = runtime·malg(StackMin);
  1510			runtime·lock(&runtime·sched);
  1511			if(runtime·lastg == nil)
  1512				runtime·allg = newg;
  1513			else
  1514				runtime·lastg->alllink = newg;
  1515			runtime·lastg = newg;
  1516			runtime·unlock(&runtime·sched);
  1517		}
  1518	
  1519		sp = (byte*)newg->stackbase;
  1520		sp -= siz;
  1521		runtime·memmove(sp, argp, narg);
  1522		if(thechar == '5') {
  1523			// caller's LR
  1524			sp -= sizeof(void*);
  1525			*(void**)sp = nil;
  1526		}
  1527	
  1528		newg->sched.sp = (uintptr)sp;
  1529		newg->sched.pc = (byte*)runtime·goexit;
  1530		newg->sched.g = newg;
  1531		newg->fnstart = fn;
  1532		newg->gopc = (uintptr)callerpc;
  1533		newg->status = Grunnable;
  1534		newg->goid = runtime·xadd64(&runtime·sched.goidgen, 1);
  1535		if(raceenabled)
  1536			newg->racectx = runtime·racegostart(callerpc);
  1537		runqput(m->p, newg);
  1538	
  1539		if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0 && fn->fn != runtime·main)  // TODO: fast atomic
  1540			wakep();
  1541		return newg;
  1542	}
  1543	
  1544	// Put on gfree list.
  1545	// If local list is too long, transfer a batch to the global list.
  1546	static void
  1547	gfput(P *p, G *gp)
  1548	{
  1549		if(gp->stackguard - StackGuard != gp->stack0)
  1550			runtime·throw("invalid stack in gfput");
  1551		gp->schedlink = p->gfree;
  1552		p->gfree = gp;
  1553		p->gfreecnt++;
  1554		if(p->gfreecnt >= 64) {
  1555			runtime·lock(&runtime·sched.gflock);
  1556			while(p->gfreecnt >= 32) {
  1557				p->gfreecnt--;
  1558				gp = p->gfree;
  1559				p->gfree = gp->schedlink;
  1560				gp->schedlink = runtime·sched.gfree;
  1561				runtime·sched.gfree = gp;
  1562			}
  1563			runtime·unlock(&runtime·sched.gflock);
  1564		}
  1565	}
  1566	
  1567	// Get from gfree list.
  1568	// If local list is empty, grab a batch from global list.
  1569	static G*
  1570	gfget(P *p)
  1571	{
  1572		G *gp;
  1573	
  1574	retry:
  1575		gp = p->gfree;
  1576		if(gp == nil && runtime·sched.gfree) {
  1577			runtime·lock(&runtime·sched.gflock);
  1578			while(p->gfreecnt < 32 && runtime·sched.gfree) {
  1579				p->gfreecnt++;
  1580				gp = runtime·sched.gfree;
  1581				runtime·sched.gfree = gp->schedlink;
  1582				gp->schedlink = p->gfree;
  1583				p->gfree = gp;
  1584			}
  1585			runtime·unlock(&runtime·sched.gflock);
  1586			goto retry;
  1587		}
  1588		if(gp) {
  1589			p->gfree = gp->schedlink;
  1590			p->gfreecnt--;
  1591		}
  1592		return gp;
  1593	}
  1594	
  1595	// Purge all cached G's from gfree list to the global list.
  1596	static void
  1597	gfpurge(P *p)
  1598	{
  1599		G *gp;
  1600	
  1601		runtime·lock(&runtime·sched.gflock);
  1602		while(p->gfreecnt) {
  1603			p->gfreecnt--;
  1604			gp = p->gfree;
  1605			p->gfree = gp->schedlink;
  1606			gp->schedlink = runtime·sched.gfree;
  1607			runtime·sched.gfree = gp;
  1608		}
  1609		runtime·unlock(&runtime·sched.gflock);
  1610	}
  1611	
  1612	void
  1613	runtime·Breakpoint(void)
  1614	{
  1615		runtime·breakpoint();
  1616	}
  1617	
  1618	void
  1619	runtime·Gosched(void)
  1620	{
  1621		runtime·gosched();
  1622	}
  1623	
  1624	// Implementation of runtime.GOMAXPROCS.
  1625	// delete when scheduler is even stronger
  1626	int32
  1627	runtime·gomaxprocsfunc(int32 n)
  1628	{
  1629		int32 ret;
  1630	
  1631		if(n > MaxGomaxprocs)
  1632			n = MaxGomaxprocs;
  1633		runtime·lock(&runtime·sched);
  1634		ret = runtime·gomaxprocs;
  1635		if(n <= 0 || n == ret) {
  1636			runtime·unlock(&runtime·sched);
  1637			return ret;
  1638		}
  1639		runtime·unlock(&runtime·sched);
  1640	
  1641		runtime·semacquire(&runtime·worldsema);
  1642		m->gcing = 1;
  1643		runtime·stoptheworld();
  1644		newprocs = n;
  1645		m->gcing = 0;
  1646		runtime·semrelease(&runtime·worldsema);
  1647		runtime·starttheworld();
  1648	
  1649		return ret;
  1650	}
  1651	
  1652	static void
  1653	LockOSThread(void)
  1654	{
  1655		m->lockedg = g;
  1656		g->lockedm = m;
  1657	}
  1658	
  1659	void
  1660	runtime·LockOSThread(void)
  1661	{
  1662		m->locked |= LockExternal;
  1663		LockOSThread();
  1664	}
  1665	
  1666	void
  1667	runtime·lockOSThread(void)
  1668	{
  1669		m->locked += LockInternal;
  1670		LockOSThread();
  1671	}
  1672	
  1673	static void
  1674	UnlockOSThread(void)
  1675	{
  1676		if(m->locked != 0)
  1677			return;
  1678		m->lockedg = nil;
  1679		g->lockedm = nil;
  1680	}
  1681	
  1682	void
  1683	runtime·UnlockOSThread(void)
  1684	{
  1685		m->locked &= ~LockExternal;
  1686		UnlockOSThread();
  1687	}
  1688	
  1689	void
  1690	runtime·unlockOSThread(void)
  1691	{
  1692		if(m->locked < LockInternal)
  1693			runtime·throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
  1694		m->locked -= LockInternal;
  1695		UnlockOSThread();
  1696	}
  1697	
  1698	bool
  1699	runtime·lockedOSThread(void)
  1700	{
  1701		return g->lockedm != nil && m->lockedg != nil;
  1702	}
  1703	
  1704	// for testing of callbacks
  1705	void
  1706	runtime·golockedOSThread(bool ret)
  1707	{
  1708		ret = runtime·lockedOSThread();
  1709		FLUSH(&ret);
  1710	}
  1711	
  1712	// for testing of wire, unwire
  1713	void
  1714	runtime·mid(uint32 ret)
  1715	{
  1716		ret = m->id;
  1717		FLUSH(&ret);
  1718	}
  1719	
  1720	void
  1721	runtime·NumGoroutine(intgo ret)
  1722	{
  1723		ret = runtime·gcount();
  1724		FLUSH(&ret);
  1725	}
  1726	
  1727	int32
  1728	runtime·gcount(void)
  1729	{
  1730		G *gp;
  1731		int32 n, s;
  1732	
  1733		n = 0;
  1734		runtime·lock(&runtime·sched);
  1735		// TODO(dvyukov): runtime.NumGoroutine() is O(N).
  1736		// We do not want to increment/decrement centralized counter in newproc/goexit,
  1737		// just to make runtime.NumGoroutine() faster.
  1738		// Compromise solution is to introduce per-P counters of active goroutines.
  1739		for(gp = runtime·allg; gp; gp = gp->alllink) {
  1740			s = gp->status;
  1741			if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting)
  1742				n++;
  1743		}
  1744		runtime·unlock(&runtime·sched);
  1745		return n;
  1746	}
  1747	
  1748	int32
  1749	runtime·mcount(void)
  1750	{
  1751		return runtime·sched.mcount;
  1752	}
  1753	
  1754	void
  1755	runtime·badmcall(void)  // called from assembly
  1756	{
  1757		runtime·throw("runtime: mcall called on m->g0 stack");
  1758	}
  1759	
  1760	void
  1761	runtime·badmcall2(void)  // called from assembly
  1762	{
  1763		runtime·throw("runtime: mcall function returned");
  1764	}
  1765	
  1766	static struct {
  1767		Lock;
  1768		void (*fn)(uintptr*, int32);
  1769		int32 hz;
  1770		uintptr pcbuf[100];
  1771	} prof;
  1772	
  1773	// Called if we receive a SIGPROF signal.
  1774	void
  1775	runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp)
  1776	{
  1777		int32 n;
  1778	
  1779		// Windows does profiling in a dedicated thread w/o m.
  1780		if(!Windows && (m == nil || m->mcache == nil))
  1781			return;
  1782		if(prof.fn == nil || prof.hz == 0)
  1783			return;
  1784	
  1785		runtime·lock(&prof);
  1786		if(prof.fn == nil) {
  1787			runtime·unlock(&prof);
  1788			return;
  1789		}
  1790		n = runtime·gentraceback(pc, sp, lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil);
  1791		if(n > 0)
  1792			prof.fn(prof.pcbuf, n);
  1793		runtime·unlock(&prof);
  1794	}
  1795	
  1796	// Arrange to call fn with a traceback hz times a second.
  1797	void
  1798	runtime·setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
  1799	{
  1800		// Force sane arguments.
  1801		if(hz < 0)
  1802			hz = 0;
  1803		if(hz == 0)
  1804			fn = nil;
  1805		if(fn == nil)
  1806			hz = 0;
  1807	
  1808		// Stop profiler on this cpu so that it is safe to lock prof.
  1809		// if a profiling signal came in while we had prof locked,
  1810		// it would deadlock.
  1811		runtime·resetcpuprofiler(0);
  1812	
  1813		runtime·lock(&prof);
  1814		prof.fn = fn;
  1815		prof.hz = hz;
  1816		runtime·unlock(&prof);
  1817		runtime·lock(&runtime·sched);
  1818		runtime·sched.profilehz = hz;
  1819		runtime·unlock(&runtime·sched);
  1820	
  1821		if(hz != 0)
  1822			runtime·resetcpuprofiler(hz);
  1823	}
  1824	
  1825	// Change number of processors.  The world is stopped, sched is locked.
  1826	static void
  1827	procresize(int32 new)
  1828	{
  1829		int32 i, old;
  1830		G *gp;
  1831		P *p;
  1832	
  1833		old = runtime·gomaxprocs;
  1834		if(old < 0 || old > MaxGomaxprocs || new <= 0 || new >MaxGomaxprocs)
  1835			runtime·throw("procresize: invalid arg");
  1836		// initialize new P's
  1837		for(i = 0; i < new; i++) {
  1838			p = runtime·allp[i];
  1839			if(p == nil) {
  1840				p = (P*)runtime·mallocgc(sizeof(*p), 0, 0, 1);
  1841				p->status = Pgcstop;
  1842				runtime·atomicstorep(&runtime·allp[i], p);
  1843			}
  1844			if(p->mcache == nil) {
  1845				if(old==0 && i==0)
  1846					p->mcache = m->mcache;  // bootstrap
  1847				else
  1848					p->mcache = runtime·allocmcache();
  1849			}
  1850			if(p->runq == nil) {
  1851				p->runqsize = 128;
  1852				p->runq = (G**)runtime·mallocgc(p->runqsize*sizeof(G*), 0, 0, 1);
  1853			}
  1854		}
  1855	
  1856		// redistribute runnable G's evenly
  1857		for(i = 0; i < old; i++) {
  1858			p = runtime·allp[i];
  1859			while(gp = runqget(p))
  1860				globrunqput(gp);
  1861		}
  1862		// start at 1 because current M already executes some G and will acquire allp[0] below,
  1863		// so if we have a spare G we want to put it into allp[1].
  1864		for(i = 1; runtime·sched.runqhead; i++) {
  1865			gp = runtime·sched.runqhead;
  1866			runtime·sched.runqhead = gp->schedlink;
  1867			runqput(runtime·allp[i%new], gp);
  1868		}
  1869		runtime·sched.runqtail = nil;
  1870		runtime·sched.runqsize = 0;
  1871	
  1872		// free unused P's
  1873		for(i = new; i < old; i++) {
  1874			p = runtime·allp[i];
  1875			runtime·freemcache(p->mcache);
  1876			p->mcache = nil;
  1877			gfpurge(p);
  1878			p->status = Pdead;
  1879			// can't free P itself because it can be referenced by an M in syscall
  1880		}
  1881	
  1882		if(m->p)
  1883			m->p->m = nil;
  1884		m->p = nil;
  1885		m->mcache = nil;
  1886		p = runtime·allp[0];
  1887		p->m = nil;
  1888		p->status = Pidle;
  1889		acquirep(p);
  1890		for(i = new-1; i > 0; i--) {
  1891			p = runtime·allp[i];
  1892			p->status = Pidle;
  1893			pidleput(p);
  1894		}
  1895		runtime·singleproc = new == 1;
  1896		runtime·atomicstore((uint32*)&runtime·gomaxprocs, new);
  1897	}
  1898	
  1899	// Associate p and the current m.
  1900	static void
  1901	acquirep(P *p)
  1902	{
  1903		if(m->p || m->mcache)
  1904			runtime·throw("acquirep: already in go");
  1905		if(p->m || p->status != Pidle) {
  1906			runtime·printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? p->m->id : 0, p->status);
  1907			runtime·throw("acquirep: invalid p state");
  1908		}
  1909		m->mcache = p->mcache;
  1910		m->p = p;
  1911		p->m = m;
  1912		p->status = Prunning;
  1913	}
  1914	
  1915	// Disassociate p and the current m.
  1916	static P*
  1917	releasep(void)
  1918	{
  1919		P *p;
  1920	
  1921		if(m->p == nil || m->mcache == nil)
  1922			runtime·throw("releasep: invalid arg");
  1923		p = m->p;
  1924		if(p->m != m || p->mcache != m->mcache || p->status != Prunning) {
  1925			runtime·printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
  1926				m, m->p, p->m, m->mcache, p->mcache, p->status);
  1927			runtime·throw("releasep: invalid p state");
  1928		}
  1929		m->p = nil;
  1930		m->mcache = nil;
  1931		p->m = nil;
  1932		p->status = Pidle;
  1933		return p;
  1934	}
  1935	
  1936	static void
  1937	inclocked(int32 v)
  1938	{
  1939		runtime·lock(&runtime·sched);
  1940		runtime·sched.mlocked += v;
  1941		if(v > 0)
  1942			checkdead();
  1943		runtime·unlock(&runtime·sched);
  1944	}
  1945	
  1946	// Check for deadlock situation.
  1947	// The check is based on number of running M's, if 0 -> deadlock.
  1948	static void
  1949	checkdead(void)
  1950	{
  1951		G *gp;
  1952		int32 run, grunning, s;
  1953	
  1954		// -1 for sysmon
  1955		run = runtime·sched.mcount - runtime·sched.nmidle - runtime·sched.mlocked - 1;
  1956		if(run > 0)
  1957			return;
  1958		if(run < 0) {
  1959			runtime·printf("checkdead: nmidle=%d mlocked=%d mcount=%d\n",
  1960				runtime·sched.nmidle, runtime·sched.mlocked, runtime·sched.mcount);
  1961			runtime·throw("checkdead: inconsistent counts");
  1962		}
  1963		grunning = 0;
  1964		for(gp = runtime·allg; gp; gp = gp->alllink) {
  1965			if(gp->isbackground)
  1966				continue;
  1967			s = gp->status;
  1968			if(s == Gwaiting)
  1969				grunning++;
  1970			else if(s == Grunnable || s == Grunning || s == Gsyscall) {
  1971				runtime·printf("checkdead: find g %D in status %d\n", gp->goid, s);
  1972				runtime·throw("checkdead: runnable g");
  1973			}
  1974		}
  1975		if(grunning == 0)  // possible if main goroutine calls runtime·Goexit()
  1976			runtime·exit(0);
  1977		m->throwing = -1;  // do not dump full stacks
  1978		runtime·throw("all goroutines are asleep - deadlock!");
  1979	}
  1980	
  1981	static void
  1982	sysmon(void)
  1983	{
  1984		uint32 idle, delay;
  1985		int64 now, lastpoll;
  1986		G *gp;
  1987		uint32 ticks[MaxGomaxprocs];
  1988	
  1989		idle = 0;  // how many cycles in succession we had not wokeup somebody
  1990		delay = 0;
  1991		for(;;) {
  1992			if(idle == 0)  // start with 20us sleep...
  1993				delay = 20;
  1994			else if(idle > 50)  // start doubling the sleep after 1ms...
  1995				delay *= 2;
  1996			if(delay > 10*1000)  // up to 10ms
  1997				delay = 10*1000;
  1998			runtime·usleep(delay);
  1999			if(runtime·gcwaiting || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs) {  // TODO: fast atomic
  2000				runtime·lock(&runtime·sched);
  2001				if(runtime·atomicload(&runtime·gcwaiting) || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs) {
  2002					runtime·atomicstore(&runtime·sched.sysmonwait, 1);
  2003					runtime·unlock(&runtime·sched);
  2004					runtime·notesleep(&runtime·sched.sysmonnote);
  2005					runtime·noteclear(&runtime·sched.sysmonnote);
  2006					idle = 0;
  2007					delay = 20;
  2008				} else
  2009					runtime·unlock(&runtime·sched);
  2010			}
  2011			// poll network if not polled for more than 10ms
  2012			lastpoll = runtime·atomicload64(&runtime·sched.lastpoll);
  2013			now = runtime·nanotime();
  2014			if(lastpoll != 0 && lastpoll + 10*1000*1000 > now) {
  2015				gp = runtime·netpoll(false);  // non-blocking
  2016				injectglist(gp);
  2017			}
  2018			// retake P's blocked in syscalls
  2019			if(retake(ticks))
  2020				idle = 0;
  2021			else
  2022				idle++;
  2023		}
  2024	}
  2025	
  2026	static uint32
  2027	retake(uint32 *ticks)
  2028	{
  2029		uint32 i, s, n;
  2030		int64 t;
  2031		P *p;
  2032	
  2033		n = 0;
  2034		for(i = 0; i < runtime·gomaxprocs; i++) {
  2035			p = runtime·allp[i];
  2036			if(p==nil)
  2037				continue;
  2038			t = p->tick;
  2039			if(ticks[i] != t) {
  2040				ticks[i] = t;
  2041				continue;
  2042			}
  2043			s = p->status;
  2044			if(s != Psyscall)
  2045				continue;
  2046			if(p->runqhead == p->runqtail && runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) > 0)  // TODO: fast atomic
  2047				continue;
  2048			// Need to increment number of locked M's before the CAS.
  2049			// Otherwise the M from which we retake can exit the syscall,
  2050			// increment nmidle and report deadlock.
  2051			inclocked(-1);
  2052			if(runtime·cas(&p->status, s, Pidle)) {
  2053				n++;
  2054				handoffp(p);
  2055			}
  2056			inclocked(1);
  2057		}
  2058		return n;
  2059	}
  2060	
  2061	// Put mp on midle list.
  2062	// Sched must be locked.
  2063	static void
  2064	mput(M *mp)
  2065	{
  2066		mp->schedlink = runtime·sched.midle;
  2067		runtime·sched.midle = mp;
  2068		runtime·sched.nmidle++;
  2069		checkdead();
  2070	}
  2071	
  2072	// Try to get an m from midle list.
  2073	// Sched must be locked.
  2074	static M*
  2075	mget(void)
  2076	{
  2077		M *mp;
  2078	
  2079		if((mp = runtime·sched.midle) != nil){
  2080			runtime·sched.midle = mp->schedlink;
  2081			runtime·sched.nmidle--;
  2082		}
  2083		return mp;
  2084	}
  2085	
  2086	// Put gp on the global runnable queue.
  2087	// Sched must be locked.
  2088	static void
  2089	globrunqput(G *gp)
  2090	{
  2091		gp->schedlink = nil;
  2092		if(runtime·sched.runqtail)
  2093			runtime·sched.runqtail->schedlink = gp;
  2094		else
  2095			runtime·sched.runqhead = gp;
  2096		runtime·sched.runqtail = gp;
  2097		runtime·sched.runqsize++;
  2098	}
  2099	
  2100	// Try get a batch of G's from the global runnable queue.
  2101	// Sched must be locked.
  2102	static G*
  2103	globrunqget(P *p)
  2104	{
  2105		G *gp, *gp1;
  2106		int32 n;
  2107	
  2108		if(runtime·sched.runqsize == 0)
  2109			return nil;
  2110		n = runtime·sched.runqsize/runtime·gomaxprocs+1;
  2111		if(n > runtime·sched.runqsize)
  2112			n = runtime·sched.runqsize;
  2113		runtime·sched.runqsize -= n;
  2114		if(runtime·sched.runqsize == 0)
  2115			runtime·sched.runqtail = nil;
  2116		gp = runtime·sched.runqhead;
  2117		runtime·sched.runqhead = gp->schedlink;
  2118		n--;
  2119		while(n--) {
  2120			gp1 = runtime·sched.runqhead;
  2121			runtime·sched.runqhead = gp1->schedlink;
  2122			runqput(p, gp1);
  2123		}
  2124		return gp;
  2125	}
  2126	
  2127	// Put p to on pidle list.
  2128	// Sched must be locked.
  2129	static void
  2130	pidleput(P *p)
  2131	{
  2132		p->link = runtime·sched.pidle;
  2133		runtime·sched.pidle = p;
  2134		runtime·xadd(&runtime·sched.npidle, 1);  // TODO: fast atomic
  2135	}
  2136	
  2137	// Try get a p from pidle list.
  2138	// Sched must be locked.
  2139	static P*
  2140	pidleget(void)
  2141	{
  2142		P *p;
  2143	
  2144		p = runtime·sched.pidle;
  2145		if(p) {
  2146			runtime·sched.pidle = p->link;
  2147			runtime·xadd(&runtime·sched.npidle, -1);  // TODO: fast atomic
  2148		}
  2149		return p;
  2150	}
  2151	
  2152	// Put g on local runnable queue.
  2153	// TODO(dvyukov): consider using lock-free queue.
  2154	static void
  2155	runqput(P *p, G *gp)
  2156	{
  2157		int32 h, t, s;
  2158	
  2159		runtime·lock(p);
  2160	retry:
  2161		h = p->runqhead;
  2162		t = p->runqtail;
  2163		s = p->runqsize;
  2164		if(t == h-1 || (h == 0 && t == s-1)) {
  2165			runqgrow(p);
  2166			goto retry;
  2167		}
  2168		p->runq[t++] = gp;
  2169		if(t == s)
  2170			t = 0;
  2171		p->runqtail = t;
  2172		runtime·unlock(p);
  2173	}
  2174	
  2175	// Get g from local runnable queue.
  2176	static G*
  2177	runqget(P *p)
  2178	{
  2179		G *gp;
  2180		int32 t, h, s;
  2181	
  2182		if(p->runqhead == p->runqtail)
  2183			return nil;
  2184		runtime·lock(p);
  2185		h = p->runqhead;
  2186		t = p->runqtail;
  2187		s = p->runqsize;
  2188		if(t == h) {
  2189			runtime·unlock(p);
  2190			return nil;
  2191		}
  2192		gp = p->runq[h++];
  2193		if(h == s)
  2194			h = 0;
  2195		p->runqhead = h;
  2196		runtime·unlock(p);
  2197		return gp;
  2198	}
  2199	
  2200	// Grow local runnable queue.
  2201	// TODO(dvyukov): consider using fixed-size array
  2202	// and transfer excess to the global list (local queue can grow way too big).
  2203	static void
  2204	runqgrow(P *p)
  2205	{
  2206		G **q;
  2207		int32 s, t, h, t2;
  2208	
  2209		h = p->runqhead;
  2210		t = p->runqtail;
  2211		s = p->runqsize;
  2212		t2 = 0;
  2213		q = runtime·malloc(2*s*sizeof(*q));
  2214		while(t != h) {
  2215			q[t2++] = p->runq[h++];
  2216			if(h == s)
  2217				h = 0;
  2218		}
  2219		runtime·free(p->runq);
  2220		p->runq = q;
  2221		p->runqhead = 0;
  2222		p->runqtail = t2;
  2223		p->runqsize = 2*s;
  2224	}
  2225	
  2226	// Steal half of elements from local runnable queue of p2
  2227	// and put onto local runnable queue of p.
  2228	// Returns one of the stolen elements (or nil if failed).
  2229	static G*
  2230	runqsteal(P *p, P *p2)
  2231	{
  2232		G *gp, *gp1;
  2233		int32 t, h, s, t2, h2, s2, c, i;
  2234	
  2235		if(p2->runqhead == p2->runqtail)
  2236			return nil;
  2237		// sort locks to prevent deadlocks
  2238		if(p < p2)
  2239			runtime·lock(p);
  2240		runtime·lock(p2);
  2241		if(p2->runqhead == p2->runqtail) {
  2242			runtime·unlock(p2);
  2243			if(p < p2)
  2244				runtime·unlock(p);
  2245			return nil;
  2246		}
  2247		if(p >= p2)
  2248			runtime·lock(p);
  2249		// now we've locked both queues and know the victim is not empty
  2250		h = p->runqhead;
  2251		t = p->runqtail;
  2252		s = p->runqsize;
  2253		h2 = p2->runqhead;
  2254		t2 = p2->runqtail;
  2255		s2 = p2->runqsize;
  2256		gp = p2->runq[h2++];  // return value
  2257		if(h2 == s2)
  2258			h2 = 0;
  2259		// steal roughly half
  2260		if(t2 > h2)
  2261			c = (t2 - h2) / 2;
  2262		else
  2263			c = (s2 - h2 + t2) / 2;
  2264		// copy
  2265		for(i = 0; i != c; i++) {
  2266			// the target queue is full?
  2267			if(t == h-1 || (h == 0 && t == s-1))
  2268				break;
  2269			// the victim queue is empty?
  2270			if(t2 == h2)
  2271				break;
  2272			gp1 = p2->runq[h2++];
  2273			if(h2 == s2)
  2274				h2 = 0;
  2275			p->runq[t++] = gp1;
  2276			if(t == s)
  2277				t = 0;
  2278		}
  2279		p->runqtail = t;
  2280		p2->runqhead = h2;
  2281		runtime·unlock(p2);
  2282		runtime·unlock(p);
  2283		return gp;
  2284	}
  2285	
  2286	void
  2287	runtime·testSchedLocalQueue(void)
  2288	{
  2289		P p;
  2290		G gs[1000];
  2291		int32 i, j;
  2292	
  2293		runtime·memclr((byte*)&p, sizeof(p));
  2294		p.runqsize = 1;
  2295		p.runqhead = 0;
  2296		p.runqtail = 0;
  2297		p.runq = runtime·malloc(p.runqsize*sizeof(*p.runq));
  2298	
  2299		for(i = 0; i < nelem(gs); i++) {
  2300			if(runqget(&p) != nil)
  2301				runtime·throw("runq is not empty initially");
  2302			for(j = 0; j < i; j++)
  2303				runqput(&p, &gs[i]);
  2304			for(j = 0; j < i; j++) {
  2305				if(runqget(&p) != &gs[i]) {
  2306					runtime·printf("bad element at iter %d/%d\n", i, j);
  2307					runtime·throw("bad element");
  2308				}
  2309			}
  2310			if(runqget(&p) != nil)
  2311				runtime·throw("runq is not empty afterwards");
  2312		}
  2313	}
  2314	
  2315	void
  2316	runtime·testSchedLocalQueueSteal(void)
  2317	{
  2318		P p1, p2;
  2319		G gs[1000], *gp;
  2320		int32 i, j, s;
  2321	
  2322		runtime·memclr((byte*)&p1, sizeof(p1));
  2323		p1.runqsize = 1;
  2324		p1.runqhead = 0;
  2325		p1.runqtail = 0;
  2326		p1.runq = runtime·malloc(p1.runqsize*sizeof(*p1.runq));
  2327	
  2328		runtime·memclr((byte*)&p2, sizeof(p2));
  2329		p2.runqsize = nelem(gs);
  2330		p2.runqhead = 0;
  2331		p2.runqtail = 0;
  2332		p2.runq = runtime·malloc(p2.runqsize*sizeof(*p2.runq));
  2333	
  2334		for(i = 0; i < nelem(gs); i++) {
  2335			for(j = 0; j < i; j++) {
  2336				gs[j].sig = 0;
  2337				runqput(&p1, &gs[j]);
  2338			}
  2339			gp = runqsteal(&p2, &p1);
  2340			s = 0;
  2341			if(gp) {
  2342				s++;
  2343				gp->sig++;
  2344			}
  2345			while(gp = runqget(&p2)) {
  2346				s++;
  2347				gp->sig++;
  2348			}
  2349			while(gp = runqget(&p1))
  2350				gp->sig++;
  2351			for(j = 0; j < i; j++) {
  2352				if(gs[j].sig != 1) {
  2353					runtime·printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
  2354					runtime·throw("bad element");
  2355				}
  2356			}
  2357			if(s != i/2 && s != i/2+1) {
  2358				runtime·printf("bad steal %d, want %d or %d, iter %d\n",
  2359					s, i/2, i/2+1, i);
  2360				runtime·throw("bad steal");
  2361			}
  2362		}
  2363	}
  2364	

View as plain text