
Text file src/pkg/runtime/proc.c

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "runtime.h"
     6	#include "arch_GOARCH.h"
     7	#include "zaexperiment.h"
     8	#include "malloc.h"
     9	#include "stack.h"
    10	#include "race.h"
    11	#include "type.h"
    12	#include "../../cmd/ld/textflag.h"
    13	
    14	// Goroutine scheduler
    15	// The scheduler's job is to distribute ready-to-run goroutines over worker threads.
    16	//
    17	// The main concepts are:
    18	// G - goroutine.
    19	// M - worker thread, or machine.
    20	// P - processor, a resource that is required to execute Go code.
     21	//     M must have an associated P to execute Go code; however, it can be
    22	//     blocked or in a syscall w/o an associated P.
    23	//
    24	// Design doc at http://golang.org/s/go11sched.
    25	
    26	typedef struct Sched Sched;
    27	struct Sched {
    28		Lock;
    29	
    30		uint64	goidgen;
    31	
    32		M*	midle;	 // idle m's waiting for work
    33		int32	nmidle;	 // number of idle m's waiting for work
    34		int32	nmidlelocked; // number of locked m's waiting for work
    35		int32	mcount;	 // number of m's that have been created
    36		int32	maxmcount;	// maximum number of m's allowed (or die)
    37	
    38		P*	pidle;  // idle P's
     39	uint32	npidle;     // number of idle P's
     40	uint32	nmspinning; // number of spinning M's looking for work
    41	
    42		// Global runnable queue.
    43		G*	runqhead;
    44		G*	runqtail;
    45		int32	runqsize;
    46	
    47		// Global cache of dead G's.
    48		Lock	gflock;
    49		G*	gfree;
    50	
    51		uint32	gcwaiting;	// gc is waiting to run
    52		int32	stopwait;
    53		Note	stopnote;
    54		uint32	sysmonwait;
    55		Note	sysmonnote;
    56		uint64	lastpoll;
    57	
    58		int32	profilehz;	// cpu profiling rate
    59	};
    60	
    61	enum
    62	{
    63		// The max value of GOMAXPROCS.
    64		// There are no fundamental restrictions on the value.
    65		MaxGomaxprocs = 1<<8,
    66	
    67		// Number of goroutine ids to grab from runtime·sched.goidgen to local per-P cache at once.
     68	// 16 seems to provide enough amortization, but other than that it's a mostly arbitrary number.
    69		GoidCacheBatch = 16,
    70	};
    71	
    72	Sched	runtime·sched;
    73	int32	runtime·gomaxprocs;
    74	uint32	runtime·needextram;
    75	bool	runtime·iscgo;
    76	M	runtime·m0;
    77	G	runtime·g0;	// idle goroutine for m0
    78	G*	runtime·lastg;
    79	M*	runtime·allm;
    80	M*	runtime·extram;
    81	int8*	runtime·goos;
    82	int32	runtime·ncpu;
    83	static int32	newprocs;
    84	
    85	static	Lock allglock;	// the following vars are protected by this lock or by stoptheworld
    86	G**	runtime·allg;
    87	uintptr runtime·allglen;
    88	static	uintptr allgcap;
    89	
    90	void runtime·mstart(void);
    91	static void runqput(P*, G*);
    92	static G* runqget(P*);
    93	static bool runqputslow(P*, G*, uint32, uint32);
    94	static G* runqsteal(P*, P*);
    95	static void mput(M*);
    96	static M* mget(void);
    97	static void mcommoninit(M*);
    98	static void schedule(void);
    99	static void procresize(int32);
   100	static void acquirep(P*);
   101	static P* releasep(void);
   102	static void newm(void(*)(void), P*);
   103	static void stopm(void);
   104	static void startm(P*, bool);
   105	static void handoffp(P*);
   106	static void wakep(void);
   107	static void stoplockedm(void);
   108	static void startlockedm(G*);
   109	static void sysmon(void);
   110	static uint32 retake(int64);
   111	static void incidlelocked(int32);
   112	static void checkdead(void);
   113	static void exitsyscall0(G*);
   114	static void park0(G*);
   115	static void goexit0(G*);
   116	static void gfput(P*, G*);
   117	static G* gfget(P*);
   118	static void gfpurge(P*);
   119	static void globrunqput(G*);
   120	static void globrunqputbatch(G*, G*, int32);
   121	static G* globrunqget(P*, int32);
   122	static P* pidleget(void);
   123	static void pidleput(P*);
   124	static void injectglist(G*);
   125	static bool preemptall(void);
   126	static bool preemptone(P*);
   127	static bool exitsyscallfast(void);
   128	static bool haveexperiment(int8*);
   129	static void allgadd(G*);
   130	
   131	// The bootstrap sequence is:
   132	//
   133	//	call osinit
   134	//	call schedinit
   135	//	make & queue new G
   136	//	call runtime·mstart
   137	//
   138	// The new G calls runtime·main.
   139	void
   140	runtime·schedinit(void)
   141	{
   142		int32 n, procs;
   143		byte *p;
   144		Eface i;
   145	
   146		runtime·sched.maxmcount = 10000;
   147		runtime·precisestack = true; // haveexperiment("precisestack");
   148	
   149		runtime·symtabinit();
   150		runtime·mallocinit();
   151		mcommoninit(m);
   152		
   153		// Initialize the itable value for newErrorCString,
   154		// so that the next time it gets called, possibly
   155		// in a fault during a garbage collection, it will not
    156	// need to allocate memory.
   157		runtime·newErrorCString(0, &i);
   158		
   159		// Initialize the cached gotraceback value, since
   160		// gotraceback calls getenv, which mallocs on Plan 9.
   161		runtime·gotraceback(nil);
   162	
   163		runtime·goargs();
   164		runtime·goenvs();
   165		runtime·parsedebugvars();
   166	
   167		runtime·sched.lastpoll = runtime·nanotime();
   168		procs = 1;
   169		p = runtime·getenv("GOMAXPROCS");
   170		if(p != nil && (n = runtime·atoi(p)) > 0) {
   171			if(n > MaxGomaxprocs)
   172				n = MaxGomaxprocs;
   173			procs = n;
   174		}
   175		runtime·allp = runtime·malloc((MaxGomaxprocs+1)*sizeof(runtime·allp[0]));
   176		procresize(procs);
   177	
   178		runtime·copystack = runtime·precisestack;
   179		p = runtime·getenv("GOCOPYSTACK");
   180		if(p != nil && !runtime·strcmp(p, (byte*)"0"))
   181			runtime·copystack = false;
   182	
   183		mstats.enablegc = 1;
   184	
   185		if(raceenabled)
   186			g->racectx = runtime·raceinit();
   187	}
   188	
   189	extern void main·init(void);
   190	extern void main·main(void);
   191	
   192	static FuncVal scavenger = {runtime·MHeap_Scavenger};
   193	
   194	static FuncVal initDone = { runtime·unlockOSThread };
   195	
   196	// The main goroutine.
   197	// Note: C frames in general are not copyable during stack growth, for two reasons:
   198	//   1) We don't know where in a frame to find pointers to other stack locations.
   199	//   2) There's no guarantee that globals or heap values do not point into the frame.
   200	//
   201	// The C frame for runtime.main is copyable, because:
   202	//   1) There are no pointers to other stack locations in the frame
   203	//      (d.fn points at a global, d.link is nil, d.argp is -1).
   204	//   2) The only pointer into this frame is from the defer chain,
   205	//      which is explicitly handled during stack copying.
   206	void
   207	runtime·main(void)
   208	{
   209		Defer d;
   210		
   211		// Max stack size is 1 GB on 64-bit, 250 MB on 32-bit.
   212		// Using decimal instead of binary GB and MB because
   213		// they look nicer in the stack overflow failure message.
   214		if(sizeof(void*) == 8)
   215			runtime·maxstacksize = 1000000000;
   216		else
   217			runtime·maxstacksize = 250000000;
   218	
   219		newm(sysmon, nil);
   220	
   221		// Lock the main goroutine onto this, the main OS thread,
   222		// during initialization.  Most programs won't care, but a few
   223		// do require certain calls to be made by the main thread.
   224		// Those can arrange for main.main to run in the main thread
   225		// by calling runtime.LockOSThread during initialization
   226		// to preserve the lock.
   227		runtime·lockOSThread();
   228		
   229		// Defer unlock so that runtime.Goexit during init does the unlock too.
   230		d.fn = &initDone;
   231		d.siz = 0;
   232		d.link = g->defer;
   233		d.argp = NoArgs;
   234		d.special = true;
   235		g->defer = &d;
   236	
   237		if(m != &runtime·m0)
   238			runtime·throw("runtime·main not on m0");
   239		runtime·newproc1(&scavenger, nil, 0, 0, runtime·main);
   240		main·init();
   241	
   242		if(g->defer != &d || d.fn != &initDone)
   243			runtime·throw("runtime: bad defer entry after init");
   244		g->defer = d.link;
   245		runtime·unlockOSThread();
   246	
   247		main·main();
   248		if(raceenabled)
   249			runtime·racefini();
   250	
   251		// Make racy client program work: if panicking on
   252		// another goroutine at the same time as main returns,
   253		// let the other goroutine finish printing the panic trace.
   254		// Once it does, it will exit. See issue 3934.
   255		if(runtime·panicking)
   256			runtime·park(nil, nil, "panicwait");
   257	
   258		runtime·exit(0);
   259		for(;;)
   260			*(int32*)runtime·main = 0;
   261	}
   262	
   263	void
   264	runtime·goroutineheader(G *gp)
   265	{
   266		int8 *status;
   267		int64 waitfor;
   268	
   269		switch(gp->status) {
   270		case Gidle:
   271			status = "idle";
   272			break;
   273		case Grunnable:
   274			status = "runnable";
   275			break;
   276		case Grunning:
   277			status = "running";
   278			break;
   279		case Gsyscall:
   280			status = "syscall";
   281			break;
   282		case Gwaiting:
   283			if(gp->waitreason)
   284				status = gp->waitreason;
   285			else
   286				status = "waiting";
   287			break;
   288		default:
   289			status = "???";
   290			break;
   291		}
   292	
   293		// approx time the G is blocked, in minutes
   294		waitfor = 0;
   295		if((gp->status == Gwaiting || gp->status == Gsyscall) && gp->waitsince != 0)
   296			waitfor = (runtime·nanotime() - gp->waitsince) / (60LL*1000*1000*1000);
   297	
   298		if(waitfor < 1)
   299			runtime·printf("goroutine %D [%s]:\n", gp->goid, status);
   300		else
   301			runtime·printf("goroutine %D [%s, %D minutes]:\n", gp->goid, status, waitfor);
   302	}
   303	
   304	void
   305	runtime·tracebackothers(G *me)
   306	{
   307		G *gp;
   308		int32 traceback;
   309		uintptr i;
   310	
   311		traceback = runtime·gotraceback(nil);
   312		
   313		// Show the current goroutine first, if we haven't already.
   314		if((gp = m->curg) != nil && gp != me) {
   315			runtime·printf("\n");
   316			runtime·goroutineheader(gp);
   317			runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, gp);
   318		}
   319	
   320		runtime·lock(&allglock);
   321		for(i = 0; i < runtime·allglen; i++) {
   322			gp = runtime·allg[i];
   323			if(gp == me || gp == m->curg || gp->status == Gdead)
   324				continue;
   325			if(gp->issystem && traceback < 2)
   326				continue;
   327			runtime·printf("\n");
   328			runtime·goroutineheader(gp);
   329			if(gp->status == Grunning) {
   330				runtime·printf("\tgoroutine running on other thread; stack unavailable\n");
   331				runtime·printcreatedby(gp);
   332			} else
   333				runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, gp);
   334		}
   335		runtime·unlock(&allglock);
   336	}
   337	
   338	static void
   339	checkmcount(void)
   340	{
   341		// sched lock is held
   342		if(runtime·sched.mcount > runtime·sched.maxmcount) {
   343			runtime·printf("runtime: program exceeds %d-thread limit\n", runtime·sched.maxmcount);
   344			runtime·throw("thread exhaustion");
   345		}
   346	}
   347	
   348	static void
   349	mcommoninit(M *mp)
   350	{
    351	// If there is no mcache, runtime·callers() will crash,
    352	// and we are most likely in the sysmon thread, so the stack is senseless anyway.
   353		if(m->mcache)
   354			runtime·callers(1, mp->createstack, nelem(mp->createstack));
   355	
   356		mp->fastrand = 0x49f6428aUL + mp->id + runtime·cputicks();
   357	
   358		runtime·lock(&runtime·sched);
   359		mp->id = runtime·sched.mcount++;
   360		checkmcount();
   361		runtime·mpreinit(mp);
   362	
   363		// Add to runtime·allm so garbage collector doesn't free m
   364		// when it is just in a register or thread-local storage.
   365		mp->alllink = runtime·allm;
   366		// runtime·NumCgoCall() iterates over allm w/o schedlock,
   367		// so we need to publish it safely.
   368		runtime·atomicstorep(&runtime·allm, mp);
   369		runtime·unlock(&runtime·sched);
   370	}
   371	
   372	// Mark gp ready to run.
   373	void
   374	runtime·ready(G *gp)
   375	{
   376		// Mark runnable.
   377		m->locks++;  // disable preemption because it can be holding p in a local var
   378		if(gp->status != Gwaiting) {
   379			runtime·printf("goroutine %D has status %d\n", gp->goid, gp->status);
   380			runtime·throw("bad g->status in ready");
   381		}
   382		gp->status = Grunnable;
   383		runqput(m->p, gp);
   384		if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0)  // TODO: fast atomic
   385			wakep();
   386		m->locks--;
   387		if(m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
   388			g->stackguard0 = StackPreempt;
   389	}
   390	
   391	int32
   392	runtime·gcprocs(void)
   393	{
   394		int32 n;
   395	
   396		// Figure out how many CPUs to use during GC.
   397		// Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
   398		runtime·lock(&runtime·sched);
   399		n = runtime·gomaxprocs;
   400		if(n > runtime·ncpu)
   401			n = runtime·ncpu;
   402		if(n > MaxGcproc)
   403			n = MaxGcproc;
   404		if(n > runtime·sched.nmidle+1) // one M is currently running
   405			n = runtime·sched.nmidle+1;
   406		runtime·unlock(&runtime·sched);
   407		return n;
   408	}
   409	
   410	static bool
   411	needaddgcproc(void)
   412	{
   413		int32 n;
   414	
   415		runtime·lock(&runtime·sched);
   416		n = runtime·gomaxprocs;
   417		if(n > runtime·ncpu)
   418			n = runtime·ncpu;
   419		if(n > MaxGcproc)
   420			n = MaxGcproc;
   421		n -= runtime·sched.nmidle+1; // one M is currently running
   422		runtime·unlock(&runtime·sched);
   423		return n > 0;
   424	}
   425	
   426	void
   427	runtime·helpgc(int32 nproc)
   428	{
   429		M *mp;
   430		int32 n, pos;
   431	
   432		runtime·lock(&runtime·sched);
   433		pos = 0;
   434		for(n = 1; n < nproc; n++) {  // one M is currently running
   435			if(runtime·allp[pos]->mcache == m->mcache)
   436				pos++;
   437			mp = mget();
   438			if(mp == nil)
   439				runtime·throw("runtime·gcprocs inconsistency");
   440			mp->helpgc = n;
   441			mp->mcache = runtime·allp[pos]->mcache;
   442			pos++;
   443			runtime·notewakeup(&mp->park);
   444		}
   445		runtime·unlock(&runtime·sched);
   446	}
   447	
   448	// Similar to stoptheworld but best-effort and can be called several times.
    449	// There is no reverse operation; it is used during crashing.
   450	// This function must not lock any mutexes.
   451	void
   452	runtime·freezetheworld(void)
   453	{
   454		int32 i;
   455	
   456		if(runtime·gomaxprocs == 1)
   457			return;
   458		// stopwait and preemption requests can be lost
   459		// due to races with concurrently executing threads,
   460		// so try several times
   461		for(i = 0; i < 5; i++) {
   462			// this should tell the scheduler to not start any new goroutines
   463			runtime·sched.stopwait = 0x7fffffff;
   464			runtime·atomicstore((uint32*)&runtime·sched.gcwaiting, 1);
   465			// this should stop running goroutines
   466			if(!preemptall())
   467				break;  // no running goroutines
   468			runtime·usleep(1000);
   469		}
   470		// to be sure
   471		runtime·usleep(1000);
   472		preemptall();
   473		runtime·usleep(1000);
   474	}
   475	
   476	void
   477	runtime·stoptheworld(void)
   478	{
   479		int32 i;
   480		uint32 s;
   481		P *p;
   482		bool wait;
   483	
   484		runtime·lock(&runtime·sched);
   485		runtime·sched.stopwait = runtime·gomaxprocs;
   486		runtime·atomicstore((uint32*)&runtime·sched.gcwaiting, 1);
   487		preemptall();
   488		// stop current P
   489		m->p->status = Pgcstop;
   490		runtime·sched.stopwait--;
   491		// try to retake all P's in Psyscall status
   492		for(i = 0; i < runtime·gomaxprocs; i++) {
   493			p = runtime·allp[i];
   494			s = p->status;
   495			if(s == Psyscall && runtime·cas(&p->status, s, Pgcstop))
   496				runtime·sched.stopwait--;
   497		}
   498		// stop idle P's
   499		while(p = pidleget()) {
   500			p->status = Pgcstop;
   501			runtime·sched.stopwait--;
   502		}
   503		wait = runtime·sched.stopwait > 0;
   504		runtime·unlock(&runtime·sched);
   505	
   506		// wait for remaining P's to stop voluntarily
   507		if(wait) {
   508			for(;;) {
   509				// wait for 100us, then try to re-preempt in case of any races
   510				if(runtime·notetsleep(&runtime·sched.stopnote, 100*1000)) {
   511					runtime·noteclear(&runtime·sched.stopnote);
   512					break;
   513				}
   514				preemptall();
   515			}
   516		}
   517		if(runtime·sched.stopwait)
   518			runtime·throw("stoptheworld: not stopped");
   519		for(i = 0; i < runtime·gomaxprocs; i++) {
   520			p = runtime·allp[i];
   521			if(p->status != Pgcstop)
   522				runtime·throw("stoptheworld: not stopped");
   523		}
   524	}
   525	
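       	// mhelpgc is passed to newm by starttheworld: it marks the new m as a GC
       	// helper (helpgc = -1), so that mstart immediately parks it via stopm on the
       	// idle m list, where it is available to help with the next collection.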
   526	static void
   527	mhelpgc(void)
   528	{
   529		m->helpgc = -1;
   530	}
   531	
   532	void
   533	runtime·starttheworld(void)
   534	{
   535		P *p, *p1;
   536		M *mp;
   537		G *gp;
   538		bool add;
   539	
   540		m->locks++;  // disable preemption because it can be holding p in a local var
   541		gp = runtime·netpoll(false);  // non-blocking
   542		injectglist(gp);
   543		add = needaddgcproc();
   544		runtime·lock(&runtime·sched);
   545		if(newprocs) {
   546			procresize(newprocs);
   547			newprocs = 0;
   548		} else
   549			procresize(runtime·gomaxprocs);
   550		runtime·sched.gcwaiting = 0;
   551	
   552		p1 = nil;
   553		while(p = pidleget()) {
   554			// procresize() puts p's with work at the beginning of the list.
   555			// Once we reach a p without a run queue, the rest don't have one either.
   556			if(p->runqhead == p->runqtail) {
   557				pidleput(p);
   558				break;
   559			}
   560			p->m = mget();
   561			p->link = p1;
   562			p1 = p;
   563		}
   564		if(runtime·sched.sysmonwait) {
   565			runtime·sched.sysmonwait = false;
   566			runtime·notewakeup(&runtime·sched.sysmonnote);
   567		}
   568		runtime·unlock(&runtime·sched);
   569	
   570		while(p1) {
   571			p = p1;
   572			p1 = p1->link;
   573			if(p->m) {
   574				mp = p->m;
   575				p->m = nil;
   576				if(mp->nextp)
   577					runtime·throw("starttheworld: inconsistent mp->nextp");
   578				mp->nextp = p;
   579				runtime·notewakeup(&mp->park);
   580			} else {
   581				// Start M to run P.  Do not start another M below.
   582				newm(nil, p);
   583				add = false;
   584			}
   585		}
   586	
   587		if(add) {
   588			// If GC could have used another helper proc, start one now,
   589			// in the hope that it will be available next time.
   590			// It would have been even better to start it before the collection,
   591			// but doing so requires allocating memory, so it's tricky to
   592			// coordinate.  This lazy approach works out in practice:
   593			// we don't mind if the first couple gc rounds don't have quite
   594			// the maximum number of procs.
   595			newm(mhelpgc, nil);
   596		}
   597		m->locks--;
   598		if(m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
   599			g->stackguard0 = StackPreempt;
   600	}
   601	
   602	// Called to start an M.
   603	void
   604	runtime·mstart(void)
   605	{
   606		if(g != m->g0)
   607			runtime·throw("bad runtime·mstart");
   608	
   609		// Record top of stack for use by mcall.
   610		// Once we call schedule we're never coming back,
   611		// so other calls can reuse this stack space.
   612		runtime·gosave(&m->g0->sched);
   613		m->g0->sched.pc = (uintptr)-1;  // make sure it is never used
   614		m->g0->stackguard = m->g0->stackguard0;  // cgo sets only stackguard0, copy it to stackguard
   615		runtime·asminit();
   616		runtime·minit();
   617	
   618		// Install signal handlers; after minit so that minit can
   619		// prepare the thread to be able to handle the signals.
   620		if(m == &runtime·m0)
   621			runtime·initsig();
   622		
   623		if(m->mstartfn)
   624			m->mstartfn();
   625	
   626		if(m->helpgc) {
   627			m->helpgc = 0;
   628			stopm();
   629		} else if(m != &runtime·m0) {
   630			acquirep(m->nextp);
   631			m->nextp = nil;
   632		}
   633		schedule();
   634	
   635		// TODO(brainman): This point is never reached, because scheduler
   636		// does not release os threads at the moment. But once this path
   637		// is enabled, we must remove our seh here.
   638	}
   639	
   640	// When running with cgo, we call _cgo_thread_start
   641	// to start threads for us so that we can play nicely with
   642	// foreign code.
   643	void (*_cgo_thread_start)(void*);
   644	
   645	typedef struct CgoThreadStart CgoThreadStart;
   646	struct CgoThreadStart
   647	{
   648		M *m;
   649		G *g;
   650		uintptr *tls;
   651		void (*fn)(void);
   652	};
   653	
   654	// Allocate a new m unassociated with any thread.
   655	// Can use p for allocation context if needed.
   656	M*
   657	runtime·allocm(P *p)
   658	{
   659		M *mp;
   660		static Type *mtype;  // The Go type M
   661	
   662		m->locks++;  // disable GC because it can be called from sysmon
   663		if(m->p == nil)
   664			acquirep(p);  // temporarily borrow p for mallocs in this function
   665		if(mtype == nil) {
   666			Eface e;
   667			runtime·gc_m_ptr(&e);
   668			mtype = ((PtrType*)e.type)->elem;
   669		}
   670	
   671		mp = runtime·cnew(mtype);
   672		mcommoninit(mp);
   673	
   674		// In case of cgo or Solaris, pthread_create will make us a stack.
    675	// Windows will lay out the sched stack on the OS stack.
   676		if(runtime·iscgo || Solaris || Windows)
   677			mp->g0 = runtime·malg(-1);
   678		else
   679			mp->g0 = runtime·malg(8192);
   680	
   681		if(p == m->p)
   682			releasep();
   683		m->locks--;
   684		if(m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
   685			g->stackguard0 = StackPreempt;
   686	
   687		return mp;
   688	}
   689	
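       	// allocg allocates a new G structure from the garbage-collected heap,
       	// looking up the reflected Go type for G on first use.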
   690	static G*
   691	allocg(void)
   692	{
   693		G *gp;
   694		static Type *gtype;
   695		
   696		if(gtype == nil) {
   697			Eface e;
   698			runtime·gc_g_ptr(&e);
   699			gtype = ((PtrType*)e.type)->elem;
   700		}
   701		gp = runtime·cnew(gtype);
   702		return gp;
   703	}
   704	
   705	static M* lockextra(bool nilokay);
   706	static void unlockextra(M*);
   707	
   708	// needm is called when a cgo callback happens on a
   709	// thread without an m (a thread not created by Go).
   710	// In this case, needm is expected to find an m to use
   711	// and return with m, g initialized correctly.
   712	// Since m and g are not set now (likely nil, but see below)
   713	// needm is limited in what routines it can call. In particular
   714	// it can only call nosplit functions (textflag 7) and cannot
   715	// do any scheduling that requires an m.
   716	//
   717	// In order to avoid needing heavy lifting here, we adopt
   718	// the following strategy: there is a stack of available m's
   719	// that can be stolen. Using compare-and-swap
   720	// to pop from the stack has ABA races, so we simulate
   721	// a lock by doing an exchange (via casp) to steal the stack
   722	// head and replace the top pointer with MLOCKED (1).
   723	// This serves as a simple spin lock that we can use even
   724	// without an m. The thread that locks the stack in this way
   725	// unlocks the stack by storing a valid stack head pointer.
   726	//
   727	// In order to make sure that there is always an m structure
   728	// available to be stolen, we maintain the invariant that there
   729	// is always one more than needed. At the beginning of the
   730	// program (if cgo is in use) the list is seeded with a single m.
   731	// If needm finds that it has taken the last m off the list, its job
   732	// is - once it has installed its own m so that it can do things like
   733	// allocate memory - to create a spare m and put it on the list.
   734	//
   735	// Each of these extra m's also has a g0 and a curg that are
   736	// pressed into service as the scheduling stack and current
   737	// goroutine for the duration of the cgo callback.
   738	//
   739	// When the callback is done with the m, it calls dropm to
   740	// put the m back on the list.
   741	#pragma textflag NOSPLIT
   742	void
   743	runtime·needm(byte x)
   744	{
   745		M *mp;
   746	
   747		if(runtime·needextram) {
   748			// Can happen if C/C++ code calls Go from a global ctor.
    749	// Cannot throw, because the scheduler is not initialized yet.
   750			runtime·write(2, "fatal error: cgo callback before cgo call\n",
   751				sizeof("fatal error: cgo callback before cgo call\n")-1);
   752			runtime·exit(1);
   753		}
   754	
   755		// Lock extra list, take head, unlock popped list.
   756		// nilokay=false is safe here because of the invariant above,
   757		// that the extra list always contains or will soon contain
   758		// at least one m.
   759		mp = lockextra(false);
   760	
   761		// Set needextram when we've just emptied the list,
   762		// so that the eventual call into cgocallbackg will
   763		// allocate a new m for the extra list. We delay the
   764		// allocation until then so that it can be done
   765		// after exitsyscall makes sure it is okay to be
   766		// running at all (that is, there's no garbage collection
   767		// running right now).
   768		mp->needextram = mp->schedlink == nil;
   769		unlockextra(mp->schedlink);
   770	
   771		// Install m and g (= m->g0) and set the stack bounds
   772		// to match the current stack. We don't actually know
   773		// how big the stack is, like we don't know how big any
   774		// scheduling stack is, but we assume there's at least 32 kB,
   775		// which is more than enough for us.
   776		runtime·setmg(mp, mp->g0);
   777		g->stackbase = (uintptr)(&x + 1024);
   778		g->stackguard = (uintptr)(&x - 32*1024);
   779		g->stackguard0 = g->stackguard;
   780	
   781		// Initialize this thread to use the m.
   782		runtime·asminit();
   783		runtime·minit();
   784	}
   785	
   786	// newextram allocates an m and puts it on the extra list.
   787	// It is called with a working local m, so that it can do things
   788	// like call schedlock and allocate.
   789	void
   790	runtime·newextram(void)
   791	{
   792		M *mp, *mnext;
   793		G *gp;
   794	
   795		// Create extra goroutine locked to extra m.
   796		// The goroutine is the context in which the cgo callback will run.
   797		// The sched.pc will never be returned to, but setting it to
   798		// runtime.goexit makes clear to the traceback routines where
   799		// the goroutine stack ends.
   800		mp = runtime·allocm(nil);
   801		gp = runtime·malg(4096);
   802		gp->sched.pc = (uintptr)runtime·goexit;
   803		gp->sched.sp = gp->stackbase;
   804		gp->sched.lr = 0;
   805		gp->sched.g = gp;
   806		gp->syscallpc = gp->sched.pc;
   807		gp->syscallsp = gp->sched.sp;
   808		gp->syscallstack = gp->stackbase;
   809		gp->syscallguard = gp->stackguard;
   810		gp->status = Gsyscall;
   811		mp->curg = gp;
   812		mp->locked = LockInternal;
   813		mp->lockedg = gp;
   814		gp->lockedm = mp;
   815		gp->goid = runtime·xadd64(&runtime·sched.goidgen, 1);
   816		if(raceenabled)
   817			gp->racectx = runtime·racegostart(runtime·newextram);
   818		// put on allg for garbage collector
   819		allgadd(gp);
   820	
   821		// Add m to the extra list.
   822		mnext = lockextra(true);
   823		mp->schedlink = mnext;
   824		unlockextra(mp);
   825	}
   826	
   827	// dropm is called when a cgo callback has called needm but is now
   828	// done with the callback and returning back into the non-Go thread.
   829	// It puts the current m back onto the extra list.
   830	//
   831	// The main expense here is the call to signalstack to release the
   832	// m's signal stack, and then the call to needm on the next callback
   833	// from this thread. It is tempting to try to save the m for next time,
   834	// which would eliminate both these costs, but there might not be
   835	// a next time: the current thread (which Go does not control) might exit.
   836	// If we saved the m for that thread, there would be an m leak each time
   837	// such a thread exited. Instead, we acquire and release an m on each
   838	// call. These should typically not be scheduling operations, just a few
   839	// atomics, so the cost should be small.
   840	//
   841	// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
   842	// variable using pthread_key_create. Unlike the pthread keys we already use
   843	// on OS X, this dummy key would never be read by Go code. It would exist
    844	// only so that we could register a thread-exit-time destructor.
   845	// That destructor would put the m back onto the extra list.
   846	// This is purely a performance optimization. The current version,
   847	// in which dropm happens on each cgo call, is still correct too.
   848	// We may have to keep the current version on systems with cgo
   849	// but without pthreads, like Windows.
   850	void
   851	runtime·dropm(void)
   852	{
   853		M *mp, *mnext;
   854	
   855		// Undo whatever initialization minit did during needm.
   856		runtime·unminit();
   857	
   858		// Clear m and g, and return m to the extra list.
   859		// After the call to setmg we can only call nosplit functions.
   860		mp = m;
   861		runtime·setmg(nil, nil);
   862	
   863		mnext = lockextra(true);
   864		mp->schedlink = mnext;
   865		unlockextra(mp);
   866	}
   867	
   868	#define MLOCKED ((M*)1)
   869	
   870	// lockextra locks the extra list and returns the list head.
   871	// The caller must unlock the list by storing a new list head
   872	// to runtime.extram. If nilokay is true, then lockextra will
   873	// return a nil list head if that's what it finds. If nilokay is false,
   874	// lockextra will keep waiting until the list head is no longer nil.
   875	#pragma textflag NOSPLIT
   876	static M*
   877	lockextra(bool nilokay)
   878	{
   879		M *mp;
   880		void (*yield)(void);
   881	
   882		for(;;) {
   883			mp = runtime·atomicloadp(&runtime·extram);
   884			if(mp == MLOCKED) {
   885				yield = runtime·osyield;
   886				yield();
   887				continue;
   888			}
   889			if(mp == nil && !nilokay) {
   890				runtime·usleep(1);
   891				continue;
   892			}
   893			if(!runtime·casp(&runtime·extram, mp, MLOCKED)) {
   894				yield = runtime·osyield;
   895				yield();
   896				continue;
   897			}
   898			break;
   899		}
   900		return mp;
   901	}
   902	
   903	#pragma textflag NOSPLIT
   904	static void
   905	unlockextra(M *mp)
   906	{
   907		runtime·atomicstorep(&runtime·extram, mp);
   908	}
   909	
   910	
   911	// Create a new m.  It will start off with a call to fn, or else the scheduler.
   912	static void
   913	newm(void(*fn)(void), P *p)
   914	{
   915		M *mp;
   916	
   917		mp = runtime·allocm(p);
   918		mp->nextp = p;
   919		mp->mstartfn = fn;
   920	
   921		if(runtime·iscgo) {
   922			CgoThreadStart ts;
   923	
   924			if(_cgo_thread_start == nil)
   925				runtime·throw("_cgo_thread_start missing");
   926			ts.m = mp;
   927			ts.g = mp->g0;
   928			ts.tls = mp->tls;
   929			ts.fn = runtime·mstart;
   930			runtime·asmcgocall(_cgo_thread_start, &ts);
   931			return;
   932		}
   933		runtime·newosproc(mp, (byte*)mp->g0->stackbase);
   934	}
   935	
   936	// Stops execution of the current m until new work is available.
   937	// Returns with acquired P.
   938	static void
   939	stopm(void)
   940	{
   941		if(m->locks)
   942			runtime·throw("stopm holding locks");
   943		if(m->p)
   944			runtime·throw("stopm holding p");
   945		if(m->spinning) {
   946			m->spinning = false;
   947			runtime·xadd(&runtime·sched.nmspinning, -1);
   948		}
   949	
   950	retry:
   951		runtime·lock(&runtime·sched);
   952		mput(m);
   953		runtime·unlock(&runtime·sched);
   954		runtime·notesleep(&m->park);
   955		runtime·noteclear(&m->park);
   956		if(m->helpgc) {
   957			runtime·gchelper();
   958			m->helpgc = 0;
   959			m->mcache = nil;
   960			goto retry;
   961		}
   962		acquirep(m->nextp);
   963		m->nextp = nil;
   964	}
   965	
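       	// mspinning is used by startm as the start function for a new m:
       	// it marks the m as spinning before it enters the scheduler.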
   966	static void
   967	mspinning(void)
   968	{
   969		m->spinning = true;
   970	}
   971	
   972	// Schedules some M to run the p (creates an M if necessary).
    973	// If p==nil, tries to get an idle P; if there are no idle P's, does nothing.
   974	static void
   975	startm(P *p, bool spinning)
   976	{
   977		M *mp;
   978		void (*fn)(void);
   979	
   980		runtime·lock(&runtime·sched);
   981		if(p == nil) {
   982			p = pidleget();
   983			if(p == nil) {
   984				runtime·unlock(&runtime·sched);
   985				if(spinning)
   986					runtime·xadd(&runtime·sched.nmspinning, -1);
   987				return;
   988			}
   989		}
   990		mp = mget();
   991		runtime·unlock(&runtime·sched);
   992		if(mp == nil) {
   993			fn = nil;
   994			if(spinning)
   995				fn = mspinning;
   996			newm(fn, p);
   997			return;
   998		}
   999		if(mp->spinning)
  1000			runtime·throw("startm: m is spinning");
  1001		if(mp->nextp)
  1002			runtime·throw("startm: m has p");
  1003		mp->spinning = spinning;
  1004		mp->nextp = p;
  1005		runtime·notewakeup(&mp->park);
  1006	}
  1007	
  1008	// Hands off P from syscall or locked M.
  1009	static void
  1010	handoffp(P *p)
  1011	{
  1012		// if it has local work, start it straight away
  1013		if(p->runqhead != p->runqtail || runtime·sched.runqsize) {
  1014			startm(p, false);
  1015			return;
  1016		}
  1017		// no local work, check that there are no spinning/idle M's,
  1018		// otherwise our help is not required
  1019		if(runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) == 0 &&  // TODO: fast atomic
  1020			runtime·cas(&runtime·sched.nmspinning, 0, 1)) {
  1021			startm(p, true);
  1022			return;
  1023		}
  1024		runtime·lock(&runtime·sched);
  1025		if(runtime·sched.gcwaiting) {
  1026			p->status = Pgcstop;
  1027			if(--runtime·sched.stopwait == 0)
  1028				runtime·notewakeup(&runtime·sched.stopnote);
  1029			runtime·unlock(&runtime·sched);
  1030			return;
  1031		}
  1032		if(runtime·sched.runqsize) {
  1033			runtime·unlock(&runtime·sched);
  1034			startm(p, false);
  1035			return;
  1036		}
   1037	// If this is the last running P and nobody is polling the network,
   1038	// we need to wake up another M to poll the network.
  1039		if(runtime·sched.npidle == runtime·gomaxprocs-1 && runtime·atomicload64(&runtime·sched.lastpoll) != 0) {
  1040			runtime·unlock(&runtime·sched);
  1041			startm(p, false);
  1042			return;
  1043		}
  1044		pidleput(p);
  1045		runtime·unlock(&runtime·sched);
  1046	}
  1047	
  1048	// Tries to add one more P to execute G's.
  1049	// Called when a G is made runnable (newproc, ready).
  1050	static void
  1051	wakep(void)
  1052	{
  1053		// be conservative about spinning threads
  1054		if(!runtime·cas(&runtime·sched.nmspinning, 0, 1))
  1055			return;
  1056		startm(nil, true);
  1057	}
  1058	
  1059	// Stops execution of the current m that is locked to a g until the g is runnable again.
  1060	// Returns with acquired P.
  1061	static void
  1062	stoplockedm(void)
  1063	{
  1064		P *p;
  1065	
  1066		if(m->lockedg == nil || m->lockedg->lockedm != m)
  1067			runtime·throw("stoplockedm: inconsistent locking");
  1068		if(m->p) {
  1069			// Schedule another M to run this p.
  1070			p = releasep();
  1071			handoffp(p);
  1072		}
  1073		incidlelocked(1);
  1074		// Wait until another thread schedules lockedg again.
  1075		runtime·notesleep(&m->park);
  1076		runtime·noteclear(&m->park);
  1077		if(m->lockedg->status != Grunnable)
  1078			runtime·throw("stoplockedm: not runnable");
  1079		acquirep(m->nextp);
  1080		m->nextp = nil;
  1081	}
  1082	
  1083	// Schedules the locked m to run the locked gp.
  1084	static void
  1085	startlockedm(G *gp)
  1086	{
  1087		M *mp;
  1088		P *p;
  1089	
  1090		mp = gp->lockedm;
  1091		if(mp == m)
  1092			runtime·throw("startlockedm: locked to me");
  1093		if(mp->nextp)
  1094			runtime·throw("startlockedm: m has p");
   1095	// directly hand off the current P to the locked m
  1096		incidlelocked(-1);
  1097		p = releasep();
  1098		mp->nextp = p;
  1099		runtime·notewakeup(&mp->park);
  1100		stopm();
  1101	}
  1102	
  1103	// Stops the current m for stoptheworld.
  1104	// Returns when the world is restarted.
  1105	static void
  1106	gcstopm(void)
  1107	{
  1108		P *p;
  1109	
  1110		if(!runtime·sched.gcwaiting)
  1111			runtime·throw("gcstopm: not waiting for gc");
  1112		if(m->spinning) {
  1113			m->spinning = false;
  1114			runtime·xadd(&runtime·sched.nmspinning, -1);
  1115		}
  1116		p = releasep();
  1117		runtime·lock(&runtime·sched);
  1118		p->status = Pgcstop;
  1119		if(--runtime·sched.stopwait == 0)
  1120			runtime·notewakeup(&runtime·sched.stopnote);
  1121		runtime·unlock(&runtime·sched);
  1122		stopm();
  1123	}
  1124	
  1125	// Schedules gp to run on the current M.
  1126	// Never returns.
  1127	static void
  1128	execute(G *gp)
  1129	{
  1130		int32 hz;
  1131	
  1132		if(gp->status != Grunnable) {
  1133			runtime·printf("execute: bad g status %d\n", gp->status);
  1134			runtime·throw("execute: bad g status");
  1135		}
  1136		gp->status = Grunning;
  1137		gp->waitsince = 0;
  1138		gp->preempt = false;
  1139		gp->stackguard0 = gp->stackguard;
  1140		m->p->schedtick++;
  1141		m->curg = gp;
  1142		gp->m = m;
  1143	
  1144		// Check whether the profiler needs to be turned on or off.
  1145		hz = runtime·sched.profilehz;
  1146		if(m->profilehz != hz)
  1147			runtime·resetcpuprofiler(hz);
  1148	
  1149		runtime·gogo(&gp->sched);
  1150	}
  1151	
  1152	// Finds a runnable goroutine to execute.
  1153	// Tries to steal from other P's, get g from global queue, poll network.
  1154	static G*
  1155	findrunnable(void)
  1156	{
  1157		G *gp;
  1158		P *p;
  1159		int32 i;
  1160	
  1161	top:
  1162		if(runtime·sched.gcwaiting) {
  1163			gcstopm();
  1164			goto top;
  1165		}
  1166		if(runtime·fingwait && runtime·fingwake && (gp = runtime·wakefing()) != nil)
  1167			runtime·ready(gp);
  1168		// local runq
  1169		gp = runqget(m->p);
  1170		if(gp)
  1171			return gp;
  1172		// global runq
  1173		if(runtime·sched.runqsize) {
  1174			runtime·lock(&runtime·sched);
  1175			gp = globrunqget(m->p, 0);
  1176			runtime·unlock(&runtime·sched);
  1177			if(gp)
  1178				return gp;
  1179		}
  1180		// poll network
  1181		gp = runtime·netpoll(false);  // non-blocking
  1182		if(gp) {
  1183			injectglist(gp->schedlink);
  1184			gp->status = Grunnable;
  1185			return gp;
  1186		}
  1187		// If number of spinning M's >= number of busy P's, block.
  1188		// This is necessary to prevent excessive CPU consumption
  1189		// when GOMAXPROCS>>1 but the program parallelism is low.
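       		// (Busy P's here means gomaxprocs - npidle; the factor of 2 in the test below
       		// means this m blocks rather than spins once spinning M's reach half the busy P's.)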
  1190		if(!m->spinning && 2 * runtime·atomicload(&runtime·sched.nmspinning) >= runtime·gomaxprocs - runtime·atomicload(&runtime·sched.npidle))  // TODO: fast atomic
  1191			goto stop;
  1192		if(!m->spinning) {
  1193			m->spinning = true;
  1194			runtime·xadd(&runtime·sched.nmspinning, 1);
  1195		}
  1196		// random steal from other P's
  1197		for(i = 0; i < 2*runtime·gomaxprocs; i++) {
  1198			if(runtime·sched.gcwaiting)
  1199				goto top;
  1200			p = runtime·allp[runtime·fastrand1()%runtime·gomaxprocs];
  1201			if(p == m->p)
  1202				gp = runqget(p);
  1203			else
  1204				gp = runqsteal(m->p, p);
  1205			if(gp)
  1206				return gp;
  1207		}
  1208	stop:
  1209		// return P and block
  1210		runtime·lock(&runtime·sched);
  1211		if(runtime·sched.gcwaiting) {
  1212			runtime·unlock(&runtime·sched);
  1213			goto top;
  1214		}
  1215		if(runtime·sched.runqsize) {
  1216			gp = globrunqget(m->p, 0);
  1217			runtime·unlock(&runtime·sched);
  1218			return gp;
  1219		}
  1220		p = releasep();
  1221		pidleput(p);
  1222		runtime·unlock(&runtime·sched);
  1223		if(m->spinning) {
  1224			m->spinning = false;
  1225			runtime·xadd(&runtime·sched.nmspinning, -1);
  1226		}
  1227		// check all runqueues once again
  1228		for(i = 0; i < runtime·gomaxprocs; i++) {
  1229			p = runtime·allp[i];
  1230			if(p && p->runqhead != p->runqtail) {
  1231				runtime·lock(&runtime·sched);
  1232				p = pidleget();
  1233				runtime·unlock(&runtime·sched);
  1234				if(p) {
  1235					acquirep(p);
  1236					goto top;
  1237				}
  1238				break;
  1239			}
  1240		}
  1241		// poll network
  1242		if(runtime·xchg64(&runtime·sched.lastpoll, 0) != 0) {
  1243			if(m->p)
  1244				runtime·throw("findrunnable: netpoll with p");
  1245			if(m->spinning)
  1246				runtime·throw("findrunnable: netpoll with spinning");
  1247			gp = runtime·netpoll(true);  // block until new work is available
  1248			runtime·atomicstore64(&runtime·sched.lastpoll, runtime·nanotime());
  1249			if(gp) {
  1250				runtime·lock(&runtime·sched);
  1251				p = pidleget();
  1252				runtime·unlock(&runtime·sched);
  1253				if(p) {
  1254					acquirep(p);
  1255					injectglist(gp->schedlink);
  1256					gp->status = Grunnable;
  1257					return gp;
  1258				}
  1259				injectglist(gp);
  1260			}
  1261		}
  1262		stopm();
  1263		goto top;
  1264	}
  1265	
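       	// resetspinning clears this m's spinning state; if that leaves no spinning M's
       	// while idle P's remain, it wakes up another P so runnable work is not left waiting.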
  1266	static void
  1267	resetspinning(void)
  1268	{
  1269		int32 nmspinning;
  1270	
  1271		if(m->spinning) {
  1272			m->spinning = false;
  1273			nmspinning = runtime·xadd(&runtime·sched.nmspinning, -1);
  1274			if(nmspinning < 0)
  1275				runtime·throw("findrunnable: negative nmspinning");
  1276		} else
  1277			nmspinning = runtime·atomicload(&runtime·sched.nmspinning);
  1278	
  1279		// M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
   1280	// so see if we need to wake up another P here.
  1281		if (nmspinning == 0 && runtime·atomicload(&runtime·sched.npidle) > 0)
  1282			wakep();
  1283	}
  1284	
  1285	// Injects the list of runnable G's into the scheduler.
  1286	// Can run concurrently with GC.
  1287	static void
  1288	injectglist(G *glist)
  1289	{
  1290		int32 n;
  1291		G *gp;
  1292	
  1293		if(glist == nil)
  1294			return;
  1295		runtime·lock(&runtime·sched);
  1296		for(n = 0; glist; n++) {
  1297			gp = glist;
  1298			glist = gp->schedlink;
  1299			gp->status = Grunnable;
  1300			globrunqput(gp);
  1301		}
  1302		runtime·unlock(&runtime·sched);
  1303	
  1304		for(; n && runtime·sched.npidle; n--)
  1305			startm(nil, false);
  1306	}
  1307	
  1308	// One round of scheduler: find a runnable goroutine and execute it.
  1309	// Never returns.
  1310	static void
  1311	schedule(void)
  1312	{
  1313		G *gp;
  1314		uint32 tick;
  1315	
  1316		if(m->locks)
  1317			runtime·throw("schedule: holding locks");
  1318	
  1319	top:
  1320		if(runtime·sched.gcwaiting) {
  1321			gcstopm();
  1322			goto top;
  1323		}
  1324	
  1325		gp = nil;
  1326		// Check the global runnable queue once in a while to ensure fairness.
  1327		// Otherwise two goroutines can completely occupy the local runqueue
  1328		// by constantly respawning each other.
  1329		tick = m->p->schedtick;
  1330		// This is a fancy way to say tick%61==0,
  1331		// it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
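       		// (0x4325c53f is ceil(2^36/61), so ((uint64)tick*0x4325c53f)>>36 equals tick/61
       		// for any 32-bit tick; subtracting (tick/61)*61 from tick leaves tick%61.)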
  1332		if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime·sched.runqsize > 0) {
  1333			runtime·lock(&runtime·sched);
  1334			gp = globrunqget(m->p, 1);
  1335			runtime·unlock(&runtime·sched);
  1336			if(gp)
  1337				resetspinning();
  1338		}
  1339		if(gp == nil) {
  1340			gp = runqget(m->p);
  1341			if(gp && m->spinning)
  1342				runtime·throw("schedule: spinning with local work");
  1343		}
  1344		if(gp == nil) {
  1345			gp = findrunnable();  // blocks until work is available
  1346			resetspinning();
  1347		}
  1348	
  1349		if(gp->lockedm) {
  1350			// Hands off own p to the locked m,
  1351			// then blocks waiting for a new p.
  1352			startlockedm(gp);
  1353			goto top;
  1354		}
  1355	
  1356		execute(gp);
  1357	}
  1358	
  1359	// Puts the current goroutine into a waiting state and calls unlockf.
  1360	// If unlockf returns false, the goroutine is resumed.
  1361	void
  1362	runtime·park(bool(*unlockf)(G*, void*), void *lock, int8 *reason)
  1363	{
  1364		if(g->status != Grunning)
  1365			runtime·throw("bad g status");
  1366		m->waitlock = lock;
  1367		m->waitunlockf = unlockf;
  1368		g->waitreason = reason;
  1369		runtime·mcall(park0);
  1370	}
  1371	
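       	// parkunlock is the unlockf passed by runtime·parkunlock: it releases the
       	// lock and returns true so that the goroutine remains parked.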
  1372	static bool
  1373	parkunlock(G *gp, void *lock)
  1374	{
  1375		USED(gp);
  1376		runtime·unlock(lock);
  1377		return true;
  1378	}
  1379	
  1380	// Puts the current goroutine into a waiting state and unlocks the lock.
  1381	// The goroutine can be made runnable again by calling runtime·ready(gp).
  1382	void
  1383	runtime·parkunlock(Lock *lock, int8 *reason)
  1384	{
  1385		runtime·park(parkunlock, lock, reason);
  1386	}
  1387	
  1388	// runtime·park continuation on g0.
  1389	static void
  1390	park0(G *gp)
  1391	{
  1392		bool ok;
  1393	
  1394		gp->status = Gwaiting;
  1395		gp->m = nil;
  1396		m->curg = nil;
  1397		if(m->waitunlockf) {
  1398			ok = m->waitunlockf(gp, m->waitlock);
  1399			m->waitunlockf = nil;
  1400			m->waitlock = nil;
  1401			if(!ok) {
  1402				gp->status = Grunnable;
  1403				execute(gp);  // Schedule it back, never returns.
  1404			}
  1405		}
  1406		if(m->lockedg) {
  1407			stoplockedm();
  1408			execute(gp);  // Never returns.
  1409		}
  1410		schedule();
  1411	}
  1412	
  1413	// Scheduler yield.
  1414	void
  1415	runtime·gosched(void)
  1416	{
  1417		if(g->status != Grunning)
  1418			runtime·throw("bad g status");
  1419		runtime·mcall(runtime·gosched0);
  1420	}
  1421	
  1422	// runtime·gosched continuation on g0.
  1423	void
  1424	runtime·gosched0(G *gp)
  1425	{
  1426		gp->status = Grunnable;
  1427		gp->m = nil;
  1428		m->curg = nil;
  1429		runtime·lock(&runtime·sched);
  1430		globrunqput(gp);
  1431		runtime·unlock(&runtime·sched);
  1432		if(m->lockedg) {
  1433			stoplockedm();
  1434			execute(gp);  // Never returns.
  1435		}
  1436		schedule();
  1437	}
  1438	
  1439	// Finishes execution of the current goroutine.
  1440	// Need to mark it as nosplit, because it runs with sp > stackbase (as runtime·lessstack).
  1441	// Since it does not return it does not matter.  But if it is preempted
  1442	// at the split stack check, GC will complain about inconsistent sp.
  1443	#pragma textflag NOSPLIT
  1444	void
  1445	runtime·goexit(void)
  1446	{
  1447		if(g->status != Grunning)
  1448			runtime·throw("bad g status");
  1449		if(raceenabled)
  1450			runtime·racegoend();
  1451		runtime·mcall(goexit0);
  1452	}
  1453	
  1454	// runtime·goexit continuation on g0.
  1455	static void
  1456	goexit0(G *gp)
  1457	{
  1458		gp->status = Gdead;
  1459		gp->m = nil;
  1460		gp->lockedm = nil;
  1461		gp->paniconfault = 0;
  1462		gp->defer = nil; // should be true already but just in case.
  1463		gp->panic = nil; // non-nil for Goexit during panic. points at stack-allocated data.
  1464		gp->writenbuf = 0;
  1465		gp->writebuf = nil;
  1466		gp->waitreason = nil;
  1467		gp->param = nil;
  1468		m->curg = nil;
  1469		m->lockedg = nil;
  1470		if(m->locked & ~LockExternal) {
  1471			runtime·printf("invalid m->locked = %d\n", m->locked);
  1472			runtime·throw("internal lockOSThread error");
  1473		}	
  1474		m->locked = 0;
  1475		runtime·unwindstack(gp, nil);
  1476		gfput(m->p, gp);
  1477		schedule();
  1478	}
  1479	
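       	// save records pc and sp in g->sched (clearing lr, ret and ctxt) so that the
       	// goroutine's scheduling context describes the caller's frame; entersyscall
       	// uses it to leave SP around for GC and traceback.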
  1480	#pragma textflag NOSPLIT
  1481	static void
  1482	save(void *pc, uintptr sp)
  1483	{
  1484		g->sched.pc = (uintptr)pc;
  1485		g->sched.sp = sp;
  1486		g->sched.lr = 0;
  1487		g->sched.ret = 0;
  1488		g->sched.ctxt = 0;
  1489		g->sched.g = g;
  1490	}
  1491	
  1492	// The goroutine g is about to enter a system call.
  1493	// Record that it's not using the cpu anymore.
  1494	// This is called only from the go syscall library and cgocall,
  1495	// not from the low-level system calls used by the runtime.
  1496	//
  1497	// Entersyscall cannot split the stack: the runtime·gosave must
  1498	// make g->sched refer to the caller's stack segment, because
  1499	// entersyscall is going to return immediately after.
  1500	#pragma textflag NOSPLIT
  1501	void
  1502	·entersyscall(int32 dummy)
  1503	{
  1504		// Disable preemption because during this function g is in Gsyscall status,
   1505	// but can have an inconsistent g->sched; do not let GC observe it.
  1506		m->locks++;
  1507	
  1508		// Leave SP around for GC and traceback.
  1509		save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
  1510		g->syscallsp = g->sched.sp;
  1511		g->syscallpc = g->sched.pc;
  1512		g->syscallstack = g->stackbase;
  1513		g->syscallguard = g->stackguard;
  1514		g->status = Gsyscall;
  1515		if(g->syscallsp < g->syscallguard-StackGuard || g->syscallstack < g->syscallsp) {
  1516			// runtime·printf("entersyscall inconsistent %p [%p,%p]\n",
  1517			//	g->syscallsp, g->syscallguard-StackGuard, g->syscallstack);
  1518			runtime·throw("entersyscall");
  1519		}
  1520	
  1521		if(runtime·atomicload(&runtime·sched.sysmonwait)) {  // TODO: fast atomic
  1522			runtime·lock(&runtime·sched);
  1523			if(runtime·atomicload(&runtime·sched.sysmonwait)) {
  1524				runtime·atomicstore(&runtime·sched.sysmonwait, 0);
  1525				runtime·notewakeup(&runtime·sched.sysmonnote);
  1526			}
  1527			runtime·unlock(&runtime·sched);
  1528			save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
  1529		}
  1530	
  1531		m->mcache = nil;
  1532		m->p->m = nil;
  1533		runtime·atomicstore(&m->p->status, Psyscall);
  1534		if(runtime·sched.gcwaiting) {
  1535			runtime·lock(&runtime·sched);
  1536			if (runtime·sched.stopwait > 0 && runtime·cas(&m->p->status, Psyscall, Pgcstop)) {
  1537				if(--runtime·sched.stopwait == 0)
  1538					runtime·notewakeup(&runtime·sched.stopnote);
  1539			}
  1540			runtime·unlock(&runtime·sched);
  1541			save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
  1542		}
  1543	
  1544		// Goroutines must not split stacks in Gsyscall status (it would corrupt g->sched).
  1545		// We set stackguard to StackPreempt so that first split stack check calls morestack.
  1546		// Morestack detects this case and throws.
  1547		g->stackguard0 = StackPreempt;
  1548		m->locks--;
  1549	}
  1550	
  1551	// The same as runtime·entersyscall(), but with a hint that the syscall is blocking.
  1552	#pragma textflag NOSPLIT
  1553	void
  1554	·entersyscallblock(int32 dummy)
  1555	{
  1556		P *p;
  1557	
  1558		m->locks++;  // see comment in entersyscall
  1559	
  1560		// Leave SP around for GC and traceback.
  1561		save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
  1562		g->syscallsp = g->sched.sp;
  1563		g->syscallpc = g->sched.pc;
  1564		g->syscallstack = g->stackbase;
  1565		g->syscallguard = g->stackguard;
  1566		g->status = Gsyscall;
  1567		if(g->syscallsp < g->syscallguard-StackGuard || g->syscallstack < g->syscallsp) {
  1568			// runtime·printf("entersyscall inconsistent %p [%p,%p]\n",
  1569			//	g->syscallsp, g->syscallguard-StackGuard, g->syscallstack);
  1570			runtime·throw("entersyscallblock");
  1571		}
  1572	
  1573		p = releasep();
  1574		handoffp(p);
  1575		if(g->isbackground)  // do not consider blocked scavenger for deadlock detection
  1576			incidlelocked(1);
  1577	
  1578		// Resave for traceback during blocked call.
  1579		save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
  1580	
  1581		g->stackguard0 = StackPreempt;  // see comment in entersyscall
  1582		m->locks--;
  1583	}
  1584	
  1585	// The goroutine g exited its system call.
  1586	// Arrange for it to run on a cpu again.
  1587	// This is called only from the go syscall library, not
  1588	// from the low-level system calls used by the runtime.
  1589	#pragma textflag NOSPLIT
  1590	void
  1591	runtime·exitsyscall(void)
  1592	{
  1593		m->locks++;  // see comment in entersyscall
  1594	
  1595		if(g->isbackground)  // do not consider blocked scavenger for deadlock detection
  1596			incidlelocked(-1);
  1597	
  1598		g->waitsince = 0;
  1599		if(exitsyscallfast()) {
  1600			// There's a cpu for us, so we can run.
  1601			m->p->syscalltick++;
  1602			g->status = Grunning;
  1603			// Garbage collector isn't running (since we are),
  1604			// so okay to clear gcstack and gcsp.
  1605			g->syscallstack = (uintptr)nil;
  1606			g->syscallsp = (uintptr)nil;
  1607			m->locks--;
  1608			if(g->preempt) {
  1609				// restore the preemption request in case we've cleared it in newstack
  1610				g->stackguard0 = StackPreempt;
  1611			} else {
  1612				// otherwise restore the real stackguard, we've spoiled it in entersyscall/entersyscallblock
  1613				g->stackguard0 = g->stackguard;
  1614			}
  1615			return;
  1616		}
  1617	
  1618		m->locks--;
  1619	
  1620		// Call the scheduler.
  1621		runtime·mcall(exitsyscall0);
  1622	
  1623		// Scheduler returned, so we're allowed to run now.
  1624		// Delete the gcstack information that we left for
  1625		// the garbage collector during the system call.
  1626		// Must wait until now because until gosched returns
  1627		// we don't know for sure that the garbage collector
  1628		// is not running.
  1629		g->syscallstack = (uintptr)nil;
  1630		g->syscallsp = (uintptr)nil;
  1631		m->p->syscalltick++;
  1632	}
  1633	
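       	// exitsyscallfast tries to reacquire a P without entering the scheduler:
       	// first the P this m was using before the syscall (if it is still in Psyscall),
       	// then any idle P. It returns false if no P can be acquired, in which case
       	// the caller reschedules via exitsyscall0.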
  1634	#pragma textflag NOSPLIT
  1635	static bool
  1636	exitsyscallfast(void)
  1637	{
  1638		P *p;
  1639	
  1640		// Freezetheworld sets stopwait but does not retake P's.
  1641		if(runtime·sched.stopwait) {
  1642			m->p = nil;
  1643			return false;
  1644		}
  1645	
  1646		// Try to re-acquire the last P.
  1647		if(m->p && m->p->status == Psyscall && runtime·cas(&m->p->status, Psyscall, Prunning)) {
  1648			// There's a cpu for us, so we can run.
  1649			m->mcache = m->p->mcache;
  1650			m->p->m = m;
  1651			return true;
  1652		}
  1653		// Try to get any other idle P.
  1654		m->p = nil;
  1655		if(runtime·sched.pidle) {
  1656			runtime·lock(&runtime·sched);
  1657			p = pidleget();
  1658			if(p && runtime·atomicload(&runtime·sched.sysmonwait)) {
  1659				runtime·atomicstore(&runtime·sched.sysmonwait, 0);
  1660				runtime·notewakeup(&runtime·sched.sysmonnote);
  1661			}
  1662			runtime·unlock(&runtime·sched);
  1663			if(p) {
  1664				acquirep(p);
  1665				return true;
  1666			}
  1667		}
  1668		return false;
  1669	}
  1670	
  1671	// runtime·exitsyscall slow path on g0.
  1672	// Failed to acquire P, enqueue gp as runnable.
  1673	static void
  1674	exitsyscall0(G *gp)
  1675	{
  1676		P *p;
  1677	
  1678		gp->status = Grunnable;
  1679		gp->m = nil;
  1680		m->curg = nil;
  1681		runtime·lock(&runtime·sched);
  1682		p = pidleget();
  1683		if(p == nil)
  1684			globrunqput(gp);
  1685		else if(runtime·atomicload(&runtime·sched.sysmonwait)) {
  1686			runtime·atomicstore(&runtime·sched.sysmonwait, 0);
  1687			runtime·notewakeup(&runtime·sched.sysmonnote);
  1688		}
  1689		runtime·unlock(&runtime·sched);
  1690		if(p) {
  1691			acquirep(p);
  1692			execute(gp);  // Never returns.
  1693		}
  1694		if(m->lockedg) {
  1695			// Wait until another thread schedules gp and so m again.
  1696			stoplockedm();
  1697			execute(gp);  // Never returns.
  1698		}
  1699		stopm();
  1700		schedule();  // Never returns.
  1701	}
  1702	
  1703	// Called from syscall package before fork.
  1704	#pragma textflag NOSPLIT
  1705	void
  1706	syscall·runtime_BeforeFork(void)
  1707	{
  1708		// Fork can hang if preempted with signals frequently enough (see issue 5517).
  1709		// Ensure that we stay on the same M where we disable profiling.
  1710		m->locks++;
  1711		if(m->profilehz != 0)
  1712			runtime·resetcpuprofiler(0);
  1713	
  1714		// This function is called before fork in syscall package.
  1715		// Code between fork and exec must not allocate memory nor even try to grow stack.
  1716		// Here we spoil g->stackguard to reliably detect any attempts to grow stack.
  1717		// runtime_AfterFork will undo this in parent process, but not in child.
  1718		m->forkstackguard = g->stackguard;
  1719		g->stackguard0 = StackPreempt-1;
  1720		g->stackguard = StackPreempt-1;
  1721	}
  1722	
  1723	// Called from syscall package after fork in parent.
  1724	#pragma textflag NOSPLIT
  1725	void
  1726	syscall·runtime_AfterFork(void)
  1727	{
  1728		int32 hz;
  1729	
  1730		// See the comment in runtime_BeforeFork.
  1731		g->stackguard0 = m->forkstackguard;
  1732		g->stackguard = m->forkstackguard;
  1733		m->forkstackguard = 0;
  1734	
  1735		hz = runtime·sched.profilehz;
  1736		if(hz != 0)
  1737			runtime·resetcpuprofiler(hz);
  1738		m->locks--;
  1739	}
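
These two hooks are what the syscall package calls around fork on Unix; any Go program that starts a child process passes through them. A small, self-contained illustration (os/exec is just one caller that eventually reaches syscall.ForkExec):

package main

import (
	"fmt"
	"os/exec"
)

func main() {
	// On Unix, starting a child process goes through syscall.ForkExec,
	// which brackets the fork with the runtime_BeforeFork and
	// runtime_AfterFork hooks defined above, so that profiling signals
	// and stack growth cannot hit the window between fork and exec.
	out, err := exec.Command("echo", "hello").Output()
	if err != nil {
		fmt.Println("exec failed:", err)
		return
	}
	fmt.Print(string(out))
}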
  1740	
  1741	// Hook used by runtime·malg to call runtime·stackalloc on the
  1742	// scheduler stack.  This exists because runtime·stackalloc insists
  1743	// on being called on the scheduler stack, to avoid trying to grow
  1744	// the stack while allocating a new stack segment.
  1745	static void
  1746	mstackalloc(G *gp)
  1747	{
  1748		G *newg;
  1749		uintptr size;
  1750	
  1751		newg = (G*)gp->param;
  1752		size = newg->stacksize;
  1753		newg->stacksize = 0;
  1754		gp->param = runtime·stackalloc(newg, size);
  1755		runtime·gogo(&gp->sched);
  1756	}
  1757	
  1758	// Allocate a new g, with a stack big enough for stacksize bytes.
  1759	G*
  1760	runtime·malg(int32 stacksize)
  1761	{
  1762		G *newg;
  1763		byte *stk;
  1764	
  1765		if(StackTop < sizeof(Stktop)) {
  1766			runtime·printf("runtime: SizeofStktop=%d, should be >=%d\n", (int32)StackTop, (int32)sizeof(Stktop));
  1767			runtime·throw("runtime: bad stack.h");
  1768		}
  1769	
  1770		newg = allocg();
  1771		if(stacksize >= 0) {
  1772			stacksize = runtime·round2(StackSystem + stacksize);
  1773			if(g == m->g0) {
  1774				// running on scheduler stack already.
  1775				stk = runtime·stackalloc(newg, stacksize);
  1776			} else {
  1777				// have to call stackalloc on scheduler stack.
  1778				newg->stacksize = stacksize;
  1779				g->param = newg;
  1780				runtime·mcall(mstackalloc);
  1781				stk = g->param;
  1782				g->param = nil;
  1783			}
  1784			newg->stack0 = (uintptr)stk;
  1785			newg->stackguard = (uintptr)stk + StackGuard;
  1786			newg->stackguard0 = newg->stackguard;
  1787			newg->stackbase = (uintptr)stk + stacksize - sizeof(Stktop);
  1788		}
  1789		return newg;
  1790	}
  1791	
  1792	// Create a new g running fn with siz bytes of arguments.
  1793	// Put it on the queue of g's waiting to run.
  1794	// The compiler turns a go statement into a call to this.
  1795	// Cannot split the stack because it assumes that the arguments
  1796	// are available sequentially after &fn; they would not be
  1797	// copied if a stack split occurred.  It's OK for this to call
  1798	// functions that split the stack.
  1799	#pragma textflag NOSPLIT
  1800	void
  1801	runtime·newproc(int32 siz, FuncVal* fn, ...)
  1802	{
  1803		byte *argp;
  1804	
  1805		if(thechar == '5')
  1806			argp = (byte*)(&fn+2);  // skip caller's saved LR
  1807		else
  1808			argp = (byte*)(&fn+1);
  1809		runtime·newproc1(fn, argp, siz, 0, runtime·getcallerpc(&siz));
  1810	}
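
For reference, this is the call the compiler emits for every go statement. A minimal Go-level example of the argument copying it performs: each argument is evaluated at the go statement and its bytes are copied for the new goroutine.

package main

import (
	"fmt"
	"sync"
)

func main() {
	var wg sync.WaitGroup
	for i := 0; i < 3; i++ {
		wg.Add(1)
		// The go statement below compiles into a call to the runtime's
		// newproc: the argument i is evaluated here and its bytes are
		// copied onto the new goroutine's stack before it is queued.
		go func(n int) {
			defer wg.Done()
			fmt.Println("goroutine", n)
		}(i)
	}
	wg.Wait()
}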
  1811	
  1812	// Create a new g running fn with narg bytes of arguments starting
  1813	// at argp and returning nret bytes of results.  callerpc is the
  1814	// address of the go statement that created this.  The new g is put
  1815	// on the queue of g's waiting to run.
  1816	G*
  1817	runtime·newproc1(FuncVal *fn, byte *argp, int32 narg, int32 nret, void *callerpc)
  1818	{
  1819		byte *sp;
  1820		G *newg;
  1821		P *p;
  1822		int32 siz;
  1823	
  1824	//runtime·printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret);
  1825		if(fn == nil) {
  1826			m->throwing = -1;  // do not dump full stacks
  1827			runtime·throw("go of nil func value");
  1828		}
  1829		m->locks++;  // disable preemption because it can be holding p in a local var
  1830		siz = narg + nret;
  1831		siz = (siz+7) & ~7;
  1832	
  1833		// We could instead create a secondary stack frame
  1834		// and make it look like goexit was on the original but
  1835		// the call to the actual goroutine function was split.
  1836		// Not worth it: this is almost always an error.
  1837		if(siz > StackMin - 1024)
  1838			runtime·throw("runtime.newproc: function arguments too large for new goroutine");
  1839	
  1840		p = m->p;
  1841		if((newg = gfget(p)) != nil) {
  1842			if(newg->stackguard - StackGuard != newg->stack0)
  1843				runtime·throw("invalid stack in newg");
  1844		} else {
  1845			newg = runtime·malg(StackMin);
  1846			allgadd(newg);
  1847		}
  1848	
  1849		sp = (byte*)newg->stackbase;
  1850		sp -= siz;
  1851		runtime·memmove(sp, argp, narg);
  1852		if(thechar == '5') {
  1853			// caller's LR
  1854			sp -= sizeof(void*);
  1855			*(void**)sp = nil;
  1856		}
  1857	
  1858		runtime·memclr((byte*)&newg->sched, sizeof newg->sched);
  1859		newg->sched.sp = (uintptr)sp;
  1860		newg->sched.pc = (uintptr)runtime·goexit;
  1861		newg->sched.g = newg;
  1862		runtime·gostartcallfn(&newg->sched, fn);
  1863		newg->gopc = (uintptr)callerpc;
  1864		newg->status = Grunnable;
  1865		if(p->goidcache == p->goidcacheend) {
  1866			p->goidcache = runtime·xadd64(&runtime·sched.goidgen, GoidCacheBatch);
  1867			p->goidcacheend = p->goidcache + GoidCacheBatch;
  1868		}
  1869		newg->goid = p->goidcache++;
  1870		newg->panicwrap = 0;
  1871		if(raceenabled)
  1872			newg->racectx = runtime·racegostart((void*)callerpc);
  1873		runqput(p, newg);
  1874	
  1875		if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0 && fn->fn != runtime·main)  // TODO: fast atomic
  1876			wakep();
  1877		m->locks--;
  1878		if(m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
  1879			g->stackguard0 = StackPreempt;
  1880		return newg;
  1881	}
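
The goid assignment above refills a per-P cache from the shared counter one batch at a time. A rough Go sketch of that amortization; goidgen, pCache, and nextGoid are illustrative names for this sketch, not runtime identifiers.

package main

import (
	"fmt"
	"sync/atomic"
)

const goidCacheBatch = 16 // mirrors GoidCacheBatch

var goidgen uint64 // shared counter, as in runtime·sched.goidgen

type pCache struct {
	goidcache    uint64
	goidcacheend uint64
}

func (p *pCache) nextGoid() uint64 {
	if p.goidcache == p.goidcacheend {
		// One atomic add refills a whole batch of ids, so the shared
		// counter is touched once per goidCacheBatch goroutines.
		p.goidcache = atomic.AddUint64(&goidgen, goidCacheBatch)
		p.goidcacheend = p.goidcache + goidCacheBatch
	}
	id := p.goidcache
	p.goidcache++
	return id
}

func main() {
	var p pCache
	for i := 0; i < 3; i++ {
		fmt.Println(p.nextGoid())
	}
}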
  1882	
  1883	static void
  1884	allgadd(G *gp)
  1885	{
  1886		G **new;
  1887		uintptr cap;
  1888	
  1889		runtime·lock(&allglock);
  1890		if(runtime·allglen >= allgcap) {
  1891			cap = 4096/sizeof(new[0]);
  1892			if(cap < 2*allgcap)
  1893				cap = 2*allgcap;
  1894			new = runtime·malloc(cap*sizeof(new[0]));
  1895			if(new == nil)
  1896				runtime·throw("runtime: cannot allocate memory");
  1897			if(runtime·allg != nil) {
  1898				runtime·memmove(new, runtime·allg, runtime·allglen*sizeof(new[0]));
  1899				runtime·free(runtime·allg);
  1900			}
  1901			runtime·allg = new;
  1902			allgcap = cap;
  1903		}
  1904		runtime·allg[runtime·allglen++] = gp;
  1905		runtime·unlock(&allglock);
  1906	}
  1907	
  1908	// Put on gfree list.
  1909	// If local list is too long, transfer a batch to the global list.
  1910	static void
  1911	gfput(P *p, G *gp)
  1912	{
  1913		uintptr stksize;
  1914		Stktop *top;
  1915	
  1916		if(gp->stackguard - StackGuard != gp->stack0)
  1917			runtime·throw("invalid stack in gfput");
  1918		stksize = gp->stackbase + sizeof(Stktop) - gp->stack0;
  1919		if(stksize != gp->stacksize) {
  1920			runtime·printf("runtime: bad stacksize, goroutine %D, remain=%d, last=%d\n",
  1921				gp->goid, (int32)gp->stacksize, (int32)stksize);
  1922			runtime·throw("gfput: bad stacksize");
  1923		}
  1924		top = (Stktop*)gp->stackbase;
  1925		if(top->malloced) {
  1926			// non-standard stack size - free it.
  1927			runtime·stackfree(gp, (void*)gp->stack0, top);
  1928			gp->stack0 = 0;
  1929			gp->stackguard = 0;
  1930			gp->stackguard0 = 0;
  1931			gp->stackbase = 0;
  1932		}
  1933		gp->schedlink = p->gfree;
  1934		p->gfree = gp;
  1935		p->gfreecnt++;
  1936		if(p->gfreecnt >= 64) {
  1937			runtime·lock(&runtime·sched.gflock);
  1938			while(p->gfreecnt >= 32) {
  1939				p->gfreecnt--;
  1940				gp = p->gfree;
  1941				p->gfree = gp->schedlink;
  1942				gp->schedlink = runtime·sched.gfree;
  1943				runtime·sched.gfree = gp;
  1944			}
  1945			runtime·unlock(&runtime·sched.gflock);
  1946		}
  1947	}
  1948	
  1949	// Get from gfree list.
  1950	// If local list is empty, grab a batch from global list.
  1951	static G*
  1952	gfget(P *p)
  1953	{
  1954		G *gp;
  1955		byte *stk;
  1956	
  1957	retry:
  1958		gp = p->gfree;
  1959		if(gp == nil && runtime·sched.gfree) {
  1960			runtime·lock(&runtime·sched.gflock);
  1961			while(p->gfreecnt < 32 && runtime·sched.gfree) {
  1962				p->gfreecnt++;
  1963				gp = runtime·sched.gfree;
  1964				runtime·sched.gfree = gp->schedlink;
  1965				gp->schedlink = p->gfree;
  1966				p->gfree = gp;
  1967			}
  1968			runtime·unlock(&runtime·sched.gflock);
  1969			goto retry;
  1970		}
  1971		if(gp) {
  1972			p->gfree = gp->schedlink;
  1973			p->gfreecnt--;
  1974	
  1975			if(gp->stack0 == 0) {
  1976				// Stack was deallocated in gfput.  Allocate a new one.
  1977				if(g == m->g0) {
  1978					stk = runtime·stackalloc(gp, FixedStack);
  1979				} else {
  1980					gp->stacksize = FixedStack;
  1981					g->param = gp;
  1982					runtime·mcall(mstackalloc);
  1983					stk = g->param;
  1984					g->param = nil;
  1985				}
  1986				gp->stack0 = (uintptr)stk;
  1987				gp->stackbase = (uintptr)stk + FixedStack - sizeof(Stktop);
  1988				gp->stackguard = (uintptr)stk + StackGuard;
  1989				gp->stackguard0 = gp->stackguard;
  1990			}
  1991		}
  1992		return gp;
  1993	}
  1994	
  1995	// Purge all cached G's from gfree list to the global list.
  1996	static void
  1997	gfpurge(P *p)
  1998	{
  1999		G *gp;
  2000	
  2001		runtime·lock(&runtime·sched.gflock);
  2002		while(p->gfreecnt) {
  2003			p->gfreecnt--;
  2004			gp = p->gfree;
  2005			p->gfree = gp->schedlink;
  2006			gp->schedlink = runtime·sched.gfree;
  2007			runtime·sched.gfree = gp;
  2008		}
  2009		runtime·unlock(&runtime·sched.gflock);
  2010	}
  2011	
  2012	void
  2013	runtime·Breakpoint(void)
  2014	{
  2015		runtime·breakpoint();
  2016	}
  2017	
  2018	void
  2019	runtime·Gosched(void)
  2020	{
  2021		runtime·gosched();
  2022	}
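
runtime.Gosched is the exported wrapper just above. A minimal example of yielding the processor so another runnable goroutine gets a turn:

package main

import (
	"fmt"
	"runtime"
)

func main() {
	done := make(chan struct{})
	go func() {
		fmt.Println("other goroutine ran")
		close(done)
	}()
	// Gosched calls runtime·gosched above: it puts the current goroutine
	// back on a run queue and lets the scheduler pick another one.
	runtime.Gosched()
	<-done
	fmt.Println("main resumed")
}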
  2023	
  2024	// Implementation of runtime.GOMAXPROCS.
  2025	// delete when scheduler is even stronger
  2026	int32
  2027	runtime·gomaxprocsfunc(int32 n)
  2028	{
  2029		int32 ret;
  2030	
  2031		if(n > MaxGomaxprocs)
  2032			n = MaxGomaxprocs;
  2033		runtime·lock(&runtime·sched);
  2034		ret = runtime·gomaxprocs;
  2035		if(n <= 0 || n == ret) {
  2036			runtime·unlock(&runtime·sched);
  2037			return ret;
  2038		}
  2039		runtime·unlock(&runtime·sched);
  2040	
  2041		runtime·semacquire(&runtime·worldsema, false);
  2042		m->gcing = 1;
  2043		runtime·stoptheworld();
  2044		newprocs = n;
  2045		m->gcing = 0;
  2046		runtime·semrelease(&runtime·worldsema);
  2047		runtime·starttheworld();
  2048	
  2049		return ret;
  2050	}
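
gomaxprocsfunc is what backs runtime.GOMAXPROCS: a non-positive argument only reports the current value, while a positive one stops the world, records newprocs, and returns the previous setting. A short usage example:

package main

import (
	"fmt"
	"runtime"
)

func main() {
	// GOMAXPROCS(0) only queries: n <= 0 returns the old value without
	// stopping the world, as in gomaxprocsfunc above.
	old := runtime.GOMAXPROCS(0)
	fmt.Println("current GOMAXPROCS:", old)

	// A positive value stops the world, schedules the resize, and the
	// previous setting is returned.
	prev := runtime.GOMAXPROCS(2)
	fmt.Println("previous GOMAXPROCS:", prev)
}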
  2051	
  2052	// lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
  2053	// after they modify m->locked. Do not allow preemption during this call,
  2054	// or else the m might be different in this function than in the caller.
  2055	#pragma textflag NOSPLIT
  2056	static void
  2057	lockOSThread(void)
  2058	{
  2059		m->lockedg = g;
  2060		g->lockedm = m;
  2061	}
  2062	
  2063	void
  2064	runtime·LockOSThread(void)
  2065	{
  2066		m->locked |= LockExternal;
  2067		lockOSThread();
  2068	}
  2069	
  2070	void
  2071	runtime·lockOSThread(void)
  2072	{
  2073		m->locked += LockInternal;
  2074		lockOSThread();
  2075	}
  2076	
  2077	
  2078	// unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
  2079	// after they update m->locked. Do not allow preemption during this call,
  2080	// or else the m might be different in this function than in the caller.
  2081	#pragma textflag NOSPLIT
  2082	static void
  2083	unlockOSThread(void)
  2084	{
  2085		if(m->locked != 0)
  2086			return;
  2087		m->lockedg = nil;
  2088		g->lockedm = nil;
  2089	}
  2090	
  2091	void
  2092	runtime·UnlockOSThread(void)
  2093	{
  2094		m->locked &= ~LockExternal;
  2095		unlockOSThread();
  2096	}
  2097	
  2098	void
  2099	runtime·unlockOSThread(void)
  2100	{
  2101		if(m->locked < LockInternal)
  2102			runtime·throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
  2103		m->locked -= LockInternal;
  2104		unlockOSThread();
  2105	}
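
The exported pair above wires a goroutine to its OS thread by setting the LockExternal bit in m->locked; runtime-internal callers use the LockInternal count instead. Typical use from Go code:

package main

import (
	"fmt"
	"runtime"
)

func main() {
	// LockOSThread pins this goroutine to its current OS thread, which
	// matters for C or OS APIs that keep per-thread state.
	// UnlockOSThread releases the pinning.
	runtime.LockOSThread()
	fmt.Println("running wired to one OS thread")
	runtime.UnlockOSThread()
}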
  2106	
  2107	bool
  2108	runtime·lockedOSThread(void)
  2109	{
  2110		return g->lockedm != nil && m->lockedg != nil;
  2111	}
  2112	
  2113	int32
  2114	runtime·gcount(void)
  2115	{
  2116		G *gp;
  2117		int32 n, s;
  2118		uintptr i;
  2119	
  2120		n = 0;
  2121		runtime·lock(&allglock);
  2122		// TODO(dvyukov): runtime.NumGoroutine() is O(N).
  2123		// We do not want to increment/decrement centralized counter in newproc/goexit,
  2124		// just to make runtime.NumGoroutine() faster.
  2125		// Compromise solution is to introduce per-P counters of active goroutines.
  2126		for(i = 0; i < runtime·allglen; i++) {
  2127			gp = runtime·allg[i];
  2128			s = gp->status;
  2129			if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting)
  2130				n++;
  2131		}
  2132		runtime·unlock(&allglock);
  2133		return n;
  2134	}
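
gcount is the implementation behind runtime.NumGoroutine, which is why the TODO above notes that the call is O(N) in the number of goroutines. For example:

package main

import (
	"fmt"
	"runtime"
)

func main() {
	done := make(chan struct{})
	for i := 0; i < 10; i++ {
		go func() { <-done }()
	}
	// NumGoroutine is implemented by gcount above: it walks allg and
	// counts runnable, running, waiting, and syscall goroutines.
	fmt.Println("goroutines:", runtime.NumGoroutine())
	close(done)
}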
  2135	
  2136	int32
  2137	runtime·mcount(void)
  2138	{
  2139		return runtime·sched.mcount;
  2140	}
  2141	
  2142	void
  2143	runtime·badmcall(void (*fn)(G*))  // called from assembly
  2144	{
  2145		USED(fn); // TODO: print fn?
  2146		runtime·throw("runtime: mcall called on m->g0 stack");
  2147	}
  2148	
  2149	void
  2150	runtime·badmcall2(void (*fn)(G*))  // called from assembly
  2151	{
  2152		USED(fn);
  2153		runtime·throw("runtime: mcall function returned");
  2154	}
  2155	
  2156	void
  2157	runtime·badreflectcall(void) // called from assembly
  2158	{
  2159		runtime·panicstring("runtime: arg size to reflect.call more than 1GB");
  2160	}
  2161	
  2162	static struct {
  2163		Lock;
  2164		void (*fn)(uintptr*, int32);
  2165		int32 hz;
  2166		uintptr pcbuf[100];
  2167	} prof;
  2168	
  2169	static void System(void) {}
  2170	static void ExternalCode(void) {}
  2171	static void GC(void) {}
  2172	extern byte etext[];
  2173	
  2174	// Called if we receive a SIGPROF signal.
  2175	void
  2176	runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp, M *mp)
  2177	{
  2178		int32 n;
  2179		bool traceback;
  2180		// Do not use global m in this function, use mp instead.
  2181	// On windows one m is sending reports about all the g's, so the global m would refer to the wrong thread.
  2182		byte m;
  2183	
  2184		m = 0;
  2185		USED(m);
  2186	
  2187		if(prof.fn == nil || prof.hz == 0)
  2188			return;
  2189	
  2190		// Profiling runs concurrently with GC, so it must not allocate.
  2191		mp->mallocing++;
  2192	
  2193		// Define that a "user g" is a user-created goroutine, and a "system g"
  2194		// is one that is m->g0 or m->gsignal. We've only made sure that we
  2195		// can unwind user g's, so exclude the system g's.
  2196		//
  2197		// It is not quite as easy as testing gp == m->curg (the current user g)
  2198		// because we might be interrupted for profiling halfway through a
  2199		// goroutine switch. The switch involves updating three (or four) values:
  2200		// g, PC, SP, and (on arm) LR. The PC must be the last to be updated,
  2201		// because once it gets updated the new g is running.
  2202		//
  2203		// When switching from a user g to a system g, LR is not considered live,
  2204	// so the update only affects g, SP, and PC. Since PC must be last,
  2205		// the possible partial transitions in ordinary execution are (1) g alone is updated,
  2206		// (2) both g and SP are updated, and (3) SP alone is updated.
  2207		// If g is updated, we'll see a system g and not look closer.
  2208		// If SP alone is updated, we can detect the partial transition by checking
  2209		// whether the SP is within g's stack bounds. (We could also require that SP
  2210		// be changed only after g, but the stack bounds check is needed by other
  2211		// cases, so there is no need to impose an additional requirement.)
  2212		//
  2213		// There is one exceptional transition to a system g, not in ordinary execution.
  2214		// When a signal arrives, the operating system starts the signal handler running
  2215		// with an updated PC and SP. The g is updated last, at the beginning of the
  2216		// handler. There are two reasons this is okay. First, until g is updated the
  2217		// g and SP do not match, so the stack bounds check detects the partial transition.
  2218		// Second, signal handlers currently run with signals disabled, so a profiling
  2219		// signal cannot arrive during the handler.
  2220		//
  2221		// When switching from a system g to a user g, there are three possibilities.
  2222		//
  2223		// First, it may be that the g switch has no PC update, because the SP
  2224		// either corresponds to a user g throughout (as in runtime.asmcgocall)
  2225		// or because it has been arranged to look like a user g frame
  2226		// (as in runtime.cgocallback_gofunc). In this case, since the entire
  2227		// transition is a g+SP update, a partial transition updating just one of 
  2228		// those will be detected by the stack bounds check.
  2229		//
  2230		// Second, when returning from a signal handler, the PC and SP updates
  2231		// are performed by the operating system in an atomic update, so the g
  2232		// update must be done before them. The stack bounds check detects
  2233		// the partial transition here, and (again) signal handlers run with signals
  2234		// disabled, so a profiling signal cannot arrive then anyway.
  2235		//
  2236		// Third, the common case: it may be that the switch updates g, SP, and PC
  2237		// separately, as in runtime.gogo.
  2238		//
  2239		// Because runtime.gogo is the only instance, we check whether the PC lies
  2240	// within that function, and if so, do not ask for a traceback. This approach
  2241		// requires knowing the size of the runtime.gogo function, which we
  2242		// record in arch_*.h and check in runtime_test.go.
  2243		//
  2244		// There is another apparently viable approach, recorded here in case
  2245		// the "PC within runtime.gogo" check turns out not to be usable.
  2246		// It would be possible to delay the update of either g or SP until immediately
  2247		// before the PC update instruction. Then, because of the stack bounds check,
  2248		// the only problematic interrupt point is just before that PC update instruction,
  2249		// and the sigprof handler can detect that instruction and simulate stepping past
  2250		// it in order to reach a consistent state. On ARM, the update of g must be made
  2251		// in two places (in R10 and also in a TLS slot), so the delayed update would
  2252		// need to be the SP update. The sigprof handler must read the instruction at
  2253		// the current PC and if it was the known instruction (for example, JMP BX or 
  2254		// MOV R2, PC), use that other register in place of the PC value.
  2255		// The biggest drawback to this solution is that it requires that we can tell
  2256		// whether it's safe to read from the memory pointed at by PC.
  2257		// In a correct program, we can test PC == nil and otherwise read,
  2258		// but if a profiling signal happens at the instant that a program executes
  2259		// a bad jump (before the program manages to handle the resulting fault)
  2260		// the profiling handler could fault trying to read nonexistent memory.
  2261		//
  2262		// To recap, there are no constraints on the assembly being used for the
  2263		// transition. We simply require that g and SP match and that the PC is not
  2264		// in runtime.gogo.
  2265		traceback = true;
  2266		if(gp == nil || gp != mp->curg ||
  2267		   (uintptr)sp < gp->stackguard - StackGuard || gp->stackbase < (uintptr)sp ||
  2268		   ((uint8*)runtime·gogo <= pc && pc < (uint8*)runtime·gogo + RuntimeGogoBytes))
  2269			traceback = false;
  2270	
  2271		runtime·lock(&prof);
  2272		if(prof.fn == nil) {
  2273			runtime·unlock(&prof);
  2274			mp->mallocing--;
  2275			return;
  2276		}
  2277		n = 0;
  2278		if(traceback)
  2279			n = runtime·gentraceback((uintptr)pc, (uintptr)sp, (uintptr)lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil, false);
  2280		if(!traceback || n <= 0) {
  2281			// Normal traceback is impossible or has failed.
  2282		// See if it falls into one of several common cases.
  2283			n = 0;
  2284			if(mp->ncgo > 0 && mp->curg != nil &&
  2285				mp->curg->syscallpc != 0 && mp->curg->syscallsp != 0) {
  2286				// Cgo, we can't unwind and symbolize arbitrary C code,
  2287				// so instead collect Go stack that leads to the cgo call.
  2288				// This is especially important on windows, since all syscalls are cgo calls.
  2289				n = runtime·gentraceback(mp->curg->syscallpc, mp->curg->syscallsp, 0, mp->curg, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil, false);
  2290			}
  2291	#ifdef GOOS_windows
  2292			if(n == 0 && mp->libcallg != nil && mp->libcallpc != 0 && mp->libcallsp != 0) {
  2293				// Libcall, i.e. runtime syscall on windows.
  2294				// Collect Go stack that leads to the call.
  2295				n = runtime·gentraceback(mp->libcallpc, mp->libcallsp, 0, mp->libcallg, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil, false);
  2296			}
  2297	#endif
  2298			if(n == 0) {
  2299				// If all of the above has failed, account it against abstract "System" or "GC".
  2300				n = 2;
  2301				// "ExternalCode" is better than "etext".
  2302				if((uintptr)pc > (uintptr)etext)
  2303					pc = (byte*)ExternalCode + PCQuantum;
  2304				prof.pcbuf[0] = (uintptr)pc;
  2305				if(mp->gcing || mp->helpgc)
  2306					prof.pcbuf[1] = (uintptr)GC + PCQuantum;
  2307				else
  2308					prof.pcbuf[1] = (uintptr)System + PCQuantum;
  2309			}
  2310		}
  2311		prof.fn(prof.pcbuf, n);
  2312		runtime·unlock(&prof);
  2313		mp->mallocing--;
  2314	}
  2315	
  2316	// Arrange to call fn with a traceback hz times a second.
  2317	void
  2318	runtime·setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
  2319	{
  2320		// Force sane arguments.
  2321		if(hz < 0)
  2322			hz = 0;
  2323		if(hz == 0)
  2324			fn = nil;
  2325		if(fn == nil)
  2326			hz = 0;
  2327	
  2328		// Disable preemption, otherwise we can be rescheduled to another thread
  2329		// that has profiling enabled.
  2330		m->locks++;
  2331	
  2332		// Stop profiler on this thread so that it is safe to lock prof.
  2333		// if a profiling signal came in while we had prof locked,
  2334		// it would deadlock.
  2335		runtime·resetcpuprofiler(0);
  2336	
  2337		runtime·lock(&prof);
  2338		prof.fn = fn;
  2339		prof.hz = hz;
  2340		runtime·unlock(&prof);
  2341		runtime·lock(&runtime·sched);
  2342		runtime·sched.profilehz = hz;
  2343		runtime·unlock(&runtime·sched);
  2344	
  2345		if(hz != 0)
  2346			runtime·resetcpuprofiler(hz);
  2347	
  2348		m->locks--;
  2349	}
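
setcpuprofilerate is reached when user code turns CPU profiling on or off; runtime/pprof does this through runtime.SetCPUProfileRate, and once hz is nonzero the sigprof handler above records the samples. A typical profiling setup from user code:

package main

import (
	"log"
	"os"
	"runtime/pprof"
)

func main() {
	f, err := os.Create("cpu.prof")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// StartCPUProfile ends up setting a nonzero hz through
	// runtime.SetCPUProfileRate, which arms the SIGPROF sampling
	// handled by sigprof above.
	if err := pprof.StartCPUProfile(f); err != nil {
		log.Fatal(err)
	}
	defer pprof.StopCPUProfile()

	// Some work to sample.
	sum := 0
	for i := 0; i < 10000000; i++ {
		sum += i
	}
	_ = sum
}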
  2350	
  2351	// Change number of processors.  The world is stopped, sched is locked.
  2352	static void
  2353	procresize(int32 new)
  2354	{
  2355		int32 i, old;
  2356		bool empty;
  2357		G *gp;
  2358		P *p;
  2359	
  2360		old = runtime·gomaxprocs;
  2361		if(old < 0 || old > MaxGomaxprocs || new <= 0 || new > MaxGomaxprocs)
  2362			runtime·throw("procresize: invalid arg");
  2363		// initialize new P's
  2364		for(i = 0; i < new; i++) {
  2365			p = runtime·allp[i];
  2366			if(p == nil) {
  2367				p = (P*)runtime·mallocgc(sizeof(*p), 0, FlagNoInvokeGC);
  2368				p->id = i;
  2369				p->status = Pgcstop;
  2370				runtime·atomicstorep(&runtime·allp[i], p);
  2371			}
  2372			if(p->mcache == nil) {
  2373				if(old==0 && i==0)
  2374					p->mcache = m->mcache;  // bootstrap
  2375				else
  2376					p->mcache = runtime·allocmcache();
  2377			}
  2378		}
  2379	
  2380		// redistribute runnable G's evenly
  2381		// collect all runnable goroutines in global queue preserving FIFO order
  2382		// FIFO order is required to ensure fairness even during frequent GCs
  2383		// see http://golang.org/issue/7126
  2384		empty = false;
  2385		while(!empty) {
  2386			empty = true;
  2387			for(i = 0; i < old; i++) {
  2388				p = runtime·allp[i];
  2389				if(p->runqhead == p->runqtail)
  2390					continue;
  2391				empty = false;
  2392				// pop from tail of local queue
  2393				p->runqtail--;
  2394				gp = p->runq[p->runqtail%nelem(p->runq)];
  2395				// push onto head of global queue
  2396				gp->schedlink = runtime·sched.runqhead;
  2397				runtime·sched.runqhead = gp;
  2398				if(runtime·sched.runqtail == nil)
  2399					runtime·sched.runqtail = gp;
  2400				runtime·sched.runqsize++;
  2401			}
  2402		}
  2403		// fill local queues with at most nelem(p->runq)/2 goroutines
  2404		// start at 1 because current M already executes some G and will acquire allp[0] below,
  2405		// so if we have a spare G we want to put it into allp[1].
  2406		for(i = 1; i < new * nelem(p->runq)/2 && runtime·sched.runqsize > 0; i++) {
  2407			gp = runtime·sched.runqhead;
  2408			runtime·sched.runqhead = gp->schedlink;
  2409			if(runtime·sched.runqhead == nil)
  2410				runtime·sched.runqtail = nil;
  2411			runtime·sched.runqsize--;
  2412			runqput(runtime·allp[i%new], gp);
  2413		}
  2414	
  2415		// free unused P's
  2416		for(i = new; i < old; i++) {
  2417			p = runtime·allp[i];
  2418			runtime·freemcache(p->mcache);
  2419			p->mcache = nil;
  2420			gfpurge(p);
  2421			p->status = Pdead;
  2422			// can't free P itself because it can be referenced by an M in syscall
  2423		}
  2424	
  2425		if(m->p)
  2426			m->p->m = nil;
  2427		m->p = nil;
  2428		m->mcache = nil;
  2429		p = runtime·allp[0];
  2430		p->m = nil;
  2431		p->status = Pidle;
  2432		acquirep(p);
  2433		for(i = new-1; i > 0; i--) {
  2434			p = runtime·allp[i];
  2435			p->status = Pidle;
  2436			pidleput(p);
  2437		}
  2438		runtime·atomicstore((uint32*)&runtime·gomaxprocs, new);
  2439	}
  2440	
  2441	// Associate p and the current m.
  2442	static void
  2443	acquirep(P *p)
  2444	{
  2445		if(m->p || m->mcache)
  2446			runtime·throw("acquirep: already in go");
  2447		if(p->m || p->status != Pidle) {
  2448			runtime·printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? p->m->id : 0, p->status);
  2449			runtime·throw("acquirep: invalid p state");
  2450		}
  2451		m->mcache = p->mcache;
  2452		m->p = p;
  2453		p->m = m;
  2454		p->status = Prunning;
  2455	}
  2456	
  2457	// Disassociate p and the current m.
  2458	static P*
  2459	releasep(void)
  2460	{
  2461		P *p;
  2462	
  2463		if(m->p == nil || m->mcache == nil)
  2464			runtime·throw("releasep: invalid arg");
  2465		p = m->p;
  2466		if(p->m != m || p->mcache != m->mcache || p->status != Prunning) {
  2467			runtime·printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
  2468				m, m->p, p->m, m->mcache, p->mcache, p->status);
  2469			runtime·throw("releasep: invalid p state");
  2470		}
  2471		m->p = nil;
  2472		m->mcache = nil;
  2473		p->m = nil;
  2474		p->status = Pidle;
  2475		return p;
  2476	}
  2477	
  2478	static void
  2479	incidlelocked(int32 v)
  2480	{
  2481		runtime·lock(&runtime·sched);
  2482		runtime·sched.nmidlelocked += v;
  2483		if(v > 0)
  2484			checkdead();
  2485		runtime·unlock(&runtime·sched);
  2486	}
  2487	
  2488	// Check for deadlock situation.
  2489	// The check is based on the number of running M's; if it is 0, we have a deadlock.
  2490	static void
  2491	checkdead(void)
  2492	{
  2493		G *gp;
  2494		int32 run, grunning, s;
  2495		uintptr i;
  2496	
  2497		// -1 for sysmon
  2498		run = runtime·sched.mcount - runtime·sched.nmidle - runtime·sched.nmidlelocked - 1;
  2499		if(run > 0)
  2500			return;
  2501		// If we are dying because of a signal caught on an already idle thread,
  2502		// freezetheworld will cause all running threads to block.
  2503		// And runtime will essentially enter into deadlock state,
  2504		// except that there is a thread that will call runtime·exit soon.
  2505		if(runtime·panicking > 0)
  2506			return;
  2507		if(run < 0) {
  2508			runtime·printf("runtime: checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
  2509				runtime·sched.nmidle, runtime·sched.nmidlelocked, runtime·sched.mcount);
  2510			runtime·throw("checkdead: inconsistent counts");
  2511		}
  2512		grunning = 0;
  2513		runtime·lock(&allglock);
  2514		for(i = 0; i < runtime·allglen; i++) {
  2515			gp = runtime·allg[i];
  2516			if(gp->isbackground)
  2517				continue;
  2518			s = gp->status;
  2519			if(s == Gwaiting)
  2520				grunning++;
  2521			else if(s == Grunnable || s == Grunning || s == Gsyscall) {
  2522				runtime·unlock(&allglock);
  2523				runtime·printf("runtime: checkdead: find g %D in status %d\n", gp->goid, s);
  2524				runtime·throw("checkdead: runnable g");
  2525			}
  2526		}
  2527		runtime·unlock(&allglock);
  2528		if(grunning == 0)  // possible if main goroutine calls runtime·Goexit()
  2529			runtime·throw("no goroutines (main called runtime.Goexit) - deadlock!");
  2530		m->throwing = -1;  // do not dump full stacks
  2531		runtime·throw("all goroutines are asleep - deadlock!");
  2532	}
  2533	
  2534	static void
  2535	sysmon(void)
  2536	{
  2537		uint32 idle, delay;
  2538		int64 now, lastpoll, lasttrace;
  2539		G *gp;
  2540	
  2541		lasttrace = 0;
  2542		idle = 0;  // how many cycles in succession we have not woken anybody up
  2543		delay = 0;
  2544		for(;;) {
  2545			if(idle == 0)  // start with 20us sleep...
  2546				delay = 20;
  2547			else if(idle > 50)  // start doubling the sleep after 1ms...
  2548				delay *= 2;
  2549			if(delay > 10*1000)  // up to 10ms
  2550				delay = 10*1000;
  2551			runtime·usleep(delay);
  2552			if(runtime·debug.schedtrace <= 0 &&
  2553				(runtime·sched.gcwaiting || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs)) {  // TODO: fast atomic
  2554				runtime·lock(&runtime·sched);
  2555				if(runtime·atomicload(&runtime·sched.gcwaiting) || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs) {
  2556					runtime·atomicstore(&runtime·sched.sysmonwait, 1);
  2557					runtime·unlock(&runtime·sched);
  2558					runtime·notesleep(&runtime·sched.sysmonnote);
  2559					runtime·noteclear(&runtime·sched.sysmonnote);
  2560					idle = 0;
  2561					delay = 20;
  2562				} else
  2563					runtime·unlock(&runtime·sched);
  2564			}
  2565			// poll network if not polled for more than 10ms
  2566			lastpoll = runtime·atomicload64(&runtime·sched.lastpoll);
  2567			now = runtime·nanotime();
  2568			if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
  2569				runtime·cas64(&runtime·sched.lastpoll, lastpoll, now);
  2570				gp = runtime·netpoll(false);  // non-blocking
  2571				if(gp) {
  2572					// Need to decrement number of idle locked M's
  2573					// (pretending that one more is running) before injectglist.
  2574					// Otherwise it can lead to the following situation:
  2575					// injectglist grabs all P's but before it starts M's to run the P's,
  2576					// another M returns from syscall, finishes running its G,
  2577					// observes that there is no work to do and no other running M's
  2578					// and reports deadlock.
  2579					incidlelocked(-1);
  2580					injectglist(gp);
  2581					incidlelocked(1);
  2582				}
  2583			}
  2584			// retake P's blocked in syscalls
  2585			// and preempt long running G's
  2586			if(retake(now))
  2587				idle = 0;
  2588			else
  2589				idle++;
  2590	
  2591			if(runtime·debug.schedtrace > 0 && lasttrace + runtime·debug.schedtrace*1000000ll <= now) {
  2592				lasttrace = now;
  2593				runtime·schedtrace(runtime·debug.scheddetail);
  2594			}
  2595		}
  2596	}
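
The sleep at the top of sysmon's loop backs off from 20us to 10ms as the monitor keeps finding nothing to do. A small Go sketch of just that arithmetic; sysmonDelay is an illustrative name, not a runtime function.

package main

import "fmt"

// sysmonDelay mirrors the backoff above: 20us while work is being found,
// doubling once idle for more than 50 cycles, capped at 10ms.
func sysmonDelay(idle, delay uint32) uint32 {
	if idle == 0 {
		delay = 20
	} else if idle > 50 {
		delay *= 2
	}
	if delay > 10*1000 {
		delay = 10 * 1000
	}
	return delay
}

func main() {
	delay := uint32(0)
	for idle := uint32(0); idle <= 60; idle++ {
		delay = sysmonDelay(idle, delay)
	}
	fmt.Println("delay after 60 idle cycles (us):", delay)
}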
  2597	
  2598	typedef struct Pdesc Pdesc;
  2599	struct Pdesc
  2600	{
  2601		uint32	schedtick;
  2602		int64	schedwhen;
  2603		uint32	syscalltick;
  2604		int64	syscallwhen;
  2605	};
  2606	#pragma dataflag NOPTR
  2607	static Pdesc pdesc[MaxGomaxprocs];
  2608	
  2609	static uint32
  2610	retake(int64 now)
  2611	{
  2612		uint32 i, s, n;
  2613		int64 t;
  2614		P *p;
  2615		Pdesc *pd;
  2616	
  2617		n = 0;
  2618		for(i = 0; i < runtime·gomaxprocs; i++) {
  2619			p = runtime·allp[i];
  2620			if(p==nil)
  2621				continue;
  2622			pd = &pdesc[i];
  2623			s = p->status;
  2624			if(s == Psyscall) {
  2625				// Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
  2626				t = p->syscalltick;
  2627				if(pd->syscalltick != t) {
  2628					pd->syscalltick = t;
  2629					pd->syscallwhen = now;
  2630					continue;
  2631				}
  2632				// On the one hand we don't want to retake Ps if there is no other work to do,
  2633				// but on the other hand we want to retake them eventually
  2634				// because they can prevent the sysmon thread from deep sleep.
  2635				if(p->runqhead == p->runqtail &&
  2636					runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) > 0 &&
  2637					pd->syscallwhen + 10*1000*1000 > now)
  2638					continue;
  2639				// Need to decrement number of idle locked M's
  2640				// (pretending that one more is running) before the CAS.
  2641				// Otherwise the M from which we retake can exit the syscall,
  2642				// increment nmidle and report deadlock.
  2643				incidlelocked(-1);
  2644				if(runtime·cas(&p->status, s, Pidle)) {
  2645					n++;
  2646					handoffp(p);
  2647				}
  2648				incidlelocked(1);
  2649			} else if(s == Prunning) {
  2650				// Preempt G if it's running for more than 10ms.
  2651				t = p->schedtick;
  2652				if(pd->schedtick != t) {
  2653					pd->schedtick = t;
  2654					pd->schedwhen = now;
  2655					continue;
  2656				}
  2657				if(pd->schedwhen + 10*1000*1000 > now)
  2658					continue;
  2659				preemptone(p);
  2660			}
  2661		}
  2662		return n;
  2663	}
  2664	
  2665	// Tell all goroutines that they have been preempted and they should stop.
  2666	// This function is purely best-effort.  It can fail to inform a goroutine if a
  2667	// processor just started running it.
  2668	// No locks need to be held.
  2669	// Returns true if preemption request was issued to at least one goroutine.
  2670	static bool
  2671	preemptall(void)
  2672	{
  2673		P *p;
  2674		int32 i;
  2675		bool res;
  2676	
  2677		res = false;
  2678		for(i = 0; i < runtime·gomaxprocs; i++) {
  2679			p = runtime·allp[i];
  2680			if(p == nil || p->status != Prunning)
  2681				continue;
  2682			res |= preemptone(p);
  2683		}
  2684		return res;
  2685	}
  2686	
  2687	// Tell the goroutine running on processor P to stop.
  2688	// This function is purely best-effort.  It can incorrectly fail to inform the
  2689	// goroutine.  It can inform the wrong goroutine.  Even if it informs the
  2690	// correct goroutine, that goroutine might ignore the request if it is
  2691	// simultaneously executing runtime·newstack.
  2692	// No lock needs to be held.
  2693	// Returns true if preemption request was issued.
  2694	static bool
  2695	preemptone(P *p)
  2696	{
  2697		M *mp;
  2698		G *gp;
  2699	
  2700		mp = p->m;
  2701		if(mp == nil || mp == m)
  2702			return false;
  2703		gp = mp->curg;
  2704		if(gp == nil || gp == mp->g0)
  2705			return false;
  2706		gp->preempt = true;
  2707		gp->stackguard0 = StackPreempt;
  2708		return true;
  2709	}
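
preemptone only sets a flag and poisons stackguard0; the running goroutine notices the request at its next stack-growth check in a function prologue. Code that never calls a function therefore has no check to trip. A small program illustrating that limitation; on a runtime with only this cooperative preemption and a single P, it may spin forever.

package main

import (
	"fmt"
	"runtime"
	"time"
)

func main() {
	runtime.GOMAXPROCS(1)

	go func() {
		// No function calls, so no prologue ever compares SP against
		// the stackguard0 poisoned by preemptone; with purely
		// cooperative preemption this loop is never interrupted.
		for {
		}
	}()

	time.Sleep(100 * time.Millisecond)
	// With GOMAXPROCS=1 and only cooperative preemption, this line may
	// never be reached, because the spinning goroutine holds the only P.
	fmt.Println("preempted the busy loop")
}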
  2710	
  2711	void
  2712	runtime·schedtrace(bool detailed)
  2713	{
  2714		static int64 starttime;
  2715		int64 now;
  2716		int64 id1, id2, id3;
  2717		int32 i, t, h;
  2718		uintptr gi;
  2719		int8 *fmt;
  2720		M *mp, *lockedm;
  2721		G *gp, *lockedg;
  2722		P *p;
  2723	
  2724		now = runtime·nanotime();
  2725		if(starttime == 0)
  2726			starttime = now;
  2727	
  2728		runtime·lock(&runtime·sched);
  2729		runtime·printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idlethreads=%d runqueue=%d",
  2730			(now-starttime)/1000000, runtime·gomaxprocs, runtime·sched.npidle, runtime·sched.mcount,
  2731			runtime·sched.nmidle, runtime·sched.runqsize);
  2732		if(detailed) {
  2733			runtime·printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stopwait=%d sysmonwait=%d\n",
  2734				runtime·sched.gcwaiting, runtime·sched.nmidlelocked, runtime·sched.nmspinning,
  2735				runtime·sched.stopwait, runtime·sched.sysmonwait);
  2736		}
  2737		// We must be careful while reading data from P's, M's and G's.
  2738		// Even if we hold schedlock, most data can be changed concurrently.
  2739		// E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
  2740		for(i = 0; i < runtime·gomaxprocs; i++) {
  2741			p = runtime·allp[i];
  2742			if(p == nil)
  2743				continue;
  2744			mp = p->m;
  2745			h = runtime·atomicload(&p->runqhead);
  2746			t = runtime·atomicload(&p->runqtail);
  2747			if(detailed)
  2748				runtime·printf("  P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d gfreecnt=%d\n",
  2749					i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, t-h, p->gfreecnt);
  2750			else {
  2751				// In non-detailed mode format lengths of per-P run queues as:
  2752				// [len1 len2 len3 len4]
  2753				fmt = " %d";
  2754				if(runtime·gomaxprocs == 1)
  2755					fmt = " [%d]\n";
  2756				else if(i == 0)
  2757					fmt = " [%d";
  2758				else if(i == runtime·gomaxprocs-1)
  2759					fmt = " %d]\n";
  2760				runtime·printf(fmt, t-h);
  2761			}
  2762		}
  2763		if(!detailed) {
  2764			runtime·unlock(&runtime·sched);
  2765			return;
  2766		}
  2767		for(mp = runtime·allm; mp; mp = mp->alllink) {
  2768			p = mp->p;
  2769			gp = mp->curg;
  2770			lockedg = mp->lockedg;
  2771			id1 = -1;
  2772			if(p)
  2773				id1 = p->id;
  2774			id2 = -1;
  2775			if(gp)
  2776				id2 = gp->goid;
  2777			id3 = -1;
  2778			if(lockedg)
  2779				id3 = lockedg->goid;
  2780			runtime·printf("  M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d"
  2781				" locks=%d dying=%d helpgc=%d spinning=%d blocked=%d lockedg=%D\n",
  2782				mp->id, id1, id2,
  2783				mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
  2784				mp->spinning, mp->blocked, id3);
  2785		}
  2786		runtime·lock(&allglock);
  2787		for(gi = 0; gi < runtime·allglen; gi++) {
  2788			gp = runtime·allg[gi];
  2789			mp = gp->m;
  2790			lockedm = gp->lockedm;
  2791			runtime·printf("  G%D: status=%d(%s) m=%d lockedm=%d\n",
  2792				gp->goid, gp->status, gp->waitreason, mp ? mp->id : -1,
  2793				lockedm ? lockedm->id : -1);
  2794		}
  2795		runtime·unlock(&allglock);
  2796		runtime·unlock(&runtime·sched);
  2797	}
  2798	
  2799	// Put mp on midle list.
  2800	// Sched must be locked.
  2801	static void
  2802	mput(M *mp)
  2803	{
  2804		mp->schedlink = runtime·sched.midle;
  2805		runtime·sched.midle = mp;
  2806		runtime·sched.nmidle++;
  2807		checkdead();
  2808	}
  2809	
  2810	// Try to get an m from midle list.
  2811	// Sched must be locked.
  2812	static M*
  2813	mget(void)
  2814	{
  2815		M *mp;
  2816	
  2817		if((mp = runtime·sched.midle) != nil){
  2818			runtime·sched.midle = mp->schedlink;
  2819			runtime·sched.nmidle--;
  2820		}
  2821		return mp;
  2822	}
  2823	
  2824	// Put gp on the global runnable queue.
  2825	// Sched must be locked.
  2826	static void
  2827	globrunqput(G *gp)
  2828	{
  2829		gp->schedlink = nil;
  2830		if(runtime·sched.runqtail)
  2831			runtime·sched.runqtail->schedlink = gp;
  2832		else
  2833			runtime·sched.runqhead = gp;
  2834		runtime·sched.runqtail = gp;
  2835		runtime·sched.runqsize++;
  2836	}
  2837	
  2838	// Put a batch of runnable goroutines on the global runnable queue.
  2839	// Sched must be locked.
  2840	static void
  2841	globrunqputbatch(G *ghead, G *gtail, int32 n)
  2842	{
  2843		gtail->schedlink = nil;
  2844		if(runtime·sched.runqtail)
  2845			runtime·sched.runqtail->schedlink = ghead;
  2846		else
  2847			runtime·sched.runqhead = ghead;
  2848		runtime·sched.runqtail = gtail;
  2849		runtime·sched.runqsize += n;
  2850	}
  2851	
  2852	// Try to get a batch of G's from the global runnable queue.
  2853	// Sched must be locked.
  2854	static G*
  2855	globrunqget(P *p, int32 max)
  2856	{
  2857		G *gp, *gp1;
  2858		int32 n;
  2859	
  2860		if(runtime·sched.runqsize == 0)
  2861			return nil;
  2862		n = runtime·sched.runqsize/runtime·gomaxprocs+1;
  2863		if(n > runtime·sched.runqsize)
  2864			n = runtime·sched.runqsize;
  2865		if(max > 0 && n > max)
  2866			n = max;
  2867		if(n > nelem(p->runq)/2)
  2868			n = nelem(p->runq)/2;
  2869		runtime·sched.runqsize -= n;
  2870		if(runtime·sched.runqsize == 0)
  2871			runtime·sched.runqtail = nil;
  2872		gp = runtime·sched.runqhead;
  2873		runtime·sched.runqhead = gp->schedlink;
  2874		n--;
  2875		while(n--) {
  2876			gp1 = runtime·sched.runqhead;
  2877			runtime·sched.runqhead = gp1->schedlink;
  2878			runqput(p, gp1);
  2879		}
  2880		return gp;
  2881	}
  2882	
  2883	// Put p on pidle list.
  2884	// Sched must be locked.
  2885	static void
  2886	pidleput(P *p)
  2887	{
  2888		p->link = runtime·sched.pidle;
  2889		runtime·sched.pidle = p;
  2890		runtime·xadd(&runtime·sched.npidle, 1);  // TODO: fast atomic
  2891	}
  2892	
  2893	// Try to get a p from the pidle list.
  2894	// Sched must be locked.
  2895	static P*
  2896	pidleget(void)
  2897	{
  2898		P *p;
  2899	
  2900		p = runtime·sched.pidle;
  2901		if(p) {
  2902			runtime·sched.pidle = p->link;
  2903			runtime·xadd(&runtime·sched.npidle, -1);  // TODO: fast atomic
  2904		}
  2905		return p;
  2906	}
  2907	
  2908	// Try to put g on local runnable queue.
  2909	// If it's full, put onto global queue.
  2910	// Executed only by the owner P.
  2911	static void
  2912	runqput(P *p, G *gp)
  2913	{
  2914		uint32 h, t;
  2915	
  2916	retry:
  2917		h = runtime·atomicload(&p->runqhead);  // load-acquire, synchronize with consumers
  2918		t = p->runqtail;
  2919		if(t - h < nelem(p->runq)) {
  2920			p->runq[t%nelem(p->runq)] = gp;
  2921			runtime·atomicstore(&p->runqtail, t+1);  // store-release, makes the item available for consumption
  2922			return;
  2923		}
  2924		if(runqputslow(p, gp, h, t))
  2925			return;
  2926		// the queue is not full, now the put above must succeed
  2927		goto retry;
  2928	}
  2929	
  2930	// Put g and a batch of work from local runnable queue on global queue.
  2931	// Executed only by the owner P.
  2932	static bool
  2933	runqputslow(P *p, G *gp, uint32 h, uint32 t)
  2934	{
  2935		G *batch[nelem(p->runq)/2+1];
  2936		uint32 n, i;
  2937	
  2938		// First, grab a batch from local queue.
  2939		n = t-h;
  2940		n = n/2;
  2941		if(n != nelem(p->runq)/2)
  2942			runtime·throw("runqputslow: queue is not full");
  2943		for(i=0; i<n; i++)
  2944			batch[i] = p->runq[(h+i)%nelem(p->runq)];
  2945		if(!runtime·cas(&p->runqhead, h, h+n))  // cas-release, commits consume
  2946			return false;
  2947		batch[n] = gp;
  2948		// Link the goroutines.
  2949		for(i=0; i<n; i++)
  2950			batch[i]->schedlink = batch[i+1];
  2951		// Now put the batch on global queue.
  2952		runtime·lock(&runtime·sched);
  2953		globrunqputbatch(batch[0], batch[n], n+1);
  2954		runtime·unlock(&runtime·sched);
  2955		return true;
  2956	}
  2957	
  2958	// Get g from local runnable queue.
  2959	// Executed only by the owner P.
  2960	static G*
  2961	runqget(P *p)
  2962	{
  2963		G *gp;
  2964		uint32 t, h;
  2965	
  2966		for(;;) {
  2967			h = runtime·atomicload(&p->runqhead);  // load-acquire, synchronize with other consumers
  2968			t = p->runqtail;
  2969			if(t == h)
  2970				return nil;
  2971			gp = p->runq[h%nelem(p->runq)];
  2972			if(runtime·cas(&p->runqhead, h, h+1))  // cas-release, commits consume
  2973				return gp;
  2974		}
  2975	}
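
runqput and runqget above form a fixed-size, single-producer ring: the owner P is the only writer of runqtail, and any P consumes by CAS on runqhead. A compact Go sketch of the same acquire/release discipline; ring, put, and get are illustrative names, and the spill to the global queue is only noted, not implemented.

package main

import (
	"fmt"
	"sync/atomic"
)

const qsize = 256 // plays the role of nelem(p->runq)

type ring struct {
	head uint32 // consumed by any thread via CAS
	tail uint32 // written only by the owner
	buf  [qsize]int
}

// put is called only by the owner, mirroring runqput's fast path.
func (r *ring) put(v int) bool {
	h := atomic.LoadUint32(&r.head) // load-acquire, sync with consumers
	t := r.tail
	if t-h < qsize {
		r.buf[t%qsize] = v
		atomic.StoreUint32(&r.tail, t+1) // store-release, publish the item
		return true
	}
	return false // full: the runtime spills half to the global queue here
}

// get may be called from any thread, mirroring runqget.
func (r *ring) get() (int, bool) {
	for {
		h := atomic.LoadUint32(&r.head)
		t := atomic.LoadUint32(&r.tail)
		if t == h {
			return 0, false
		}
		v := r.buf[h%qsize]
		if atomic.CompareAndSwapUint32(&r.head, h, h+1) { // commit the consume
			return v, true
		}
	}
}

func main() {
	var r ring
	for i := 0; i < 5; i++ {
		r.put(i)
	}
	for {
		v, ok := r.get()
		if !ok {
			break
		}
		fmt.Println(v)
	}
}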
  2976	
  2977	// Grabs a batch of goroutines from local runnable queue.
  2978	// batch array must be of size nelem(p->runq)/2. Returns number of grabbed goroutines.
  2979	// Can be executed by any P.
  2980	static uint32
  2981	runqgrab(P *p, G **batch)
  2982	{
  2983		uint32 t, h, n, i;
  2984	
  2985		for(;;) {
  2986			h = runtime·atomicload(&p->runqhead);  // load-acquire, synchronize with other consumers
  2987			t = runtime·atomicload(&p->runqtail);  // load-acquire, synchronize with the producer
  2988			n = t-h;
  2989			n = n - n/2;
  2990			if(n == 0)
  2991				break;
  2992			if(n > nelem(p->runq)/2)  // read inconsistent h and t
  2993				continue;
  2994			for(i=0; i<n; i++)
  2995				batch[i] = p->runq[(h+i)%nelem(p->runq)];
  2996			if(runtime·cas(&p->runqhead, h, h+n))  // cas-release, commits consume
  2997				break;
  2998		}
  2999		return n;
  3000	}
  3001	
  3002	// Steal half of elements from local runnable queue of p2
  3003	// and put onto local runnable queue of p.
  3004	// Returns one of the stolen elements (or nil if failed).
  3005	static G*
  3006	runqsteal(P *p, P *p2)
  3007	{
  3008		G *gp;
  3009		G *batch[nelem(p->runq)/2];
  3010		uint32 t, h, n, i;
  3011	
  3012		n = runqgrab(p2, batch);
  3013		if(n == 0)
  3014			return nil;
  3015		n--;
  3016		gp = batch[n];
  3017		if(n == 0)
  3018			return gp;
  3019		h = runtime·atomicload(&p->runqhead);  // load-acquire, synchronize with consumers
  3020		t = p->runqtail;
  3021		if(t - h + n >= nelem(p->runq))
  3022			runtime·throw("runqsteal: runq overflow");
  3023		for(i=0; i<n; i++, t++)
  3024			p->runq[t%nelem(p->runq)] = batch[i];
  3025		runtime·atomicstore(&p->runqtail, t);  // store-release, makes the item available for consumption
  3026		return gp;
  3027	}
  3028	
  3029	void
  3030	runtime·testSchedLocalQueue(void)
  3031	{
  3032		P p;
  3033		G gs[nelem(p.runq)];
  3034		int32 i, j;
  3035	
  3036		runtime·memclr((byte*)&p, sizeof(p));
  3037	
  3038		for(i = 0; i < nelem(gs); i++) {
  3039			if(runqget(&p) != nil)
  3040				runtime·throw("runq is not empty initially");
  3041			for(j = 0; j < i; j++)
  3042				runqput(&p, &gs[i]);
  3043			for(j = 0; j < i; j++) {
  3044				if(runqget(&p) != &gs[i]) {
  3045					runtime·printf("bad element at iter %d/%d\n", i, j);
  3046					runtime·throw("bad element");
  3047				}
  3048			}
  3049			if(runqget(&p) != nil)
  3050				runtime·throw("runq is not empty afterwards");
  3051		}
  3052	}
  3053	
  3054	void
  3055	runtime·testSchedLocalQueueSteal(void)
  3056	{
  3057		P p1, p2;
  3058		G gs[nelem(p1.runq)], *gp;
  3059		int32 i, j, s;
  3060	
  3061		runtime·memclr((byte*)&p1, sizeof(p1));
  3062		runtime·memclr((byte*)&p2, sizeof(p2));
  3063	
  3064		for(i = 0; i < nelem(gs); i++) {
  3065			for(j = 0; j < i; j++) {
  3066				gs[j].sig = 0;
  3067				runqput(&p1, &gs[j]);
  3068			}
  3069			gp = runqsteal(&p2, &p1);
  3070			s = 0;
  3071			if(gp) {
  3072				s++;
  3073				gp->sig++;
  3074			}
  3075			while(gp = runqget(&p2)) {
  3076				s++;
  3077				gp->sig++;
  3078			}
  3079			while(gp = runqget(&p1))
  3080				gp->sig++;
  3081			for(j = 0; j < i; j++) {
  3082				if(gs[j].sig != 1) {
  3083					runtime·printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
  3084					runtime·throw("bad element");
  3085				}
  3086			}
  3087			if(s != i/2 && s != i/2+1) {
  3088				runtime·printf("bad steal %d, want %d or %d, iter %d\n",
  3089					s, i/2, i/2+1, i);
  3090				runtime·throw("bad steal");
  3091			}
  3092		}
  3093	}
  3094	
  3095	extern void runtime·morestack(void);
  3096	uintptr runtime·externalthreadhandlerp;
  3097	
  3098	// Does f mark the top of a goroutine stack?
  3099	bool
  3100	runtime·topofstack(Func *f)
  3101	{
  3102		return f->entry == (uintptr)runtime·goexit ||
  3103			f->entry == (uintptr)runtime·mstart ||
  3104			f->entry == (uintptr)runtime·mcall ||
  3105			f->entry == (uintptr)runtime·morestack ||
  3106			f->entry == (uintptr)runtime·lessstack ||
  3107			f->entry == (uintptr)_rt0_go ||
  3108			(runtime·externalthreadhandlerp != 0 && f->entry == runtime·externalthreadhandlerp);
  3109	}
  3110	
  3111	int32
  3112	runtime·setmaxthreads(int32 in)
  3113	{
  3114		int32 out;
  3115	
  3116		runtime·lock(&runtime·sched);
  3117		out = runtime·sched.maxmcount;
  3118		runtime·sched.maxmcount = in;
  3119		checkmcount();
  3120		runtime·unlock(&runtime·sched);
  3121		return out;
  3122	}
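
runtime·setmaxthreads is surfaced to programs as runtime/debug.SetMaxThreads, which likewise returns the previous limit:

package main

import (
	"fmt"
	"runtime/debug"
)

func main() {
	// SetMaxThreads is backed by setmaxthreads above; it returns the
	// previous limit, and the program is killed if it ever needs more
	// OS threads than the limit allows.
	prev := debug.SetMaxThreads(10000)
	fmt.Println("previous max threads:", prev)
}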
  3123	
  3124	static int8 experiment[] = GOEXPERIMENT; // defined in zaexperiment.h
  3125	
  3126	static bool
  3127	haveexperiment(int8 *name)
  3128	{
  3129		int32 i, j;
  3130		
  3131		for(i=0; i<sizeof(experiment); i++) {
  3132			if((i == 0 || experiment[i-1] == ',') && experiment[i] == name[0]) {
  3133				for(j=0; name[j]; j++)
  3134					if(experiment[i+j] != name[j])
  3135						goto nomatch;
  3136				if(experiment[i+j] != '\0' && experiment[i+j] != ',')
  3137					goto nomatch;
  3138				return 1;
  3139			}
  3140		nomatch:;
  3141		}
  3142		return 0;
  3143	}
