...

Text file src/pkg/runtime/proc.c

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "runtime.h"
     6	#include "arch_GOARCH.h"
     7	#include "zaexperiment.h"
     8	#include "malloc.h"
     9	#include "stack.h"
    10	#include "race.h"
    11	#include "type.h"
    12	#include "../../cmd/ld/textflag.h"
    13	
    14	// Goroutine scheduler
    15	// The scheduler's job is to distribute ready-to-run goroutines over worker threads.
    16	//
    17	// The main concepts are:
    18	// G - goroutine.
    19	// M - worker thread, or machine.
    20	// P - processor, a resource that is required to execute Go code.
    21	//     M must have an associated P to execute Go code; however, it can be
    22	//     blocked or in a syscall w/o an associated P.
    23	//
    24	// Design doc at http://golang.org/s/go11sched.
    25	
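As a user-level illustration of the G/M/P model described above (a minimal Go sketch, not part of this file): each go statement creates a G, GOMAXPROCS bounds the number of P's, and the runtime multiplexes those G's onto whatever M's currently hold a P.

    package main

    import (
    	"fmt"
    	"runtime"
    	"sync"
    )

    func main() {
    	// At most 2 P's, so at most 2 goroutines execute Go code at any
    	// instant, regardless of how many OS threads (M's) exist.
    	runtime.GOMAXPROCS(2)

    	var wg sync.WaitGroup
    	for i := 0; i < 8; i++ { // 8 G's
    		wg.Add(1)
    		go func(i int) {
    			defer wg.Done()
    			fmt.Println("goroutine", i)
    		}(i)
    	}
    	wg.Wait()
    }
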
    26	typedef struct Sched Sched;
    27	struct Sched {
    28		Lock;
    29	
    30		uint64	goidgen;
    31	
    32		M*	midle;	 // idle m's waiting for work
    33		int32	nmidle;	 // number of idle m's waiting for work
    34		int32	nmidlelocked; // number of locked m's waiting for work
    35		int32	mcount;	 // number of m's that have been created
    36		int32	maxmcount;	// maximum number of m's allowed (or die)
    37	
    38		P*	pidle;  // idle P's
    39		uint32	npidle;
    40		uint32	nmspinning;
    41	
    42		// Global runnable queue.
    43		G*	runqhead;
    44		G*	runqtail;
    45		int32	runqsize;
    46	
    47		// Global cache of dead G's.
    48		Lock	gflock;
    49		G*	gfree;
    50	
    51		uint32	gcwaiting;	// gc is waiting to run
    52		int32	stopwait;
    53		Note	stopnote;
    54		uint32	sysmonwait;
    55		Note	sysmonnote;
    56		uint64	lastpoll;
    57	
    58		int32	profilehz;	// cpu profiling rate
    59	};
    60	
    61	enum
    62	{
    63		// The max value of GOMAXPROCS.
    64		// There are no fundamental restrictions on the value.
    65		MaxGomaxprocs = 1<<8,
    66	
    67		// Number of goroutine ids to grab from runtime·sched.goidgen to local per-P cache at once.
    68		// 16 seems to provide enough amortization, but other than that it's a mostly arbitrary number.
    69		GoidCacheBatch = 16,
    70	};
    71	
    72	Sched	runtime·sched;
    73	int32	runtime·gomaxprocs;
    74	uint32	runtime·needextram;
    75	bool	runtime·iscgo;
    76	M	runtime·m0;
    77	G	runtime·g0;	// idle goroutine for m0
    78	G*	runtime·lastg;
    79	M*	runtime·allm;
    80	M*	runtime·extram;
    81	int8*	runtime·goos;
    82	int32	runtime·ncpu;
    83	static int32	newprocs;
    84	
    85	static	Lock allglock;	// the following vars are protected by this lock or by stoptheworld
    86	G**	runtime·allg;
    87	uintptr runtime·allglen;
    88	static	uintptr allgcap;
    89	
    90	void runtime·mstart(void);
    91	static void runqput(P*, G*);
    92	static G* runqget(P*);
    93	static bool runqputslow(P*, G*, uint32, uint32);
    94	static G* runqsteal(P*, P*);
    95	static void mput(M*);
    96	static M* mget(void);
    97	static void mcommoninit(M*);
    98	static void schedule(void);
    99	static void procresize(int32);
   100	static void acquirep(P*);
   101	static P* releasep(void);
   102	static void newm(void(*)(void), P*);
   103	static void stopm(void);
   104	static void startm(P*, bool);
   105	static void handoffp(P*);
   106	static void wakep(void);
   107	static void stoplockedm(void);
   108	static void startlockedm(G*);
   109	static void sysmon(void);
   110	static uint32 retake(int64);
   111	static void incidlelocked(int32);
   112	static void checkdead(void);
   113	static void exitsyscall0(G*);
   114	static void park0(G*);
   115	static void goexit0(G*);
   116	static void gfput(P*, G*);
   117	static G* gfget(P*);
   118	static void gfpurge(P*);
   119	static void globrunqput(G*);
   120	static void globrunqputbatch(G*, G*, int32);
   121	static G* globrunqget(P*, int32);
   122	static P* pidleget(void);
   123	static void pidleput(P*);
   124	static void injectglist(G*);
   125	static bool preemptall(void);
   126	static bool preemptone(P*);
   127	static bool exitsyscallfast(void);
   128	static bool haveexperiment(int8*);
   129	static void allgadd(G*);
   130	
   131	// The bootstrap sequence is:
   132	//
   133	//	call osinit
   134	//	call schedinit
   135	//	make & queue new G
   136	//	call runtime·mstart
   137	//
   138	// The new G calls runtime·main.
   139	void
   140	runtime·schedinit(void)
   141	{
   142		int32 n, procs;
   143		byte *p;
   144		Eface i;
   145	
   146		runtime·sched.maxmcount = 10000;
   147		runtime·precisestack = true; // haveexperiment("precisestack");
   148	
   149		runtime·symtabinit();
   150		runtime·mallocinit();
   151		mcommoninit(m);
   152		
   153		// Initialize the itable value for newErrorCString,
   154		// so that the next time it gets called, possibly
   155		// in a fault during a garbage collection, it will not
   156		// need to allocate memory.
   157		runtime·newErrorCString(0, &i);
   158		
   159		// Initialize the cached gotraceback value, since
   160		// gotraceback calls getenv, which mallocs on Plan 9.
   161		runtime·gotraceback(nil);
   162	
   163		runtime·goargs();
   164		runtime·goenvs();
   165		runtime·parsedebugvars();
   166	
   167		runtime·sched.lastpoll = runtime·nanotime();
   168		procs = 1;
   169		p = runtime·getenv("GOMAXPROCS");
   170		if(p != nil && (n = runtime·atoi(p)) > 0) {
   171			if(n > MaxGomaxprocs)
   172				n = MaxGomaxprocs;
   173			procs = n;
   174		}
   175		runtime·allp = runtime·malloc((MaxGomaxprocs+1)*sizeof(runtime·allp[0]));
   176		procresize(procs);
   177	
   178		runtime·copystack = runtime·precisestack;
   179		p = runtime·getenv("GOCOPYSTACK");
   180		if(p != nil && !runtime·strcmp(p, (byte*)"0"))
   181			runtime·copystack = false;
   182	
   183		mstats.enablegc = 1;
   184	
   185		if(raceenabled)
   186			g->racectx = runtime·raceinit();
   187	}
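schedinit above reads GOMAXPROCS from the environment (default 1) and clamps it to MaxGomaxprocs; the same knob is reachable at run time through runtime.GOMAXPROCS. A small sketch:

    package main

    import (
    	"fmt"
    	"runtime"
    )

    func main() {
    	// GOMAXPROCS(0) only reports the current value (whatever schedinit
    	// picked up from the GOMAXPROCS environment variable, default 1).
    	fmt.Println("procs:", runtime.GOMAXPROCS(0))

    	// A positive argument changes the value: the world is stopped and
    	// restarted, and starttheworld applies the new value via procresize.
    	prev := runtime.GOMAXPROCS(4)
    	fmt.Println("was:", prev, "now:", runtime.GOMAXPROCS(0))
    }
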
   188	
   189	extern void main·init(void);
   190	extern void main·main(void);
   191	
   192	static FuncVal scavenger = {runtime·MHeap_Scavenger};
   193	
   194	static FuncVal initDone = { runtime·unlockOSThread };
   195	
   196	// The main goroutine.
   197	// Note: C frames in general are not copyable during stack growth, for two reasons:
   198	//   1) We don't know where in a frame to find pointers to other stack locations.
   199	//   2) There's no guarantee that globals or heap values do not point into the frame.
   200	//
   201	// The C frame for runtime.main is copyable, because:
   202	//   1) There are no pointers to other stack locations in the frame
   203	//      (d.fn points at a global, d.link is nil, d.argp is -1).
   204	//   2) The only pointer into this frame is from the defer chain,
   205	//      which is explicitly handled during stack copying.
   206	void
   207	runtime·main(void)
   208	{
   209		Defer d;
   210		
   211		// Max stack size is 1 GB on 64-bit, 250 MB on 32-bit.
   212		// Using decimal instead of binary GB and MB because
   213		// they look nicer in the stack overflow failure message.
   214		if(sizeof(void*) == 8)
   215			runtime·maxstacksize = 1000000000;
   216		else
   217			runtime·maxstacksize = 250000000;
   218	
   219		newm(sysmon, nil);
   220	
   221		// Lock the main goroutine onto this, the main OS thread,
   222		// during initialization.  Most programs won't care, but a few
   223		// do require certain calls to be made by the main thread.
   224		// Those can arrange for main.main to run in the main thread
   225		// by calling runtime.LockOSThread during initialization
   226		// to preserve the lock.
   227		runtime·lockOSThread();
   228		
   229		// Defer unlock so that runtime.Goexit during init does the unlock too.
   230		d.fn = &initDone;
   231		d.siz = 0;
   232		d.link = g->defer;
   233		d.argp = NoArgs;
   234		d.special = true;
   235		g->defer = &d;
   236	
   237		if(m != &runtime·m0)
   238			runtime·throw("runtime·main not on m0");
   239		runtime·newproc1(&scavenger, nil, 0, 0, runtime·main);
   240		main·init();
   241	
   242		if(g->defer != &d || d.fn != &initDone)
   243			runtime·throw("runtime: bad defer entry after init");
   244		g->defer = d.link;
   245		runtime·unlockOSThread();
   246	
   247		main·main();
   248		if(raceenabled)
   249			runtime·racefini();
   250	
   251		// Make racy client program work: if panicking on
   252		// another goroutine at the same time as main returns,
   253		// let the other goroutine finish printing the panic trace.
   254		// Once it does, it will exit. See issue 3934.
   255		if(runtime·panicking)
   256			runtime·park(nil, nil, "panicwait");
   257	
   258		runtime·exit(0);
   259		for(;;)
   260			*(int32*)runtime·main = 0;
   261	}
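As the comment above notes, a program that needs main.main on the main OS thread preserves the lock by calling runtime.LockOSThread from an init function, before initDone's deferred unlock runs. A minimal sketch:

    package main

    import "runtime"

    func init() {
    	// runtime.main keeps the main goroutine locked to the main OS
    	// thread while init functions run; calling LockOSThread here
    	// preserves the lock (see the comment in runtime·main), so
    	// main.main also runs on the main thread.
    	runtime.LockOSThread()
    }

    func main() {
    	// ... calls that must happen on the main thread ...
    }
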
   262	
   263	void
   264	runtime·goroutineheader(G *gp)
   265	{
   266		int8 *status;
   267		int64 waitfor;
   268	
   269		switch(gp->status) {
   270		case Gidle:
   271			status = "idle";
   272			break;
   273		case Grunnable:
   274			status = "runnable";
   275			break;
   276		case Grunning:
   277			status = "running";
   278			break;
   279		case Gsyscall:
   280			status = "syscall";
   281			break;
   282		case Gwaiting:
   283			if(gp->waitreason)
   284				status = gp->waitreason;
   285			else
   286				status = "waiting";
   287			break;
   288		default:
   289			status = "???";
   290			break;
   291		}
   292	
   293		// approximate time the G has been blocked, in minutes
   294		waitfor = 0;
   295		if((gp->status == Gwaiting || gp->status == Gsyscall) && gp->waitsince != 0)
   296			waitfor = (runtime·nanotime() - gp->waitsince) / (60LL*1000*1000*1000);
   297	
   298		if(waitfor < 1)
   299			runtime·printf("goroutine %D [%s]:\n", gp->goid, status);
   300		else
   301			runtime·printf("goroutine %D [%s, %D minutes]:\n", gp->goid, status, waitfor);
   302	}
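For reference, the two printf formats above produce traceback headers of the following shape (values illustrative):

    goroutine 1 [running]:
    goroutine 7 [syscall, 3 minutes]:
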
   303	
   304	void
   305	runtime·tracebackothers(G *me)
   306	{
   307		G *gp;
   308		int32 traceback;
   309		uintptr i;
   310	
   311		traceback = runtime·gotraceback(nil);
   312		
   313		// Show the current goroutine first, if we haven't already.
   314		if((gp = m->curg) != nil && gp != me) {
   315			runtime·printf("\n");
   316			runtime·goroutineheader(gp);
   317			runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, gp);
   318		}
   319	
   320		runtime·lock(&allglock);
   321		for(i = 0; i < runtime·allglen; i++) {
   322			gp = runtime·allg[i];
   323			if(gp == me || gp == m->curg || gp->status == Gdead)
   324				continue;
   325			if(gp->issystem && traceback < 2)
   326				continue;
   327			runtime·printf("\n");
   328			runtime·goroutineheader(gp);
   329			if(gp->status == Grunning) {
   330				runtime·printf("\tgoroutine running on other thread; stack unavailable\n");
   331				runtime·printcreatedby(gp);
   332			} else
   333				runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, gp);
   334		}
   335		runtime·unlock(&allglock);
   336	}
   337	
   338	static void
   339	checkmcount(void)
   340	{
   341		// sched lock is held
   342		if(runtime·sched.mcount > runtime·sched.maxmcount) {
   343			runtime·printf("runtime: program exceeds %d-thread limit\n", runtime·sched.maxmcount);
   344			runtime·throw("thread exhaustion");
   345		}
   346	}
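The limit checked here defaults to 10000 (set in schedinit); from Go code it can be adjusted with runtime/debug.SetMaxThreads, which adjusts this same limit. A sketch (the new value is illustrative):

    package main

    import "runtime/debug"

    func main() {
    	// Allow up to 20000 OS threads before the runtime throws
    	// "thread exhaustion"; SetMaxThreads returns the previous limit
    	// (10000 unless already changed).
    	prev := debug.SetMaxThreads(20000)
    	_ = prev
    }
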
   347	
   348	static void
   349	mcommoninit(M *mp)
   350	{
   351		// If there is no mcache, runtime·callers() will crash,
   352		// and we are most likely in the sysmon thread, so the stack is senseless anyway.
   353		if(m->mcache)
   354			runtime·callers(1, mp->createstack, nelem(mp->createstack));
   355	
   356		mp->fastrand = 0x49f6428aUL + mp->id + runtime·cputicks();
   357	
   358		runtime·lock(&runtime·sched);
   359		mp->id = runtime·sched.mcount++;
   360		checkmcount();
   361		runtime·mpreinit(mp);
   362	
   363		// Add to runtime·allm so garbage collector doesn't free m
   364		// when it is just in a register or thread-local storage.
   365		mp->alllink = runtime·allm;
   366		// runtime·NumCgoCall() iterates over allm w/o schedlock,
   367		// so we need to publish it safely.
   368		runtime·atomicstorep(&runtime·allm, mp);
   369		runtime·unlock(&runtime·sched);
   370	}
   371	
   372	// Mark gp ready to run.
   373	void
   374	runtime·ready(G *gp)
   375	{
   376		// Mark runnable.
   377		m->locks++;  // disable preemption because it can be holding p in a local var
   378		if(gp->status != Gwaiting) {
   379			runtime·printf("goroutine %D has status %d\n", gp->goid, gp->status);
   380			runtime·throw("bad g->status in ready");
   381		}
   382		gp->status = Grunnable;
   383		runqput(m->p, gp);
   384		if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0)  // TODO: fast atomic
   385			wakep();
   386		m->locks--;
   387		if(m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
   388			g->stackguard0 = StackPreempt;
   389	}
   390	
   391	int32
   392	runtime·gcprocs(void)
   393	{
   394		int32 n;
   395	
   396		// Figure out how many CPUs to use during GC.
   397		// Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
   398		runtime·lock(&runtime·sched);
   399		n = runtime·gomaxprocs;
   400		if(n > runtime·ncpu)
   401			n = runtime·ncpu;
   402		if(n > MaxGcproc)
   403			n = MaxGcproc;
   404		if(n > runtime·sched.nmidle+1) // one M is currently running
   405			n = runtime·sched.nmidle+1;
   406		runtime·unlock(&runtime·sched);
   407		return n;
   408	}
   409	
   410	static bool
   411	needaddgcproc(void)
   412	{
   413		int32 n;
   414	
   415		runtime·lock(&runtime·sched);
   416		n = runtime·gomaxprocs;
   417		if(n > runtime·ncpu)
   418			n = runtime·ncpu;
   419		if(n > MaxGcproc)
   420			n = MaxGcproc;
   421		n -= runtime·sched.nmidle+1; // one M is currently running
   422		runtime·unlock(&runtime·sched);
   423		return n > 0;
   424	}
   425	
   426	void
   427	runtime·helpgc(int32 nproc)
   428	{
   429		M *mp;
   430		int32 n, pos;
   431	
   432		runtime·lock(&runtime·sched);
   433		pos = 0;
   434		for(n = 1; n < nproc; n++) {  // one M is currently running
   435			if(runtime·allp[pos]->mcache == m->mcache)
   436				pos++;
   437			mp = mget();
   438			if(mp == nil)
   439				runtime·throw("runtime·gcprocs inconsistency");
   440			mp->helpgc = n;
   441			mp->mcache = runtime·allp[pos]->mcache;
   442			pos++;
   443			runtime·notewakeup(&mp->park);
   444		}
   445		runtime·unlock(&runtime·sched);
   446	}
   447	
   448	// Similar to stoptheworld but best-effort and can be called several times.
   449	// There is no reverse operation; it is used during crashing.
   450	// This function must not lock any mutexes.
   451	void
   452	runtime·freezetheworld(void)
   453	{
   454		int32 i;
   455	
   456		if(runtime·gomaxprocs == 1)
   457			return;
   458		// stopwait and preemption requests can be lost
   459		// due to races with concurrently executing threads,
   460		// so try several times
   461		for(i = 0; i < 5; i++) {
   462			// this should tell the scheduler to not start any new goroutines
   463			runtime·sched.stopwait = 0x7fffffff;
   464			runtime·atomicstore((uint32*)&runtime·sched.gcwaiting, 1);
   465			// this should stop running goroutines
   466			if(!preemptall())
   467				break;  // no running goroutines
   468			runtime·usleep(1000);
   469		}
   470		// to be sure
   471		runtime·usleep(1000);
   472		preemptall();
   473		runtime·usleep(1000);
   474	}
   475	
   476	void
   477	runtime·stoptheworld(void)
   478	{
   479		int32 i;
   480		uint32 s;
   481		P *p;
   482		bool wait;
   483	
   484		runtime·lock(&runtime·sched);
   485		runtime·sched.stopwait = runtime·gomaxprocs;
   486		runtime·atomicstore((uint32*)&runtime·sched.gcwaiting, 1);
   487		preemptall();
   488		// stop current P
   489		m->p->status = Pgcstop;
   490		runtime·sched.stopwait--;
   491		// try to retake all P's in Psyscall status
   492		for(i = 0; i < runtime·gomaxprocs; i++) {
   493			p = runtime·allp[i];
   494			s = p->status;
   495			if(s == Psyscall && runtime·cas(&p->status, s, Pgcstop))
   496				runtime·sched.stopwait--;
   497		}
   498		// stop idle P's
   499		while(p = pidleget()) {
   500			p->status = Pgcstop;
   501			runtime·sched.stopwait--;
   502		}
   503		wait = runtime·sched.stopwait > 0;
   504		runtime·unlock(&runtime·sched);
   505	
   506		// wait for remaining P's to stop voluntarily
   507		if(wait) {
   508			for(;;) {
   509				// wait for 100us, then try to re-preempt in case of any races
   510				if(runtime·notetsleep(&runtime·sched.stopnote, 100*1000)) {
   511					runtime·noteclear(&runtime·sched.stopnote);
   512					break;
   513				}
   514				preemptall();
   515			}
   516		}
   517		if(runtime·sched.stopwait)
   518			runtime·throw("stoptheworld: not stopped");
   519		for(i = 0; i < runtime·gomaxprocs; i++) {
   520			p = runtime·allp[i];
   521			if(p->status != Pgcstop)
   522				runtime·throw("stoptheworld: not stopped");
   523		}
   524	}
   525	
   526	static void
   527	mhelpgc(void)
   528	{
   529		m->helpgc = -1;
   530	}
   531	
   532	void
   533	runtime·starttheworld(void)
   534	{
   535		P *p, *p1;
   536		M *mp;
   537		G *gp;
   538		bool add;
   539	
   540		m->locks++;  // disable preemption because it can be holding p in a local var
   541		gp = runtime·netpoll(false);  // non-blocking
   542		injectglist(gp);
   543		add = needaddgcproc();
   544		runtime·lock(&runtime·sched);
   545		if(newprocs) {
   546			procresize(newprocs);
   547			newprocs = 0;
   548		} else
   549			procresize(runtime·gomaxprocs);
   550		runtime·sched.gcwaiting = 0;
   551	
   552		p1 = nil;
   553		while(p = pidleget()) {
   554			// procresize() puts p's with work at the beginning of the list.
   555			// Once we reach a p without a run queue, the rest don't have one either.
   556			if(p->runqhead == p->runqtail) {
   557				pidleput(p);
   558				break;
   559			}
   560			p->m = mget();
   561			p->link = p1;
   562			p1 = p;
   563		}
   564		if(runtime·sched.sysmonwait) {
   565			runtime·sched.sysmonwait = false;
   566			runtime·notewakeup(&runtime·sched.sysmonnote);
   567		}
   568		runtime·unlock(&runtime·sched);
   569	
   570		while(p1) {
   571			p = p1;
   572			p1 = p1->link;
   573			if(p->m) {
   574				mp = p->m;
   575				p->m = nil;
   576				if(mp->nextp)
   577					runtime·throw("starttheworld: inconsistent mp->nextp");
   578				mp->nextp = p;
   579				runtime·notewakeup(&mp->park);
   580			} else {
   581				// Start M to run P.  Do not start another M below.
   582				newm(nil, p);
   583				add = false;
   584			}
   585		}
   586	
   587		if(add) {
   588			// If GC could have used another helper proc, start one now,
   589			// in the hope that it will be available next time.
   590			// It would have been even better to start it before the collection,
   591			// but doing so requires allocating memory, so it's tricky to
   592			// coordinate.  This lazy approach works out in practice:
   593			// we don't mind if the first couple gc rounds don't have quite
   594			// the maximum number of procs.
   595			newm(mhelpgc, nil);
   596		}
   597		m->locks--;
   598		if(m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
   599			g->stackguard0 = StackPreempt;
   600	}
   601	
   602	// Called to start an M.
   603	void
   604	runtime·mstart(void)
   605	{
   606		if(g != m->g0)
   607			runtime·throw("bad runtime·mstart");
   608	
   609		// Record top of stack for use by mcall.
   610		// Once we call schedule we're never coming back,
   611		// so other calls can reuse this stack space.
   612		runtime·gosave(&m->g0->sched);
   613		m->g0->sched.pc = (uintptr)-1;  // make sure it is never used
   614		m->g0->stackguard = m->g0->stackguard0;  // cgo sets only stackguard0, copy it to stackguard
   615		runtime·asminit();
   616		runtime·minit();
   617	
   618		// Install signal handlers; after minit so that minit can
   619		// prepare the thread to be able to handle the signals.
   620		if(m == &runtime·m0)
   621			runtime·initsig();
   622		
   623		if(m->mstartfn)
   624			m->mstartfn();
   625	
   626		if(m->helpgc) {
   627			m->helpgc = 0;
   628			stopm();
   629		} else if(m != &runtime·m0) {
   630			acquirep(m->nextp);
   631			m->nextp = nil;
   632		}
   633		schedule();
   634	
   635		// TODO(brainman): This point is never reached, because the scheduler
   636		// does not release OS threads at the moment. But once this path
   637		// is enabled, we must remove our SEH here.
   638	}
   639	
   640	// When running with cgo, we call _cgo_thread_start
   641	// to start threads for us so that we can play nicely with
   642	// foreign code.
   643	void (*_cgo_thread_start)(void*);
   644	
   645	typedef struct CgoThreadStart CgoThreadStart;
   646	struct CgoThreadStart
   647	{
   648		M *m;
   649		G *g;
   650		uintptr *tls;
   651		void (*fn)(void);
   652	};
   653	
   654	// Allocate a new m unassociated with any thread.
   655	// Can use p for allocation context if needed.
   656	M*
   657	runtime·allocm(P *p)
   658	{
   659		M *mp;
   660		static Type *mtype;  // The Go type M
   661	
   662		m->locks++;  // disable GC because it can be called from sysmon
   663		if(m->p == nil)
   664			acquirep(p);  // temporarily borrow p for mallocs in this function
   665		if(mtype == nil) {
   666			Eface e;
   667			runtime·gc_m_ptr(&e);
   668			mtype = ((PtrType*)e.type)->elem;
   669		}
   670	
   671		mp = runtime·cnew(mtype);
   672		mcommoninit(mp);
   673	
   674		// In the case of cgo or Solaris, pthread_create will make us a stack.
   675		// Windows lays out the sched stack on the OS stack.
   676		if(runtime·iscgo || Solaris || Windows)
   677			mp->g0 = runtime·malg(-1);
   678		else
   679			mp->g0 = runtime·malg(8192);
   680	
   681		if(p == m->p)
   682			releasep();
   683		m->locks--;
   684		if(m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
   685			g->stackguard0 = StackPreempt;
   686	
   687		return mp;
   688	}
   689	
   690	static G*
   691	allocg(void)
   692	{
   693		G *gp;
   694		static Type *gtype;
   695		
   696		if(gtype == nil) {
   697			Eface e;
   698			runtime·gc_g_ptr(&e);
   699			gtype = ((PtrType*)e.type)->elem;
   700		}
   701		gp = runtime·cnew(gtype);
   702		return gp;
   703	}
   704	
   705	static M* lockextra(bool nilokay);
   706	static void unlockextra(M*);
   707	
   708	// needm is called when a cgo callback happens on a
   709	// thread without an m (a thread not created by Go).
   710	// In this case, needm is expected to find an m to use
   711	// and return with m, g initialized correctly.
   712	// Since m and g are not set now (likely nil, but see below)
   713	// needm is limited in what routines it can call. In particular
   714	// it can only call nosplit functions (textflag 7) and cannot
   715	// do any scheduling that requires an m.
   716	//
   717	// In order to avoid needing heavy lifting here, we adopt
   718	// the following strategy: there is a stack of available m's
   719	// that can be stolen. Using compare-and-swap
   720	// to pop from the stack has ABA races, so we simulate
   721	// a lock by doing an exchange (via casp) to steal the stack
   722	// head and replace the top pointer with MLOCKED (1).
   723	// This serves as a simple spin lock that we can use even
   724	// without an m. The thread that locks the stack in this way
   725	// unlocks the stack by storing a valid stack head pointer.
   726	//
   727	// In order to make sure that there is always an m structure
   728	// available to be stolen, we maintain the invariant that there
   729	// is always one more than needed. At the beginning of the
   730	// program (if cgo is in use) the list is seeded with a single m.
   731	// If needm finds that it has taken the last m off the list, its job
   732	// is - once it has installed its own m so that it can do things like
   733	// allocate memory - to create a spare m and put it on the list.
   734	//
   735	// Each of these extra m's also has a g0 and a curg that are
   736	// pressed into service as the scheduling stack and current
   737	// goroutine for the duration of the cgo callback.
   738	//
   739	// When the callback is done with the m, it calls dropm to
   740	// put the m back on the list.
   741	#pragma textflag NOSPLIT
   742	void
   743	runtime·needm(byte x)
   744	{
   745		M *mp;
   746	
   747		if(runtime·needextram) {
   748			// Can happen if C/C++ code calls Go from a global ctor.
   749			// Can not throw, because scheduler is not initialized yet.
   750			runtime·write(2, "fatal error: cgo callback before cgo call\n",
   751				sizeof("fatal error: cgo callback before cgo call\n")-1);
   752			runtime·exit(1);
   753		}
   754	
   755		// Lock extra list, take head, unlock popped list.
   756		// nilokay=false is safe here because of the invariant above,
   757		// that the extra list always contains or will soon contain
   758		// at least one m.
   759		mp = lockextra(false);
   760	
   761		// Set needextram when we've just emptied the list,
   762		// so that the eventual call into cgocallbackg will
   763		// allocate a new m for the extra list. We delay the
   764		// allocation until then so that it can be done
   765		// after exitsyscall makes sure it is okay to be
   766		// running at all (that is, there's no garbage collection
   767		// running right now).
   768		mp->needextram = mp->schedlink == nil;
   769		unlockextra(mp->schedlink);
   770	
   771		// Install m and g (= m->g0) and set the stack bounds
   772		// to match the current stack. We don't actually know
   773		// how big the stack is, like we don't know how big any
   774		// scheduling stack is, but we assume there's at least 32 kB,
   775		// which is more than enough for us.
   776		runtime·setmg(mp, mp->g0);
   777		g->stackbase = (uintptr)(&x + 1024);
   778		g->stackguard = (uintptr)(&x - 32*1024);
   779		g->stackguard0 = g->stackguard;
   780	
   781		// Initialize this thread to use the m.
   782		runtime·asminit();
   783		runtime·minit();
   784	}
   785	
   786	// newextram allocates an m and puts it on the extra list.
   787	// It is called with a working local m, so that it can do things
   788	// like call schedlock and allocate.
   789	void
   790	runtime·newextram(void)
   791	{
   792		M *mp, *mnext;
   793		G *gp;
   794	
   795		// Create extra goroutine locked to extra m.
   796		// The goroutine is the context in which the cgo callback will run.
   797		// The sched.pc will never be returned to, but setting it to
   798		// runtime.goexit makes clear to the traceback routines where
   799		// the goroutine stack ends.
   800		mp = runtime·allocm(nil);
   801		gp = runtime·malg(4096);
   802		gp->sched.pc = (uintptr)runtime·goexit;
   803		gp->sched.sp = gp->stackbase;
   804		gp->sched.lr = 0;
   805		gp->sched.g = gp;
   806		gp->syscallpc = gp->sched.pc;
   807		gp->syscallsp = gp->sched.sp;
   808		gp->syscallstack = gp->stackbase;
   809		gp->syscallguard = gp->stackguard;
   810		gp->status = Gsyscall;
   811		mp->curg = gp;
   812		mp->locked = LockInternal;
   813		mp->lockedg = gp;
   814		gp->lockedm = mp;
   815		gp->goid = runtime·xadd64(&runtime·sched.goidgen, 1);
   816		if(raceenabled)
   817			gp->racectx = runtime·racegostart(runtime·newextram);
   818		// put on allg for garbage collector
   819		allgadd(gp);
   820	
   821		// Add m to the extra list.
   822		mnext = lockextra(true);
   823		mp->schedlink = mnext;
   824		unlockextra(mp);
   825	}
   826	
   827	// dropm is called when a cgo callback has called needm but is now
   828	// done with the callback and returning back into the non-Go thread.
   829	// It puts the current m back onto the extra list.
   830	//
   831	// The main expense here is the call to signalstack to release the
   832	// m's signal stack, and then the call to needm on the next callback
   833	// from this thread. It is tempting to try to save the m for next time,
   834	// which would eliminate both these costs, but there might not be
   835	// a next time: the current thread (which Go does not control) might exit.
   836	// If we saved the m for that thread, there would be an m leak each time
   837	// such a thread exited. Instead, we acquire and release an m on each
   838	// call. These should typically not be scheduling operations, just a few
   839	// atomics, so the cost should be small.
   840	//
   841	// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
   842	// variable using pthread_key_create. Unlike the pthread keys we already use
   843	// on OS X, this dummy key would never be read by Go code. It would exist
   844	// only so that we could register a thread-exit-time destructor.
   845	// That destructor would put the m back onto the extra list.
   846	// This is purely a performance optimization. The current version,
   847	// in which dropm happens on each cgo call, is still correct too.
   848	// We may have to keep the current version on systems with cgo
   849	// but without pthreads, like Windows.
   850	void
   851	runtime·dropm(void)
   852	{
   853		M *mp, *mnext;
   854	
   855		// Undo whatever initialization minit did during needm.
   856		runtime·unminit();
   857	
   858		// Clear m and g, and return m to the extra list.
   859		// After the call to setmg we can only call nosplit functions.
   860		mp = m;
   861		runtime·setmg(nil, nil);
   862	
   863		mnext = lockextra(true);
   864		mp->schedlink = mnext;
   865		unlockextra(mp);
   866	}
   867	
   868	#define MLOCKED ((M*)1)
   869	
   870	// lockextra locks the extra list and returns the list head.
   871	// The caller must unlock the list by storing a new list head
   872	// to runtime.extram. If nilokay is true, then lockextra will
   873	// return a nil list head if that's what it finds. If nilokay is false,
   874	// lockextra will keep waiting until the list head is no longer nil.
   875	#pragma textflag NOSPLIT
   876	static M*
   877	lockextra(bool nilokay)
   878	{
   879		M *mp;
   880		void (*yield)(void);
   881	
   882		for(;;) {
   883			mp = runtime·atomicloadp(&runtime·extram);
   884			if(mp == MLOCKED) {
   885				yield = runtime·osyield;
   886				yield();
   887				continue;
   888			}
   889			if(mp == nil && !nilokay) {
   890				runtime·usleep(1);
   891				continue;
   892			}
   893			if(!runtime·casp(&runtime·extram, mp, MLOCKED)) {
   894				yield = runtime·osyield;
   895				yield();
   896				continue;
   897			}
   898			break;
   899		}
   900		return mp;
   901	}
   902	
   903	#pragma textflag NOSPLIT
   904	static void
   905	unlockextra(M *mp)
   906	{
   907		runtime·atomicstorep(&runtime·extram, mp);
   908	}
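The lock-by-exchange protocol used by lockextra/unlockextra (CAS a sentinel MLOCKED value in place of the head, publish a real head to unlock) can be sketched in Go with sync/atomic. This is only an illustration of the technique: it spins where the real code yields, and it uses a sentinel node where the C code uses the literal pointer value 1.

    package main

    import (
    	"fmt"
    	"sync/atomic"
    	"unsafe"
    )

    type node struct{ next *node }

    var (
    	head   unsafe.Pointer              // list head: *node, nil, or the locked sentinel
    	locked = unsafe.Pointer(new(node)) // stands in for MLOCKED ((M*)1)
    )

    // lockList steals the current head and leaves the sentinel behind,
    // serving as a simple spin lock that needs no other runtime support.
    func lockList() *node {
    	for {
    		p := atomic.LoadPointer(&head)
    		if p == locked {
    			continue // another thread holds the "lock"; spin (the real code yields)
    		}
    		if atomic.CompareAndSwapPointer(&head, p, locked) {
    			return (*node)(p)
    		}
    	}
    }

    // unlockList unlocks by storing a valid head pointer.
    func unlockList(n *node) {
    	atomic.StorePointer(&head, unsafe.Pointer(n))
    }

    func main() {
    	h := lockList()            // empty list: h == nil
    	unlockList(&node{next: h}) // push one node and unlock
    	h = lockList()             // lock again and pop it
    	unlockList(h.next)
    	fmt.Println("list empty again:", h.next == nil)
    }
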
   909	
   910	
   911	// Create a new m.  It will start off with a call to fn, or else the scheduler.
   912	static void
   913	newm(void(*fn)(void), P *p)
   914	{
   915		M *mp;
   916	
   917		mp = runtime·allocm(p);
   918		mp->nextp = p;
   919		mp->mstartfn = fn;
   920	
   921		if(runtime·iscgo) {
   922			CgoThreadStart ts;
   923	
   924			if(_cgo_thread_start == nil)
   925				runtime·throw("_cgo_thread_start missing");
   926			ts.m = mp;
   927			ts.g = mp->g0;
   928			ts.tls = mp->tls;
   929			ts.fn = runtime·mstart;
   930			runtime·asmcgocall(_cgo_thread_start, &ts);
   931			return;
   932		}
   933		runtime·newosproc(mp, (byte*)mp->g0->stackbase);
   934	}
   935	
   936	// Stops execution of the current m until new work is available.
   937	// Returns with acquired P.
   938	static void
   939	stopm(void)
   940	{
   941		if(m->locks)
   942			runtime·throw("stopm holding locks");
   943		if(m->p)
   944			runtime·throw("stopm holding p");
   945		if(m->spinning) {
   946			m->spinning = false;
   947			runtime·xadd(&runtime·sched.nmspinning, -1);
   948		}
   949	
   950	retry:
   951		runtime·lock(&runtime·sched);
   952		mput(m);
   953		runtime·unlock(&runtime·sched);
   954		runtime·notesleep(&m->park);
   955		runtime·noteclear(&m->park);
   956		if(m->helpgc) {
   957			runtime·gchelper();
   958			m->helpgc = 0;
   959			m->mcache = nil;
   960			goto retry;
   961		}
   962		acquirep(m->nextp);
   963		m->nextp = nil;
   964	}
   965	
   966	static void
   967	mspinning(void)
   968	{
   969		m->spinning = true;
   970	}
   971	
   972	// Schedules some M to run the p (creates an M if necessary).
   973	// If p==nil, tries to get an idle P; if there are no idle P's, it does nothing.
   974	static void
   975	startm(P *p, bool spinning)
   976	{
   977		M *mp;
   978		void (*fn)(void);
   979	
   980		runtime·lock(&runtime·sched);
   981		if(p == nil) {
   982			p = pidleget();
   983			if(p == nil) {
   984				runtime·unlock(&runtime·sched);
   985				if(spinning)
   986					runtime·xadd(&runtime·sched.nmspinning, -1);
   987				return;
   988			}
   989		}
   990		mp = mget();
   991		runtime·unlock(&runtime·sched);
   992		if(mp == nil) {
   993			fn = nil;
   994			if(spinning)
   995				fn = mspinning;
   996			newm(fn, p);
   997			return;
   998		}
   999		if(mp->spinning)
  1000			runtime·throw("startm: m is spinning");
  1001		if(mp->nextp)
  1002			runtime·throw("startm: m has p");
  1003		mp->spinning = spinning;
  1004		mp->nextp = p;
  1005		runtime·notewakeup(&mp->park);
  1006	}
  1007	
  1008	// Hands off P from syscall or locked M.
  1009	static void
  1010	handoffp(P *p)
  1011	{
  1012		// if it has local work, start it straight away
  1013		if(p->runqhead != p->runqtail || runtime·sched.runqsize) {
  1014			startm(p, false);
  1015			return;
  1016		}
  1017		// no local work, check that there are no spinning/idle M's,
  1018		// otherwise our help is not required
  1019		if(runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) == 0 &&  // TODO: fast atomic
  1020			runtime·cas(&runtime·sched.nmspinning, 0, 1)) {
  1021			startm(p, true);
  1022			return;
  1023		}
  1024		runtime·lock(&runtime·sched);
  1025		if(runtime·sched.gcwaiting) {
  1026			p->status = Pgcstop;
  1027			if(--runtime·sched.stopwait == 0)
  1028				runtime·notewakeup(&runtime·sched.stopnote);
  1029			runtime·unlock(&runtime·sched);
  1030			return;
  1031		}
  1032		if(runtime·sched.runqsize) {
  1033			runtime·unlock(&runtime·sched);
  1034			startm(p, false);
  1035			return;
  1036		}
  1037		// If this is the last running P and nobody is polling the network,
  1038		// we need to wake up another M to poll the network.
  1039		if(runtime·sched.npidle == runtime·gomaxprocs-1 && runtime·atomicload64(&runtime·sched.lastpoll) != 0) {
  1040			runtime·unlock(&runtime·sched);
  1041			startm(p, false);
  1042			return;
  1043		}
  1044		pidleput(p);
  1045		runtime·unlock(&runtime·sched);
  1046	}
  1047	
  1048	// Tries to add one more P to execute G's.
  1049	// Called when a G is made runnable (newproc, ready).
  1050	static void
  1051	wakep(void)
  1052	{
  1053		// be conservative about spinning threads
  1054		if(!runtime·cas(&runtime·sched.nmspinning, 0, 1))
  1055			return;
  1056		startm(nil, true);
  1057	}
  1058	
  1059	// Stops execution of the current m that is locked to a g until the g is runnable again.
  1060	// Returns with acquired P.
  1061	static void
  1062	stoplockedm(void)
  1063	{
  1064		P *p;
  1065	
  1066		if(m->lockedg == nil || m->lockedg->lockedm != m)
  1067			runtime·throw("stoplockedm: inconsistent locking");
  1068		if(m->p) {
  1069			// Schedule another M to run this p.
  1070			p = releasep();
  1071			handoffp(p);
  1072		}
  1073		incidlelocked(1);
  1074		// Wait until another thread schedules lockedg again.
  1075		runtime·notesleep(&m->park);
  1076		runtime·noteclear(&m->park);
  1077		if(m->lockedg->status != Grunnable)
  1078			runtime·throw("stoplockedm: not runnable");
  1079		acquirep(m->nextp);
  1080		m->nextp = nil;
  1081	}
  1082	
  1083	// Schedules the locked m to run the locked gp.
  1084	static void
  1085	startlockedm(G *gp)
  1086	{
  1087		M *mp;
  1088		P *p;
  1089	
  1090		mp = gp->lockedm;
  1091		if(mp == m)
  1092			runtime·throw("startlockedm: locked to me");
  1093		if(mp->nextp)
  1094			runtime·throw("startlockedm: m has p");
  1095		// directly handoff current P to the locked m
  1096		incidlelocked(-1);
  1097		p = releasep();
  1098		mp->nextp = p;
  1099		runtime·notewakeup(&mp->park);
  1100		stopm();
  1101	}
  1102	
  1103	// Stops the current m for stoptheworld.
  1104	// Returns when the world is restarted.
  1105	static void
  1106	gcstopm(void)
  1107	{
  1108		P *p;
  1109	
  1110		if(!runtime·sched.gcwaiting)
  1111			runtime·throw("gcstopm: not waiting for gc");
  1112		if(m->spinning) {
  1113			m->spinning = false;
  1114			runtime·xadd(&runtime·sched.nmspinning, -1);
  1115		}
  1116		p = releasep();
  1117		runtime·lock(&runtime·sched);
  1118		p->status = Pgcstop;
  1119		if(--runtime·sched.stopwait == 0)
  1120			runtime·notewakeup(&runtime·sched.stopnote);
  1121		runtime·unlock(&runtime·sched);
  1122		stopm();
  1123	}
  1124	
  1125	// Schedules gp to run on the current M.
  1126	// Never returns.
  1127	static void
  1128	execute(G *gp)
  1129	{
  1130		int32 hz;
  1131	
  1132		if(gp->status != Grunnable) {
  1133			runtime·printf("execute: bad g status %d\n", gp->status);
  1134			runtime·throw("execute: bad g status");
  1135		}
  1136		gp->status = Grunning;
  1137		gp->waitsince = 0;
  1138		gp->preempt = false;
  1139		gp->stackguard0 = gp->stackguard;
  1140		m->p->schedtick++;
  1141		m->curg = gp;
  1142		gp->m = m;
  1143	
  1144		// Check whether the profiler needs to be turned on or off.
  1145		hz = runtime·sched.profilehz;
  1146		if(m->profilehz != hz)
  1147			runtime·resetcpuprofiler(hz);
  1148	
  1149		runtime·gogo(&gp->sched);
  1150	}
  1151	
  1152	// Finds a runnable goroutine to execute.
  1153	// Tries to steal from other P's, get g from global queue, poll network.
  1154	static G*
  1155	findrunnable(void)
  1156	{
  1157		G *gp;
  1158		P *p;
  1159		int32 i;
  1160	
  1161	top:
  1162		if(runtime·sched.gcwaiting) {
  1163			gcstopm();
  1164			goto top;
  1165		}
  1166		if(runtime·fingwait && runtime·fingwake && (gp = runtime·wakefing()) != nil)
  1167			runtime·ready(gp);
  1168		// local runq
  1169		gp = runqget(m->p);
  1170		if(gp)
  1171			return gp;
  1172		// global runq
  1173		if(runtime·sched.runqsize) {
  1174			runtime·lock(&runtime·sched);
  1175			gp = globrunqget(m->p, 0);
  1176			runtime·unlock(&runtime·sched);
  1177			if(gp)
  1178				return gp;
  1179		}
  1180		// poll network
  1181		gp = runtime·netpoll(false);  // non-blocking
  1182		if(gp) {
  1183			injectglist(gp->schedlink);
  1184			gp->status = Grunnable;
  1185			return gp;
  1186		}
  1187		// If the number of spinning M's >= the number of busy P's, block.
  1188		// This is necessary to prevent excessive CPU consumption
  1189		// when GOMAXPROCS>>1 but the program's parallelism is low.
  1190		if(!m->spinning && 2 * runtime·atomicload(&runtime·sched.nmspinning) >= runtime·gomaxprocs - runtime·atomicload(&runtime·sched.npidle))  // TODO: fast atomic
  1191			goto stop;
  1192		if(!m->spinning) {
  1193			m->spinning = true;
  1194			runtime·xadd(&runtime·sched.nmspinning, 1);
  1195		}
  1196		// random steal from other P's
  1197		for(i = 0; i < 2*runtime·gomaxprocs; i++) {
  1198			if(runtime·sched.gcwaiting)
  1199				goto top;
  1200			p = runtime·allp[runtime·fastrand1()%runtime·gomaxprocs];
  1201			if(p == m->p)
  1202				gp = runqget(p);
  1203			else
  1204				gp = runqsteal(m->p, p);
  1205			if(gp)
  1206				return gp;
  1207		}
  1208	stop:
  1209		// return P and block
  1210		runtime·lock(&runtime·sched);
  1211		if(runtime·sched.gcwaiting) {
  1212			runtime·unlock(&runtime·sched);
  1213			goto top;
  1214		}
  1215		if(runtime·sched.runqsize) {
  1216			gp = globrunqget(m->p, 0);
  1217			runtime·unlock(&runtime·sched);
  1218			return gp;
  1219		}
  1220		p = releasep();
  1221		pidleput(p);
  1222		runtime·unlock(&runtime·sched);
  1223		if(m->spinning) {
  1224			m->spinning = false;
  1225			runtime·xadd(&runtime·sched.nmspinning, -1);
  1226		}
  1227		// check all runqueues once again
  1228		for(i = 0; i < runtime·gomaxprocs; i++) {
  1229			p = runtime·allp[i];
  1230			if(p && p->runqhead != p->runqtail) {
  1231				runtime·lock(&runtime·sched);
  1232				p = pidleget();
  1233				runtime·unlock(&runtime·sched);
  1234				if(p) {
  1235					acquirep(p);
  1236					goto top;
  1237				}
  1238				break;
  1239			}
  1240		}
  1241		// poll network
  1242		if(runtime·xchg64(&runtime·sched.lastpoll, 0) != 0) {
  1243			if(m->p)
  1244				runtime·throw("findrunnable: netpoll with p");
  1245			if(m->spinning)
  1246				runtime·throw("findrunnable: netpoll with spinning");
  1247			gp = runtime·netpoll(true);  // block until new work is available
  1248			runtime·atomicstore64(&runtime·sched.lastpoll, runtime·nanotime());
  1249			if(gp) {
  1250				runtime·lock(&runtime·sched);
  1251				p = pidleget();
  1252				runtime·unlock(&runtime·sched);
  1253				if(p) {
  1254					acquirep(p);
  1255					injectglist(gp->schedlink);
  1256					gp->status = Grunnable;
  1257					return gp;
  1258				}
  1259				injectglist(gp);
  1260			}
  1261		}
  1262		stopm();
  1263		goto top;
  1264	}
  1265	
  1266	static void
  1267	resetspinning(void)
  1268	{
  1269		int32 nmspinning;
  1270	
  1271		if(m->spinning) {
  1272			m->spinning = false;
  1273			nmspinning = runtime·xadd(&runtime·sched.nmspinning, -1);
  1274			if(nmspinning < 0)
  1275				runtime·throw("findrunnable: negative nmspinning");
  1276		} else
  1277			nmspinning = runtime·atomicload(&runtime·sched.nmspinning);
  1278	
  1279		// M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
  1280		// so see if we need to wakeup another P here.
  1281		if (nmspinning == 0 && runtime·atomicload(&runtime·sched.npidle) > 0)
  1282			wakep();
  1283	}
  1284	
  1285	// Injects the list of runnable G's into the scheduler.
  1286	// Can run concurrently with GC.
  1287	static void
  1288	injectglist(G *glist)
  1289	{
  1290		int32 n;
  1291		G *gp;
  1292	
  1293		if(glist == nil)
  1294			return;
  1295		runtime·lock(&runtime·sched);
  1296		for(n = 0; glist; n++) {
  1297			gp = glist;
  1298			glist = gp->schedlink;
  1299			gp->status = Grunnable;
  1300			globrunqput(gp);
  1301		}
  1302		runtime·unlock(&runtime·sched);
  1303	
  1304		for(; n && runtime·sched.npidle; n--)
  1305			startm(nil, false);
  1306	}
  1307	
  1308	// One round of scheduler: find a runnable goroutine and execute it.
  1309	// Never returns.
  1310	static void
  1311	schedule(void)
  1312	{
  1313		G *gp;
  1314		uint32 tick;
  1315	
  1316		if(m->locks)
  1317			runtime·throw("schedule: holding locks");
  1318	
  1319	top:
  1320		if(runtime·sched.gcwaiting) {
  1321			gcstopm();
  1322			goto top;
  1323		}
  1324	
  1325		gp = nil;
  1326		// Check the global runnable queue once in a while to ensure fairness.
  1327		// Otherwise two goroutines can completely occupy the local runqueue
  1328		// by constantly respawning each other.
  1329		tick = m->p->schedtick;
  1330		// This is a fancy way to say tick%61==0;
  1331		// it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
  1332		if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime·sched.runqsize > 0) {
  1333			runtime·lock(&runtime·sched);
  1334			gp = globrunqget(m->p, 1);
  1335			runtime·unlock(&runtime·sched);
  1336			if(gp)
  1337				resetspinning();
  1338		}
  1339		if(gp == nil) {
  1340			gp = runqget(m->p);
  1341			if(gp && m->spinning)
  1342				runtime·throw("schedule: spinning with local work");
  1343		}
  1344		if(gp == nil) {
  1345			gp = findrunnable();  // blocks until work is available
  1346			resetspinning();
  1347		}
  1348	
  1349		if(gp->lockedm) {
  1350			// Hands off own p to the locked m,
  1351			// then blocks waiting for a new p.
  1352			startlockedm(gp);
  1353			goto top;
  1354		}
  1355	
  1356		execute(gp);
  1357	}
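The fast tick check in schedule relies on 0x4325c53f being ceil(2^36/61), so (tick*0x4325c53f)>>36 equals tick/61 for every uint32 tick and the expression above reduces to tick%61 == 0. A quick spot check of the identity (illustrative, not runtime code):

    package main

    import "fmt"

    func main() {
    	const magic = 0x4325c53f // ceil(2^36 / 61)
    	for _, tick := range []uint32{0, 1, 60, 61, 62, 12345, ^uint32(0)} {
    		fast := tick - uint32((uint64(tick)*magic>>36)*61)
    		fmt.Println(tick, fast == tick%61) // always true
    	}
    }
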
  1358	
  1359	// Puts the current goroutine into a waiting state and calls unlockf.
  1360	// If unlockf returns false, the goroutine is resumed.
  1361	void
  1362	runtime·park(bool(*unlockf)(G*, void*), void *lock, int8 *reason)
  1363	{
  1364		if(g->status != Grunning)
  1365			runtime·throw("bad g status");
  1366		m->waitlock = lock;
  1367		m->waitunlockf = unlockf;
  1368		g->waitreason = reason;
  1369		runtime·mcall(park0);
  1370	}
  1371	
  1372	static bool
  1373	parkunlock(G *gp, void *lock)
  1374	{
  1375		USED(gp);
  1376		runtime·unlock(lock);
  1377		return true;
  1378	}
  1379	
  1380	// Puts the current goroutine into a waiting state and unlocks the lock.
  1381	// The goroutine can be made runnable again by calling runtime·ready(gp).
  1382	void
  1383	runtime·parkunlock(Lock *lock, int8 *reason)
  1384	{
  1385		runtime·park(parkunlock, lock, reason);
  1386	}
  1387	
  1388	// runtime·park continuation on g0.
  1389	static void
  1390	park0(G *gp)
  1391	{
  1392		bool ok;
  1393	
  1394		gp->status = Gwaiting;
  1395		gp->m = nil;
  1396		m->curg = nil;
  1397		if(m->waitunlockf) {
  1398			ok = m->waitunlockf(gp, m->waitlock);
  1399			m->waitunlockf = nil;
  1400			m->waitlock = nil;
  1401			if(!ok) {
  1402				gp->status = Grunnable;
  1403				execute(gp);  // Schedule it back, never returns.
  1404			}
  1405		}
  1406		if(m->lockedg) {
  1407			stoplockedm();
  1408			execute(gp);  // Never returns.
  1409		}
  1410		schedule();
  1411	}
  1412	
  1413	// Scheduler yield.
  1414	void
  1415	runtime·gosched(void)
  1416	{
  1417		if(g->status != Grunning)
  1418			runtime·throw("bad g status");
  1419		runtime·mcall(runtime·gosched0);
  1420	}
  1421	
  1422	// runtime·gosched continuation on g0.
  1423	void
  1424	runtime·gosched0(G *gp)
  1425	{
  1426		gp->status = Grunnable;
  1427		gp->m = nil;
  1428		m->curg = nil;
  1429		runtime·lock(&runtime·sched);
  1430		globrunqput(gp);
  1431		runtime·unlock(&runtime·sched);
  1432		if(m->lockedg) {
  1433			stoplockedm();
  1434			execute(gp);  // Never returns.
  1435		}
  1436		schedule();
  1437	}
  1438	
  1439	// Finishes execution of the current goroutine.
  1440	// It must be marked nosplit because it runs with sp > stackbase (as runtime·lessstack does).
  1441	// Since it does not return, that does not matter by itself, but if it is preempted
  1442	// at the split-stack check, the GC will complain about an inconsistent sp.
  1443	#pragma textflag NOSPLIT
  1444	void
  1445	runtime·goexit(void)
  1446	{
  1447		if(g->status != Grunning)
  1448			runtime·throw("bad g status");
  1449		if(raceenabled)
  1450			runtime·racegoend();
  1451		runtime·mcall(goexit0);
  1452	}
  1453	
  1454	// runtime·goexit continuation on g0.
  1455	static void
  1456	goexit0(G *gp)
  1457	{
  1458		gp->status = Gdead;
  1459		gp->m = nil;
  1460		gp->lockedm = nil;
  1461		gp->paniconfault = 0;
  1462		gp->defer = nil; // should be true already but just in case.
  1463		gp->panic = nil; // non-nil for Goexit during panic. points at stack-allocated data.
  1464		gp->writenbuf = 0;
  1465		gp->writebuf = nil;
  1466		gp->waitreason = nil;
  1467		gp->param = nil;
  1468		m->curg = nil;
  1469		m->lockedg = nil;
  1470		if(m->locked & ~LockExternal) {
  1471			runtime·printf("invalid m->locked = %d\n", m->locked);
  1472			runtime·throw("internal lockOSThread error");
  1473		}	
  1474		m->locked = 0;
  1475		runtime·unwindstack(gp, nil);
  1476		gfput(m->p, gp);
  1477		schedule();
  1478	}
  1479	
  1480	#pragma textflag NOSPLIT
  1481	static void
  1482	save(void *pc, uintptr sp)
  1483	{
  1484		g->sched.pc = (uintptr)pc;
  1485		g->sched.sp = sp;
  1486		g->sched.lr = 0;
  1487		g->sched.ret = 0;
  1488		g->sched.ctxt = 0;
  1489		g->sched.g = g;
  1490	}
  1491	
  1492	// The goroutine g is about to enter a system call.
  1493	// Record that it's not using the cpu anymore.
  1494	// This is called only from the go syscall library and cgocall,
  1495	// not from the low-level system calls used by the runtime.
  1496	//
  1497	// Entersyscall cannot split the stack: the runtime·gosave must
  1498	// make g->sched refer to the caller's stack segment, because
  1499	// entersyscall is going to return immediately after.
  1500	#pragma textflag NOSPLIT
  1501	void
  1502	runtime·reentersyscall(void *pc, uintptr sp)
  1503	{
  1504		// Disable preemption: during this function g is in Gsyscall status
  1505		// but can have an inconsistent g->sched; do not let the GC observe it.
  1506		m->locks++;
  1507	
  1508		// Leave SP around for GC and traceback.
  1509		save(pc, sp);
  1510		g->syscallsp = g->sched.sp;
  1511		g->syscallpc = g->sched.pc;
  1512		g->syscallstack = g->stackbase;
  1513		g->syscallguard = g->stackguard;
  1514		g->status = Gsyscall;
  1515		if(g->syscallsp < g->syscallguard-StackGuard || g->syscallstack < g->syscallsp) {
  1516			// runtime·printf("entersyscall inconsistent %p [%p,%p]\n",
  1517			//	g->syscallsp, g->syscallguard-StackGuard, g->syscallstack);
  1518			runtime·throw("entersyscall");
  1519		}
  1520	
  1521		if(runtime·atomicload(&runtime·sched.sysmonwait)) {  // TODO: fast atomic
  1522			runtime·lock(&runtime·sched);
  1523			if(runtime·atomicload(&runtime·sched.sysmonwait)) {
  1524				runtime·atomicstore(&runtime·sched.sysmonwait, 0);
  1525				runtime·notewakeup(&runtime·sched.sysmonnote);
  1526			}
  1527			runtime·unlock(&runtime·sched);
  1528			save(pc, sp);
  1529		}
  1530	
  1531		m->mcache = nil;
  1532		m->p->m = nil;
  1533		runtime·atomicstore(&m->p->status, Psyscall);
  1534		if(runtime·sched.gcwaiting) {
  1535			runtime·lock(&runtime·sched);
  1536			if (runtime·sched.stopwait > 0 && runtime·cas(&m->p->status, Psyscall, Pgcstop)) {
  1537				if(--runtime·sched.stopwait == 0)
  1538					runtime·notewakeup(&runtime·sched.stopnote);
  1539			}
  1540			runtime·unlock(&runtime·sched);
  1541			save(pc, sp);
  1542		}
  1543	
  1544		// Goroutines must not split stacks in Gsyscall status (it would corrupt g->sched).
  1545		// We set stackguard to StackPreempt so that the first split-stack check calls morestack.
  1546		// Morestack detects this case and throws.
  1547		g->stackguard0 = StackPreempt;
  1548		m->locks--;
  1549	}
  1550	
  1551	#pragma textflag NOSPLIT
  1552	void
  1553	·entersyscall(int32 dummy)
  1554	{
  1555		runtime·reentersyscall(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
  1556	}
  1557	
  1558	// The same as runtime·entersyscall(), but with a hint that the syscall is blocking.
  1559	#pragma textflag NOSPLIT
  1560	void
  1561	·entersyscallblock(int32 dummy)
  1562	{
  1563		P *p;
  1564	
  1565		m->locks++;  // see comment in entersyscall
  1566	
  1567		// Leave SP around for GC and traceback.
  1568		save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
  1569		g->syscallsp = g->sched.sp;
  1570		g->syscallpc = g->sched.pc;
  1571		g->syscallstack = g->stackbase;
  1572		g->syscallguard = g->stackguard;
  1573		g->status = Gsyscall;
  1574		if(g->syscallsp < g->syscallguard-StackGuard || g->syscallstack < g->syscallsp) {
  1575			// runtime·printf("entersyscall inconsistent %p [%p,%p]\n",
  1576			//	g->syscallsp, g->syscallguard-StackGuard, g->syscallstack);
  1577			runtime·throw("entersyscallblock");
  1578		}
  1579	
  1580		p = releasep();
  1581		handoffp(p);
  1582		if(g->isbackground)  // do not consider blocked scavenger for deadlock detection
  1583			incidlelocked(1);
  1584	
  1585		// Resave for traceback during blocked call.
  1586		save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
  1587	
  1588		g->stackguard0 = StackPreempt;  // see comment in entersyscall
  1589		m->locks--;
  1590	}
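The practical effect of releasing the P here (and of sysmon retaking P's stuck in Psyscall) is that a goroutine blocked in a system call does not stall the rest of the program, even with a single P. A small sketch of that behavior (the pipe and the sleep durations are illustrative):

    package main

    import (
    	"fmt"
    	"os"
    	"runtime"
    	"time"
    )

    func main() {
    	runtime.GOMAXPROCS(1)
    	r, w, err := os.Pipe()
    	if err != nil {
    		panic(err)
    	}

    	go func() {
    		buf := make([]byte, 1)
    		r.Read(buf) // blocks in a read syscall; the P is handed off
    		fmt.Println("reader: got a byte")
    	}()

    	// Keeps running on the handed-off P while the reader is blocked.
    	time.Sleep(100 * time.Millisecond)
    	fmt.Println("main: still running, waking the reader")
    	w.Write([]byte{1})
    	time.Sleep(10 * time.Millisecond)
    }
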
  1591	
  1592	// The goroutine g exited its system call.
  1593	// Arrange for it to run on a cpu again.
  1594	// This is called only from the go syscall library, not
  1595	// from the low-level system calls used by the runtime.
  1596	#pragma textflag NOSPLIT
  1597	void
  1598	·exitsyscall(int32 dummy)
  1599	{
  1600		m->locks++;  // see comment in entersyscall
  1601	
  1602		if(runtime·getcallersp(&dummy) > g->syscallsp)
  1603			runtime·throw("exitsyscall: syscall frame is no longer valid");
  1604	
  1605		if(g->isbackground)  // do not consider blocked scavenger for deadlock detection
  1606			incidlelocked(-1);
  1607	
  1608		g->waitsince = 0;
  1609		if(exitsyscallfast()) {
  1610			// There's a cpu for us, so we can run.
  1611			m->p->syscalltick++;
  1612			g->status = Grunning;
  1613			// Garbage collector isn't running (since we are),
  1614			// so okay to clear gcstack and gcsp.
  1615			g->syscallstack = (uintptr)nil;
  1616			g->syscallsp = (uintptr)nil;
  1617			m->locks--;
  1618			if(g->preempt) {
  1619				// restore the preemption request in case we've cleared it in newstack
  1620				g->stackguard0 = StackPreempt;
  1621			} else {
  1622				// otherwise restore the real stackguard, we've spoiled it in entersyscall/entersyscallblock
  1623				g->stackguard0 = g->stackguard;
  1624			}
  1625			return;
  1626		}
  1627	
  1628		m->locks--;
  1629	
  1630		// Call the scheduler.
  1631		runtime·mcall(exitsyscall0);
  1632	
  1633		// Scheduler returned, so we're allowed to run now.
  1634		// Delete the gcstack information that we left for
  1635		// the garbage collector during the system call.
  1636		// Must wait until now because until gosched returns
  1637		// we don't know for sure that the garbage collector
  1638		// is not running.
  1639		g->syscallstack = (uintptr)nil;
  1640		g->syscallsp = (uintptr)nil;
  1641		m->p->syscalltick++;
  1642	}
  1643	
  1644	#pragma textflag NOSPLIT
  1645	static bool
  1646	exitsyscallfast(void)
  1647	{
  1648		P *p;
  1649	
  1650		// Freezetheworld sets stopwait but does not retake P's.
  1651		if(runtime·sched.stopwait) {
  1652			m->p = nil;
  1653			return false;
  1654		}
  1655	
  1656		// Try to re-acquire the last P.
  1657		if(m->p && m->p->status == Psyscall && runtime·cas(&m->p->status, Psyscall, Prunning)) {
  1658			// There's a cpu for us, so we can run.
  1659			m->mcache = m->p->mcache;
  1660			m->p->m = m;
  1661			return true;
  1662		}
  1663		// Try to get any other idle P.
  1664		m->p = nil;
  1665		if(runtime·sched.pidle) {
  1666			runtime·lock(&runtime·sched);
  1667			p = pidleget();
  1668			if(p && runtime·atomicload(&runtime·sched.sysmonwait)) {
  1669				runtime·atomicstore(&runtime·sched.sysmonwait, 0);
  1670				runtime·notewakeup(&runtime·sched.sysmonnote);
  1671			}
  1672			runtime·unlock(&runtime·sched);
  1673			if(p) {
  1674				acquirep(p);
  1675				return true;
  1676			}
  1677		}
  1678		return false;
  1679	}
  1680	
  1681	// runtime·exitsyscall slow path on g0.
  1682	// Failed to acquire P, enqueue gp as runnable.
  1683	static void
  1684	exitsyscall0(G *gp)
  1685	{
  1686		P *p;
  1687	
  1688		gp->status = Grunnable;
  1689		gp->m = nil;
  1690		m->curg = nil;
  1691		runtime·lock(&runtime·sched);
  1692		p = pidleget();
  1693		if(p == nil)
  1694			globrunqput(gp);
  1695		else if(runtime·atomicload(&runtime·sched.sysmonwait)) {
  1696			runtime·atomicstore(&runtime·sched.sysmonwait, 0);
  1697			runtime·notewakeup(&runtime·sched.sysmonnote);
  1698		}
  1699		runtime·unlock(&runtime·sched);
  1700		if(p) {
  1701			acquirep(p);
  1702			execute(gp);  // Never returns.
  1703		}
  1704		if(m->lockedg) {
  1705			// Wait until another thread schedules gp and so m again.
  1706			stoplockedm();
  1707			execute(gp);  // Never returns.
  1708		}
  1709		stopm();
  1710		schedule();  // Never returns.
  1711	}
  1712	
  1713	// Called from syscall package before fork.
  1714	#pragma textflag NOSPLIT
  1715	void
  1716	syscall·runtime_BeforeFork(void)
  1717	{
  1718		// Fork can hang if preempted with signals frequently enough (see issue 5517).
  1719		// Ensure that we stay on the same M where we disable profiling.
  1720		m->locks++;
  1721		if(m->profilehz != 0)
  1722			runtime·resetcpuprofiler(0);
  1723	
  1724		// This function is called before fork in the syscall package.
  1725		// Code between fork and exec must not allocate memory nor even try to grow the stack.
  1726		// Here we spoil g->stackguard to reliably detect any attempt to grow the stack.
  1727		// runtime_AfterFork will undo this in the parent process, but not in the child.
  1728		m->forkstackguard = g->stackguard;
  1729		g->stackguard0 = StackPreempt-1;
  1730		g->stackguard = StackPreempt-1;
  1731	}
  1732	
  1733	// Called from syscall package after fork in parent.
  1734	#pragma textflag NOSPLIT
  1735	void
  1736	syscall·runtime_AfterFork(void)
  1737	{
  1738		int32 hz;
  1739	
  1740		// See the comment in runtime_BeforeFork.
  1741		g->stackguard0 = m->forkstackguard;
  1742		g->stackguard = m->forkstackguard;
  1743		m->forkstackguard = 0;
  1744	
  1745		hz = runtime·sched.profilehz;
  1746		if(hz != 0)
  1747			runtime·resetcpuprofiler(hz);
  1748		m->locks--;
  1749	}
  1750	
  1751	// Hook used by runtime·malg to call runtime·stackalloc on the
  1752	// scheduler stack.  This exists because runtime·stackalloc insists
  1753	// on being called on the scheduler stack, to avoid trying to grow
  1754	// the stack while allocating a new stack segment.
  1755	static void
  1756	mstackalloc(G *gp)
  1757	{
  1758		G *newg;
  1759		uintptr size;
  1760	
  1761		newg = (G*)gp->param;
  1762		size = newg->stacksize;
  1763		newg->stacksize = 0;
  1764		gp->param = runtime·stackalloc(newg, size);
  1765		runtime·gogo(&gp->sched);
  1766	}
  1767	
  1768	// Allocate a new g, with a stack big enough for stacksize bytes.
  1769	G*
  1770	runtime·malg(int32 stacksize)
  1771	{
  1772		G *newg;
  1773		byte *stk;
  1774	
  1775		if(StackTop < sizeof(Stktop)) {
  1776			runtime·printf("runtime: SizeofStktop=%d, should be >=%d\n", (int32)StackTop, (int32)sizeof(Stktop));
  1777			runtime·throw("runtime: bad stack.h");
  1778		}
  1779	
  1780		newg = allocg();
  1781		if(stacksize >= 0) {
  1782			stacksize = runtime·round2(StackSystem + stacksize);
  1783			if(g == m->g0) {
  1784				// running on scheduler stack already.
  1785				stk = runtime·stackalloc(newg, stacksize);
  1786			} else {
  1787				// have to call stackalloc on scheduler stack.
  1788				newg->stacksize = stacksize;
  1789				g->param = newg;
  1790				runtime·mcall(mstackalloc);
  1791				stk = g->param;
  1792				g->param = nil;
  1793			}
  1794			newg->stack0 = (uintptr)stk;
  1795			newg->stackguard = (uintptr)stk + StackGuard;
  1796			newg->stackguard0 = newg->stackguard;
  1797			newg->stackbase = (uintptr)stk + stacksize - sizeof(Stktop);
  1798		}
  1799		return newg;
  1800	}
  1801	
  1802	// Create a new g running fn with siz bytes of arguments.
  1803	// Put it on the queue of g's waiting to run.
  1804	// The compiler turns a go statement into a call to this.
  1805	// Cannot split the stack because it assumes that the arguments
  1806	// are available sequentially after &fn; they would not be
  1807	// copied if a stack split occurred.  It's OK for this to call
  1808	// functions that split the stack.
  1809	#pragma textflag NOSPLIT
  1810	void
  1811	runtime·newproc(int32 siz, FuncVal* fn, ...)
  1812	{
  1813		byte *argp;
  1814	
  1815		if(thechar == '5')
  1816			argp = (byte*)(&fn+2);  // skip caller's saved LR
  1817		else
  1818			argp = (byte*)(&fn+1);
  1819		runtime·newproc1(fn, argp, siz, 0, runtime·getcallerpc(&siz));
  1820	}
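
// Illustrative sketch (not part of the runtime): the compiler lowers a go
// statement into a call to runtime·newproc, passing the total argument size,
// the function value, and the arguments themselves laid out after fn.
// For a hypothetical Go function f taking two int32 arguments,
//
//	go f(1, 2)
//
// compiles to roughly the equivalent of
//
//	runtime·newproc(8, f, (int32)1, (int32)2);
//
// newproc1 below then copies those 8 bytes of arguments from the caller's
// frame onto the new goroutine's stack before making it runnable.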
  1821	
  1822	// Create a new g running fn with narg bytes of arguments starting
  1823	// at argp and returning nret bytes of results.  callerpc is the
  1824	// address of the go statement that created this.  The new g is put
  1825	// on the queue of g's waiting to run.
  1826	G*
  1827	runtime·newproc1(FuncVal *fn, byte *argp, int32 narg, int32 nret, void *callerpc)
  1828	{
  1829		byte *sp;
  1830		G *newg;
  1831		P *p;
  1832		int32 siz;
  1833	
  1834	//runtime·printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret);
  1835		if(fn == nil) {
  1836			m->throwing = -1;  // do not dump full stacks
  1837			runtime·throw("go of nil func value");
  1838		}
  1839		m->locks++;  // disable preemption because it can be holding p in a local var
  1840		siz = narg + nret;
  1841		siz = (siz+7) & ~7;
  1842	
  1843		// We could instead create a secondary stack frame
  1844		// and make it look like goexit was on the original but
  1845		// the call to the actual goroutine function was split.
  1846		// Not worth it: this is almost always an error.
  1847		if(siz > StackMin - 1024)
  1848			runtime·throw("runtime.newproc: function arguments too large for new goroutine");
  1849	
  1850		p = m->p;
  1851		if((newg = gfget(p)) != nil) {
  1852			if(newg->stackguard - StackGuard != newg->stack0)
  1853				runtime·throw("invalid stack in newg");
  1854		} else {
  1855			newg = runtime·malg(StackMin);
  1856			allgadd(newg);
  1857		}
  1858	
  1859		sp = (byte*)newg->stackbase;
  1860		sp -= siz;
  1861		runtime·memmove(sp, argp, narg);
  1862		if(thechar == '5') {
  1863			// caller's LR
  1864			sp -= sizeof(void*);
  1865			*(void**)sp = nil;
  1866		}
  1867	
  1868		runtime·memclr((byte*)&newg->sched, sizeof newg->sched);
  1869		newg->sched.sp = (uintptr)sp;
  1870		newg->sched.pc = (uintptr)runtime·goexit;
  1871		newg->sched.g = newg;
  1872		runtime·gostartcallfn(&newg->sched, fn);
  1873		newg->gopc = (uintptr)callerpc;
  1874		newg->status = Grunnable;
  1875		if(p->goidcache == p->goidcacheend) {
  1876			p->goidcache = runtime·xadd64(&runtime·sched.goidgen, GoidCacheBatch);
  1877			p->goidcacheend = p->goidcache + GoidCacheBatch;
  1878		}
  1879		newg->goid = p->goidcache++;
  1880		newg->panicwrap = 0;
  1881		if(raceenabled)
  1882			newg->racectx = runtime·racegostart((void*)callerpc);
  1883		runqput(p, newg);
  1884	
  1885		if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0 && fn->fn != runtime·main)  // TODO: fast atomic
  1886			wakep();
  1887		m->locks--;
  1888		if(m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
  1889			g->stackguard0 = StackPreempt;
  1890		return newg;
  1891	}
  1892	
  1893	static void
  1894	allgadd(G *gp)
  1895	{
  1896		G **new;
  1897		uintptr cap;
  1898	
  1899		runtime·lock(&allglock);
  1900		if(runtime·allglen >= allgcap) {
  1901			cap = 4096/sizeof(new[0]);
  1902			if(cap < 2*allgcap)
  1903				cap = 2*allgcap;
  1904			new = runtime·malloc(cap*sizeof(new[0]));
  1905			if(new == nil)
  1906				runtime·throw("runtime: cannot allocate memory");
  1907			if(runtime·allg != nil) {
  1908				runtime·memmove(new, runtime·allg, runtime·allglen*sizeof(new[0]));
  1909				runtime·free(runtime·allg);
  1910			}
  1911			runtime·allg = new;
  1912			allgcap = cap;
  1913		}
  1914		runtime·allg[runtime·allglen++] = gp;
  1915		runtime·unlock(&allglock);
  1916	}
  1917	
  1918	// Put on gfree list.
  1919	// If local list is too long, transfer a batch to the global list.
  1920	static void
  1921	gfput(P *p, G *gp)
  1922	{
  1923		uintptr stksize;
  1924		Stktop *top;
  1925	
  1926		if(gp->stackguard - StackGuard != gp->stack0)
  1927			runtime·throw("invalid stack in gfput");
  1928		stksize = gp->stackbase + sizeof(Stktop) - gp->stack0;
  1929		if(stksize != gp->stacksize) {
  1930			runtime·printf("runtime: bad stacksize, goroutine %D, remain=%d, last=%d\n",
  1931				gp->goid, (int32)gp->stacksize, (int32)stksize);
  1932			runtime·throw("gfput: bad stacksize");
  1933		}
  1934		top = (Stktop*)gp->stackbase;
  1935		if(top->malloced) {
  1936			// non-standard stack size - free it.
  1937			runtime·stackfree(gp, (void*)gp->stack0, top);
  1938			gp->stack0 = 0;
  1939			gp->stackguard = 0;
  1940			gp->stackguard0 = 0;
  1941			gp->stackbase = 0;
  1942		}
  1943		gp->schedlink = p->gfree;
  1944		p->gfree = gp;
  1945		p->gfreecnt++;
  1946		if(p->gfreecnt >= 64) {
  1947			runtime·lock(&runtime·sched.gflock);
  1948			while(p->gfreecnt >= 32) {
  1949				p->gfreecnt--;
  1950				gp = p->gfree;
  1951				p->gfree = gp->schedlink;
  1952				gp->schedlink = runtime·sched.gfree;
  1953				runtime·sched.gfree = gp;
  1954			}
  1955			runtime·unlock(&runtime·sched.gflock);
  1956		}
  1957	}
  1958	
  1959	// Get from gfree list.
  1960	// If local list is empty, grab a batch from global list.
  1961	static G*
  1962	gfget(P *p)
  1963	{
  1964		G *gp;
  1965		byte *stk;
  1966	
  1967	retry:
  1968		gp = p->gfree;
  1969		if(gp == nil && runtime·sched.gfree) {
  1970			runtime·lock(&runtime·sched.gflock);
  1971			while(p->gfreecnt < 32 && runtime·sched.gfree) {
  1972				p->gfreecnt++;
  1973				gp = runtime·sched.gfree;
  1974				runtime·sched.gfree = gp->schedlink;
  1975				gp->schedlink = p->gfree;
  1976				p->gfree = gp;
  1977			}
  1978			runtime·unlock(&runtime·sched.gflock);
  1979			goto retry;
  1980		}
  1981		if(gp) {
  1982			p->gfree = gp->schedlink;
  1983			p->gfreecnt--;
  1984	
  1985			if(gp->stack0 == 0) {
  1986				// Stack was deallocated in gfput.  Allocate a new one.
  1987				if(g == m->g0) {
  1988					stk = runtime·stackalloc(gp, FixedStack);
  1989				} else {
  1990					gp->stacksize = FixedStack;
  1991					g->param = gp;
  1992					runtime·mcall(mstackalloc);
  1993					stk = g->param;
  1994					g->param = nil;
  1995				}
  1996				gp->stack0 = (uintptr)stk;
  1997				gp->stackbase = (uintptr)stk + FixedStack - sizeof(Stktop);
  1998				gp->stackguard = (uintptr)stk + StackGuard;
  1999				gp->stackguard0 = gp->stackguard;
  2000			}
  2001		}
  2002		return gp;
  2003	}
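
// Note on the batching above (illustrative): each P caches dead G's on its
// local gfree list and touches the global list (and gflock) only in bulk.
// For example, once a P has accumulated 64 free G's, gfput moves G's to the
// global list until fewer than 32 remain locally; conversely, when the local
// list is empty, gfget refills it with up to 32 G's from the global list
// before retrying.  The common case thus stays lock-free while no single P
// can hoard an unbounded number of dead G's.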
  2004	
  2005	// Purge all cached G's from gfree list to the global list.
  2006	static void
  2007	gfpurge(P *p)
  2008	{
  2009		G *gp;
  2010	
  2011		runtime·lock(&runtime·sched.gflock);
  2012		while(p->gfreecnt) {
  2013			p->gfreecnt--;
  2014			gp = p->gfree;
  2015			p->gfree = gp->schedlink;
  2016			gp->schedlink = runtime·sched.gfree;
  2017			runtime·sched.gfree = gp;
  2018		}
  2019		runtime·unlock(&runtime·sched.gflock);
  2020	}
  2021	
  2022	void
  2023	runtime·Breakpoint(void)
  2024	{
  2025		runtime·breakpoint();
  2026	}
  2027	
  2028	void
  2029	runtime·Gosched(void)
  2030	{
  2031		runtime·gosched();
  2032	}
  2033	
  2034	// Implementation of runtime.GOMAXPROCS.
  2035	// delete when scheduler is even stronger
  2036	int32
  2037	runtime·gomaxprocsfunc(int32 n)
  2038	{
  2039		int32 ret;
  2040	
  2041		if(n > MaxGomaxprocs)
  2042			n = MaxGomaxprocs;
  2043		runtime·lock(&runtime·sched);
  2044		ret = runtime·gomaxprocs;
  2045		if(n <= 0 || n == ret) {
  2046			runtime·unlock(&runtime·sched);
  2047			return ret;
  2048		}
  2049		runtime·unlock(&runtime·sched);
  2050	
  2051		runtime·semacquire(&runtime·worldsema, false);
  2052		m->gcing = 1;
  2053		runtime·stoptheworld();
  2054		newprocs = n;
  2055		m->gcing = 0;
  2056		runtime·semrelease(&runtime·worldsema);
  2057		runtime·starttheworld();
  2058	
  2059		return ret;
  2060	}
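
// Note (illustrative): gomaxprocsfunc does not resize anything itself; it
// only records the requested value in newprocs while the world is stopped.
// runtime·starttheworld observes newprocs and calls procresize (below) with
// it, so the actual redistribution of P's and runnable G's happens as the
// world is restarted.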
  2061	
  2062	// lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
  2063	// after they modify m->locked. Do not allow preemption during this call,
  2064	// or else the m might be different in this function than in the caller.
  2065	#pragma textflag NOSPLIT
  2066	static void
  2067	lockOSThread(void)
  2068	{
  2069		m->lockedg = g;
  2070		g->lockedm = m;
  2071	}
  2072	
  2073	void
  2074	runtime·LockOSThread(void)
  2075	{
  2076		m->locked |= LockExternal;
  2077		lockOSThread();
  2078	}
  2079	
  2080	void
  2081	runtime·lockOSThread(void)
  2082	{
  2083		m->locked += LockInternal;
  2084		lockOSThread();
  2085	}
  2086	
  2087	
  2088	// unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
  2089	// after they update m->locked. Do not allow preemption during this call,
  2090	// or else the m might be different in this function than in the caller.
  2091	#pragma textflag NOSPLIT
  2092	static void
  2093	unlockOSThread(void)
  2094	{
  2095		if(m->locked != 0)
  2096			return;
  2097		m->lockedg = nil;
  2098		g->lockedm = nil;
  2099	}
  2100	
  2101	void
  2102	runtime·UnlockOSThread(void)
  2103	{
  2104		m->locked &= ~LockExternal;
  2105		unlockOSThread();
  2106	}
  2107	
  2108	void
  2109	runtime·unlockOSThread(void)
  2110	{
  2111		if(m->locked < LockInternal)
  2112			runtime·throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
  2113		m->locked -= LockInternal;
  2114		unlockOSThread();
  2115	}
  2116	
  2117	bool
  2118	runtime·lockedOSThread(void)
  2119	{
  2120		return g->lockedm != nil && m->lockedg != nil;
  2121	}
  2122	
  2123	int32
  2124	runtime·gcount(void)
  2125	{
  2126		G *gp;
  2127		int32 n, s;
  2128		uintptr i;
  2129	
  2130		n = 0;
  2131		runtime·lock(&allglock);
  2132		// TODO(dvyukov): runtime.NumGoroutine() is O(N).
  2133		// We do not want to increment/decrement a centralized counter in newproc/goexit,
  2134		// just to make runtime.NumGoroutine() faster.
  2135		// Compromise solution is to introduce per-P counters of active goroutines.
  2136		for(i = 0; i < runtime·allglen; i++) {
  2137			gp = runtime·allg[i];
  2138			s = gp->status;
  2139			if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting)
  2140				n++;
  2141		}
  2142		runtime·unlock(&allglock);
  2143		return n;
  2144	}
  2145	
  2146	int32
  2147	runtime·mcount(void)
  2148	{
  2149		return runtime·sched.mcount;
  2150	}
  2151	
  2152	void
  2153	runtime·badmcall(void (*fn)(G*))  // called from assembly
  2154	{
  2155		USED(fn); // TODO: print fn?
  2156		runtime·throw("runtime: mcall called on m->g0 stack");
  2157	}
  2158	
  2159	void
  2160	runtime·badmcall2(void (*fn)(G*))  // called from assembly
  2161	{
  2162		USED(fn);
  2163		runtime·throw("runtime: mcall function returned");
  2164	}
  2165	
  2166	void
  2167	runtime·badreflectcall(void) // called from assembly
  2168	{
  2169		runtime·panicstring("runtime: arg size to reflect.call more than 1GB");
  2170	}
  2171	
  2172	static struct {
  2173		Lock;
  2174		void (*fn)(uintptr*, int32);
  2175		int32 hz;
  2176		uintptr pcbuf[100];
  2177	} prof;
  2178	
  2179	static void System(void) {}
  2180	static void ExternalCode(void) {}
  2181	static void GC(void) {}
  2182	extern byte etext[];
  2183	
  2184	// Called if we receive a SIGPROF signal.
  2185	void
  2186	runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp, M *mp)
  2187	{
  2188		int32 n;
  2189		bool traceback;
  2190		// Do not use global m in this function, use mp instead.
  2191		// On windows one m is sending reports about all the g's, so the global m refers to the wrong thread.
  2192		byte m;
  2193	
  2194		m = 0;
  2195		USED(m);
  2196	
  2197		if(prof.fn == nil || prof.hz == 0)
  2198			return;
  2199	
  2200		// Profiling runs concurrently with GC, so it must not allocate.
  2201		mp->mallocing++;
  2202	
  2203		// Define that a "user g" is a user-created goroutine, and a "system g"
  2204		// is one that is m->g0 or m->gsignal. We've only made sure that we
  2205		// can unwind user g's, so exclude the system g's.
  2206		//
  2207		// It is not quite as easy as testing gp == m->curg (the current user g)
  2208		// because we might be interrupted for profiling halfway through a
  2209		// goroutine switch. The switch involves updating three (or four) values:
  2210		// g, PC, SP, and (on arm) LR. The PC must be the last to be updated,
  2211		// because once it gets updated the new g is running.
  2212		//
  2213		// When switching from a user g to a system g, LR is not considered live,
  2214		// so the update only affects g, SP, and PC. Since PC must be last,
  2215		// the possible partial transitions in ordinary execution are (1) g alone is updated,
  2216		// (2) both g and SP are updated, and (3) SP alone is updated.
  2217		// If g is updated, we'll see a system g and not look closer.
  2218		// If SP alone is updated, we can detect the partial transition by checking
  2219		// whether the SP is within g's stack bounds. (We could also require that SP
  2220		// be changed only after g, but the stack bounds check is needed by other
  2221		// cases, so there is no need to impose an additional requirement.)
  2222		//
  2223		// There is one exceptional transition to a system g, not in ordinary execution.
  2224		// When a signal arrives, the operating system starts the signal handler running
  2225		// with an updated PC and SP. The g is updated last, at the beginning of the
  2226		// handler. There are two reasons this is okay. First, until g is updated the
  2227		// g and SP do not match, so the stack bounds check detects the partial transition.
  2228		// Second, signal handlers currently run with signals disabled, so a profiling
  2229		// signal cannot arrive during the handler.
  2230		//
  2231		// When switching from a system g to a user g, there are three possibilities.
  2232		//
  2233		// First, it may be that the g switch has no PC update, because the SP
  2234		// either corresponds to a user g throughout (as in runtime.asmcgocall)
  2235		// or because it has been arranged to look like a user g frame
  2236		// (as in runtime.cgocallback_gofunc). In this case, since the entire
  2237		// transition is a g+SP update, a partial transition updating just one of 
  2238		// those will be detected by the stack bounds check.
  2239		//
  2240		// Second, when returning from a signal handler, the PC and SP updates
  2241		// are performed by the operating system in an atomic update, so the g
  2242		// update must be done before them. The stack bounds check detects
  2243		// the partial transition here, and (again) signal handlers run with signals
  2244		// disabled, so a profiling signal cannot arrive then anyway.
  2245		//
  2246		// Third, the common case: it may be that the switch updates g, SP, and PC
  2247		// separately, as in runtime.gogo.
  2248		//
  2249		// Because runtime.gogo is the only instance, we check whether the PC lies
  2250		// within that function, and if so, do not ask for a traceback. This approach
  2251		// requires knowing the size of the runtime.gogo function, which we
  2252		// record in arch_*.h and check in runtime_test.go.
  2253		//
  2254		// There is another apparently viable approach, recorded here in case
  2255		// the "PC within runtime.gogo" check turns out not to be usable.
  2256		// It would be possible to delay the update of either g or SP until immediately
  2257		// before the PC update instruction. Then, because of the stack bounds check,
  2258		// the only problematic interrupt point is just before that PC update instruction,
  2259		// and the sigprof handler can detect that instruction and simulate stepping past
  2260		// it in order to reach a consistent state. On ARM, the update of g must be made
  2261		// in two places (in R10 and also in a TLS slot), so the delayed update would
  2262		// need to be the SP update. The sigprof handler must read the instruction at
  2263		// the current PC and if it was the known instruction (for example, JMP BX or 
  2264		// MOV R2, PC), use that other register in place of the PC value.
  2265		// The biggest drawback to this solution is that it requires that we can tell
  2266		// whether it's safe to read from the memory pointed at by PC.
  2267		// In a correct program, we can test PC == nil and otherwise read,
  2268		// but if a profiling signal happens at the instant that a program executes
  2269		// a bad jump (before the program manages to handle the resulting fault)
  2270		// the profiling handler could fault trying to read nonexistent memory.
  2271		//
  2272		// To recap, there are no constraints on the assembly being used for the
  2273		// transition. We simply require that g and SP match and that the PC is not
  2274		// in runtime.gogo.
  2275		traceback = true;
  2276		if(gp == nil || gp != mp->curg ||
  2277		   (uintptr)sp < gp->stackguard - StackGuard || gp->stackbase < (uintptr)sp ||
  2278		   ((uint8*)runtime·gogo <= pc && pc < (uint8*)runtime·gogo + RuntimeGogoBytes))
  2279			traceback = false;
  2280	
  2281		runtime·lock(&prof);
  2282		if(prof.fn == nil) {
  2283			runtime·unlock(&prof);
  2284			mp->mallocing--;
  2285			return;
  2286		}
  2287		n = 0;
  2288		if(traceback)
  2289			n = runtime·gentraceback((uintptr)pc, (uintptr)sp, (uintptr)lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil, false);
  2290		if(!traceback || n <= 0) {
  2291			// Normal traceback is impossible or has failed.
  2292			// See if it falls into one of several common cases.
  2293			n = 0;
  2294			if(mp->ncgo > 0 && mp->curg != nil &&
  2295				mp->curg->syscallpc != 0 && mp->curg->syscallsp != 0) {
  2296				// Cgo, we can't unwind and symbolize arbitrary C code,
  2297				// so instead collect the Go stack that leads to the cgo call.
  2298				// This is especially important on windows, since all syscalls are cgo calls.
  2299				n = runtime·gentraceback(mp->curg->syscallpc, mp->curg->syscallsp, 0, mp->curg, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil, false);
  2300			}
  2301	#ifdef GOOS_windows
  2302			if(n == 0 && mp->libcallg != nil && mp->libcallpc != 0 && mp->libcallsp != 0) {
  2303				// Libcall, i.e. runtime syscall on windows.
  2304				// Collect Go stack that leads to the call.
  2305				n = runtime·gentraceback(mp->libcallpc, mp->libcallsp, 0, mp->libcallg, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil, false);
  2306			}
  2307	#endif
  2308			if(n == 0) {
  2309				// If all of the above has failed, account it against abstract "System" or "GC".
  2310				n = 2;
  2311				// "ExternalCode" is better than "etext".
  2312				if((uintptr)pc > (uintptr)etext)
  2313					pc = (byte*)ExternalCode + PCQuantum;
  2314				prof.pcbuf[0] = (uintptr)pc;
  2315				if(mp->gcing || mp->helpgc)
  2316					prof.pcbuf[1] = (uintptr)GC + PCQuantum;
  2317				else
  2318					prof.pcbuf[1] = (uintptr)System + PCQuantum;
  2319			}
  2320		}
  2321		prof.fn(prof.pcbuf, n);
  2322		runtime·unlock(&prof);
  2323		mp->mallocing--;
  2324	}
  2325	
  2326	// Arrange to call fn with a traceback hz times a second.
  2327	void
  2328	runtime·setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
  2329	{
  2330		// Force sane arguments.
  2331		if(hz < 0)
  2332			hz = 0;
  2333		if(hz == 0)
  2334			fn = nil;
  2335		if(fn == nil)
  2336			hz = 0;
  2337	
  2338		// Disable preemption, otherwise we can be rescheduled to another thread
  2339		// that has profiling enabled.
  2340		m->locks++;
  2341	
  2342		// Stop the profiler on this thread so that it is safe to lock prof.
  2343		// If a profiling signal came in while we had prof locked,
  2344		// it would deadlock.
  2345		runtime·resetcpuprofiler(0);
  2346	
  2347		runtime·lock(&prof);
  2348		prof.fn = fn;
  2349		prof.hz = hz;
  2350		runtime·unlock(&prof);
  2351		runtime·lock(&runtime·sched);
  2352		runtime·sched.profilehz = hz;
  2353		runtime·unlock(&runtime·sched);
  2354	
  2355		if(hz != 0)
  2356			runtime·resetcpuprofiler(hz);
  2357	
  2358		m->locks--;
  2359	}
  2360	
  2361	// Change number of processors.  The world is stopped, sched is locked.
  2362	static void
  2363	procresize(int32 new)
  2364	{
  2365		int32 i, old;
  2366		bool empty;
  2367		G *gp;
  2368		P *p;
  2369	
  2370		old = runtime·gomaxprocs;
  2371		if(old < 0 || old > MaxGomaxprocs || new <= 0 || new > MaxGomaxprocs)
  2372			runtime·throw("procresize: invalid arg");
  2373		// initialize new P's
  2374		for(i = 0; i < new; i++) {
  2375			p = runtime·allp[i];
  2376			if(p == nil) {
  2377				p = (P*)runtime·mallocgc(sizeof(*p), 0, FlagNoInvokeGC);
  2378				p->id = i;
  2379				p->status = Pgcstop;
  2380				runtime·atomicstorep(&runtime·allp[i], p);
  2381			}
  2382			if(p->mcache == nil) {
  2383				if(old==0 && i==0)
  2384					p->mcache = m->mcache;  // bootstrap
  2385				else
  2386					p->mcache = runtime·allocmcache();
  2387			}
  2388		}
  2389	
  2390		// redistribute runnable G's evenly
  2391		// collect all runnable goroutines in global queue preserving FIFO order
  2392		// FIFO order is required to ensure fairness even during frequent GCs
  2393		// see http://golang.org/issue/7126
  2394		empty = false;
  2395		while(!empty) {
  2396			empty = true;
  2397			for(i = 0; i < old; i++) {
  2398				p = runtime·allp[i];
  2399				if(p->runqhead == p->runqtail)
  2400					continue;
  2401				empty = false;
  2402				// pop from tail of local queue
  2403				p->runqtail--;
  2404				gp = p->runq[p->runqtail%nelem(p->runq)];
  2405				// push onto head of global queue
  2406				gp->schedlink = runtime·sched.runqhead;
  2407				runtime·sched.runqhead = gp;
  2408				if(runtime·sched.runqtail == nil)
  2409					runtime·sched.runqtail = gp;
  2410				runtime·sched.runqsize++;
  2411			}
  2412		}
  2413		// fill local queues with at most nelem(p->runq)/2 goroutines
  2414		// start at 1 because current M already executes some G and will acquire allp[0] below,
  2415		// so if we have a spare G we want to put it into allp[1].
  2416		for(i = 1; i < new * nelem(p->runq)/2 && runtime·sched.runqsize > 0; i++) {
  2417			gp = runtime·sched.runqhead;
  2418			runtime·sched.runqhead = gp->schedlink;
  2419			if(runtime·sched.runqhead == nil)
  2420				runtime·sched.runqtail = nil;
  2421			runtime·sched.runqsize--;
  2422			runqput(runtime·allp[i%new], gp);
  2423		}
  2424	
  2425		// free unused P's
  2426		for(i = new; i < old; i++) {
  2427			p = runtime·allp[i];
  2428			runtime·freemcache(p->mcache);
  2429			p->mcache = nil;
  2430			gfpurge(p);
  2431			p->status = Pdead;
  2432			// can't free P itself because it can be referenced by an M in syscall
  2433		}
  2434	
  2435		if(m->p)
  2436			m->p->m = nil;
  2437		m->p = nil;
  2438		m->mcache = nil;
  2439		p = runtime·allp[0];
  2440		p->m = nil;
  2441		p->status = Pidle;
  2442		acquirep(p);
  2443		for(i = new-1; i > 0; i--) {
  2444			p = runtime·allp[i];
  2445			p->status = Pidle;
  2446			pidleput(p);
  2447		}
  2448		runtime·atomicstore((uint32*)&runtime·gomaxprocs, new);
  2449	}
  2450	
  2451	// Associate p and the current m.
  2452	static void
  2453	acquirep(P *p)
  2454	{
  2455		if(m->p || m->mcache)
  2456			runtime·throw("acquirep: already in go");
  2457		if(p->m || p->status != Pidle) {
  2458			runtime·printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? p->m->id : 0, p->status);
  2459			runtime·throw("acquirep: invalid p state");
  2460		}
  2461		m->mcache = p->mcache;
  2462		m->p = p;
  2463		p->m = m;
  2464		p->status = Prunning;
  2465	}
  2466	
  2467	// Disassociate p and the current m.
  2468	static P*
  2469	releasep(void)
  2470	{
  2471		P *p;
  2472	
  2473		if(m->p == nil || m->mcache == nil)
  2474			runtime·throw("releasep: invalid arg");
  2475		p = m->p;
  2476		if(p->m != m || p->mcache != m->mcache || p->status != Prunning) {
  2477			runtime·printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
  2478				m, m->p, p->m, m->mcache, p->mcache, p->status);
  2479			runtime·throw("releasep: invalid p state");
  2480		}
  2481		m->p = nil;
  2482		m->mcache = nil;
  2483		p->m = nil;
  2484		p->status = Pidle;
  2485		return p;
  2486	}
  2487	
  2488	static void
  2489	incidlelocked(int32 v)
  2490	{
  2491		runtime·lock(&runtime·sched);
  2492		runtime·sched.nmidlelocked += v;
  2493		if(v > 0)
  2494			checkdead();
  2495		runtime·unlock(&runtime·sched);
  2496	}
  2497	
  2498	// Check for deadlock situation.
  2499	// The check is based on the number of running M's; if it is 0, the program is deadlocked.
  2500	static void
  2501	checkdead(void)
  2502	{
  2503		G *gp;
  2504		int32 run, grunning, s;
  2505		uintptr i;
  2506	
  2507		// -1 for sysmon
  2508		run = runtime·sched.mcount - runtime·sched.nmidle - runtime·sched.nmidlelocked - 1;
  2509		if(run > 0)
  2510			return;
  2511		// If we are dying because of a signal caught on an already idle thread,
  2512		// freezetheworld will cause all running threads to block.
  2513		// And the runtime will essentially enter a deadlock state,
  2514		// except that there is a thread that will call runtime·exit soon.
  2515		if(runtime·panicking > 0)
  2516			return;
  2517		if(run < 0) {
  2518			runtime·printf("runtime: checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
  2519				runtime·sched.nmidle, runtime·sched.nmidlelocked, runtime·sched.mcount);
  2520			runtime·throw("checkdead: inconsistent counts");
  2521		}
  2522		grunning = 0;
  2523		runtime·lock(&allglock);
  2524		for(i = 0; i < runtime·allglen; i++) {
  2525			gp = runtime·allg[i];
  2526			if(gp->isbackground)
  2527				continue;
  2528			s = gp->status;
  2529			if(s == Gwaiting)
  2530				grunning++;
  2531			else if(s == Grunnable || s == Grunning || s == Gsyscall) {
  2532				runtime·unlock(&allglock);
  2533				runtime·printf("runtime: checkdead: found g %D in status %d\n", gp->goid, s);
  2534				runtime·throw("checkdead: runnable g");
  2535			}
  2536		}
  2537		runtime·unlock(&allglock);
  2538		if(grunning == 0)  // possible if main goroutine calls runtime·Goexit()
  2539			runtime·throw("no goroutines (main called runtime.Goexit) - deadlock!");
  2540		m->throwing = -1;  // do not dump full stacks
  2541		runtime·throw("all goroutines are asleep - deadlock!");
  2542	}
  2543	
  2544	static void
  2545	sysmon(void)
  2546	{
  2547		uint32 idle, delay;
  2548		int64 now, lastpoll, lasttrace;
  2549		G *gp;
  2550	
  2551		lasttrace = 0;
  2552		idle = 0;  // how many cycles in succession we have not woken somebody up
  2553		delay = 0;
  2554		for(;;) {
  2555			if(idle == 0)  // start with 20us sleep...
  2556				delay = 20;
  2557			else if(idle > 50)  // start doubling the sleep after 1ms...
  2558				delay *= 2;
  2559			if(delay > 10*1000)  // up to 10ms
  2560				delay = 10*1000;
  2561			runtime·usleep(delay);
  2562			if(runtime·debug.schedtrace <= 0 &&
  2563				(runtime·sched.gcwaiting || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs)) {  // TODO: fast atomic
  2564				runtime·lock(&runtime·sched);
  2565				if(runtime·atomicload(&runtime·sched.gcwaiting) || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs) {
  2566					runtime·atomicstore(&runtime·sched.sysmonwait, 1);
  2567					runtime·unlock(&runtime·sched);
  2568					runtime·notesleep(&runtime·sched.sysmonnote);
  2569					runtime·noteclear(&runtime·sched.sysmonnote);
  2570					idle = 0;
  2571					delay = 20;
  2572				} else
  2573					runtime·unlock(&runtime·sched);
  2574			}
  2575			// poll network if not polled for more than 10ms
  2576			lastpoll = runtime·atomicload64(&runtime·sched.lastpoll);
  2577			now = runtime·nanotime();
  2578			if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
  2579				runtime·cas64(&runtime·sched.lastpoll, lastpoll, now);
  2580				gp = runtime·netpoll(false);  // non-blocking
  2581				if(gp) {
  2582					// Need to decrement number of idle locked M's
  2583					// (pretending that one more is running) before injectglist.
  2584					// Otherwise it can lead to the following situation:
  2585					// injectglist grabs all P's but before it starts M's to run the P's,
  2586					// another M returns from syscall, finishes running its G,
  2587					// observes that there is no work to do and no other running M's
  2588					// and reports deadlock.
  2589					incidlelocked(-1);
  2590					injectglist(gp);
  2591					incidlelocked(1);
  2592				}
  2593			}
  2594			// retake P's blocked in syscalls
  2595			// and preempt long running G's
  2596			if(retake(now))
  2597				idle = 0;
  2598			else
  2599				idle++;
  2600	
  2601			if(runtime·debug.schedtrace > 0 && lasttrace + runtime·debug.schedtrace*1000000ll <= now) {
  2602				lasttrace = now;
  2603				runtime·schedtrace(runtime·debug.scheddetail);
  2604			}
  2605		}
  2606	}
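
// Worked example of the sleep backoff above (illustrative only): while work
// keeps being found (idle == 0) sysmon sleeps 20us per cycle.  Once more
// than 50 consecutive cycles find nothing to do (roughly 1ms of polling),
// the delay doubles on every further idle cycle:
//
//	20us, 20us, ..., 20us, 40us, 80us, 160us, ..., 10ms, 10ms, ...
//
// capped at 10ms, so a completely idle sysmon wakes at most ~100 times per
// second until it parks on sysmonnote.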
  2607	
  2608	typedef struct Pdesc Pdesc;
  2609	struct Pdesc
  2610	{
  2611		uint32	schedtick;
  2612		int64	schedwhen;
  2613		uint32	syscalltick;
  2614		int64	syscallwhen;
  2615	};
  2616	#pragma dataflag NOPTR
  2617	static Pdesc pdesc[MaxGomaxprocs];
  2618	
  2619	static uint32
  2620	retake(int64 now)
  2621	{
  2622		uint32 i, s, n;
  2623		int64 t;
  2624		P *p;
  2625		Pdesc *pd;
  2626	
  2627		n = 0;
  2628		for(i = 0; i < runtime·gomaxprocs; i++) {
  2629			p = runtime·allp[i];
  2630			if(p==nil)
  2631				continue;
  2632			pd = &pdesc[i];
  2633			s = p->status;
  2634			if(s == Psyscall) {
  2635				// Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
  2636				t = p->syscalltick;
  2637				if(pd->syscalltick != t) {
  2638					pd->syscalltick = t;
  2639					pd->syscallwhen = now;
  2640					continue;
  2641				}
  2642				// On the one hand we don't want to retake Ps if there is no other work to do,
  2643				// but on the other hand we want to retake them eventually
  2644				// because they can prevent the sysmon thread from deep sleep.
  2645				if(p->runqhead == p->runqtail &&
  2646					runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) > 0 &&
  2647					pd->syscallwhen + 10*1000*1000 > now)
  2648					continue;
  2649				// Need to decrement number of idle locked M's
  2650				// (pretending that one more is running) before the CAS.
  2651				// Otherwise the M from which we retake can exit the syscall,
  2652				// increment nmidle and report deadlock.
  2653				incidlelocked(-1);
  2654				if(runtime·cas(&p->status, s, Pidle)) {
  2655					n++;
  2656					handoffp(p);
  2657				}
  2658				incidlelocked(1);
  2659			} else if(s == Prunning) {
  2660				// Preempt G if it's running for more than 10ms.
  2661				t = p->schedtick;
  2662				if(pd->schedtick != t) {
  2663					pd->schedtick = t;
  2664					pd->schedwhen = now;
  2665					continue;
  2666				}
  2667				if(pd->schedwhen + 10*1000*1000 > now)
  2668					continue;
  2669				preemptone(p);
  2670			}
  2671		}
  2672		return n;
  2673	}
  2674	
  2675	// Tell all goroutines that they have been preempted and that they should stop.
  2676	// This function is purely best-effort.  It can fail to inform a goroutine if a
  2677	// processor just started running it.
  2678	// No locks need to be held.
  2679	// Returns true if a preemption request was issued to at least one goroutine.
  2680	static bool
  2681	preemptall(void)
  2682	{
  2683		P *p;
  2684		int32 i;
  2685		bool res;
  2686	
  2687		res = false;
  2688		for(i = 0; i < runtime·gomaxprocs; i++) {
  2689			p = runtime·allp[i];
  2690			if(p == nil || p->status != Prunning)
  2691				continue;
  2692			res |= preemptone(p);
  2693		}
  2694		return res;
  2695	}
  2696	
  2697	// Tell the goroutine running on processor P to stop.
  2698	// This function is purely best-effort.  It can incorrectly fail to inform the
  2699	// goroutine.  It can inform the wrong goroutine.  Even if it informs the
  2700	// correct goroutine, that goroutine might ignore the request if it is
  2701	// simultaneously executing runtime·newstack.
  2702	// No lock needs to be held.
  2703	// Returns true if a preemption request was issued.
  2704	static bool
  2705	preemptone(P *p)
  2706	{
  2707		M *mp;
  2708		G *gp;
  2709	
  2710		mp = p->m;
  2711		if(mp == nil || mp == m)
  2712			return false;
  2713		gp = mp->curg;
  2714		if(gp == nil || gp == mp->g0)
  2715			return false;
  2716		gp->preempt = true;
  2717		gp->stackguard0 = StackPreempt;
  2718		return true;
  2719	}
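
// How a preemption request takes effect (illustrative sketch): every
// non-NOSPLIT function prologue generated by the compiler compares the
// stack pointer against g->stackguard0 and calls into the stack-growth
// path when SP is below it, conceptually:
//
//	if(SP < g->stackguard0)
//		runtime·morestack();  // ends up in runtime·newstack
//
// StackPreempt is larger than any real stack pointer, so storing it in
// stackguard0 makes the very next prologue check fail.  runtime·newstack
// then notices gp->preempt, restores the real stack guard, and reschedules
// the goroutine instead of actually growing its stack.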
  2720	
  2721	void
  2722	runtime·schedtrace(bool detailed)
  2723	{
  2724		static int64 starttime;
  2725		int64 now;
  2726		int64 id1, id2, id3;
  2727		int32 i, t, h;
  2728		uintptr gi;
  2729		int8 *fmt;
  2730		M *mp, *lockedm;
  2731		G *gp, *lockedg;
  2732		P *p;
  2733	
  2734		now = runtime·nanotime();
  2735		if(starttime == 0)
  2736			starttime = now;
  2737	
  2738		runtime·lock(&runtime·sched);
  2739		runtime·printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idlethreads=%d runqueue=%d",
  2740			(now-starttime)/1000000, runtime·gomaxprocs, runtime·sched.npidle, runtime·sched.mcount,
  2741			runtime·sched.nmidle, runtime·sched.runqsize);
  2742		if(detailed) {
  2743			runtime·printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stopwait=%d sysmonwait=%d\n",
  2744				runtime·sched.gcwaiting, runtime·sched.nmidlelocked, runtime·sched.nmspinning,
  2745				runtime·sched.stopwait, runtime·sched.sysmonwait);
  2746		}
  2747		// We must be careful while reading data from P's, M's and G's.
  2748		// Even if we hold schedlock, most data can be changed concurrently.
  2749		// E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
  2750		for(i = 0; i < runtime·gomaxprocs; i++) {
  2751			p = runtime·allp[i];
  2752			if(p == nil)
  2753				continue;
  2754			mp = p->m;
  2755			h = runtime·atomicload(&p->runqhead);
  2756			t = runtime·atomicload(&p->runqtail);
  2757			if(detailed)
  2758				runtime·printf("  P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d gfreecnt=%d\n",
  2759					i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, t-h, p->gfreecnt);
  2760			else {
  2761				// In non-detailed mode, format the lengths of per-P run queues as:
  2762				// [len1 len2 len3 len4]
  2763				fmt = " %d";
  2764				if(runtime·gomaxprocs == 1)
  2765					fmt = " [%d]\n";
  2766				else if(i == 0)
  2767					fmt = " [%d";
  2768				else if(i == runtime·gomaxprocs-1)
  2769					fmt = " %d]\n";
  2770				runtime·printf(fmt, t-h);
  2771			}
  2772		}
  2773		if(!detailed) {
  2774			runtime·unlock(&runtime·sched);
  2775			return;
  2776		}
  2777		for(mp = runtime·allm; mp; mp = mp->alllink) {
  2778			p = mp->p;
  2779			gp = mp->curg;
  2780			lockedg = mp->lockedg;
  2781			id1 = -1;
  2782			if(p)
  2783				id1 = p->id;
  2784			id2 = -1;
  2785			if(gp)
  2786				id2 = gp->goid;
  2787			id3 = -1;
  2788			if(lockedg)
  2789				id3 = lockedg->goid;
  2790			runtime·printf("  M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d"
  2791				" locks=%d dying=%d helpgc=%d spinning=%d blocked=%d lockedg=%D\n",
  2792				mp->id, id1, id2,
  2793				mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
  2794				mp->spinning, mp->blocked, id3);
  2795		}
  2796		runtime·lock(&allglock);
  2797		for(gi = 0; gi < runtime·allglen; gi++) {
  2798			gp = runtime·allg[gi];
  2799			mp = gp->m;
  2800			lockedm = gp->lockedm;
  2801			runtime·printf("  G%D: status=%d(%s) m=%d lockedm=%d\n",
  2802				gp->goid, gp->status, gp->waitreason, mp ? mp->id : -1,
  2803				lockedm ? lockedm->id : -1);
  2804		}
  2805		runtime·unlock(&allglock);
  2806		runtime·unlock(&runtime·sched);
  2807	}
  2808	
  2809	// Put mp on midle list.
  2810	// Sched must be locked.
  2811	static void
  2812	mput(M *mp)
  2813	{
  2814		mp->schedlink = runtime·sched.midle;
  2815		runtime·sched.midle = mp;
  2816		runtime·sched.nmidle++;
  2817		checkdead();
  2818	}
  2819	
  2820	// Try to get an m from midle list.
  2821	// Sched must be locked.
  2822	static M*
  2823	mget(void)
  2824	{
  2825		M *mp;
  2826	
  2827		if((mp = runtime·sched.midle) != nil){
  2828			runtime·sched.midle = mp->schedlink;
  2829			runtime·sched.nmidle--;
  2830		}
  2831		return mp;
  2832	}
  2833	
  2834	// Put gp on the global runnable queue.
  2835	// Sched must be locked.
  2836	static void
  2837	globrunqput(G *gp)
  2838	{
  2839		gp->schedlink = nil;
  2840		if(runtime·sched.runqtail)
  2841			runtime·sched.runqtail->schedlink = gp;
  2842		else
  2843			runtime·sched.runqhead = gp;
  2844		runtime·sched.runqtail = gp;
  2845		runtime·sched.runqsize++;
  2846	}
  2847	
  2848	// Put a batch of runnable goroutines on the global runnable queue.
  2849	// Sched must be locked.
  2850	static void
  2851	globrunqputbatch(G *ghead, G *gtail, int32 n)
  2852	{
  2853		gtail->schedlink = nil;
  2854		if(runtime·sched.runqtail)
  2855			runtime·sched.runqtail->schedlink = ghead;
  2856		else
  2857			runtime·sched.runqhead = ghead;
  2858		runtime·sched.runqtail = gtail;
  2859		runtime·sched.runqsize += n;
  2860	}
  2861	
  2862	// Try to get a batch of G's from the global runnable queue.
  2863	// Sched must be locked.
  2864	static G*
  2865	globrunqget(P *p, int32 max)
  2866	{
  2867		G *gp, *gp1;
  2868		int32 n;
  2869	
  2870		if(runtime·sched.runqsize == 0)
  2871			return nil;
  2872		n = runtime·sched.runqsize/runtime·gomaxprocs+1;
  2873		if(n > runtime·sched.runqsize)
  2874			n = runtime·sched.runqsize;
  2875		if(max > 0 && n > max)
  2876			n = max;
  2877		if(n > nelem(p->runq)/2)
  2878			n = nelem(p->runq)/2;
  2879		runtime·sched.runqsize -= n;
  2880		if(runtime·sched.runqsize == 0)
  2881			runtime·sched.runqtail = nil;
  2882		gp = runtime·sched.runqhead;
  2883		runtime·sched.runqhead = gp->schedlink;
  2884		n--;
  2885		while(n--) {
  2886			gp1 = runtime·sched.runqhead;
  2887			runtime·sched.runqhead = gp1->schedlink;
  2888			runqput(p, gp1);
  2889		}
  2890		return gp;
  2891	}
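
// Worked example for the batch size computed above (illustrative only):
// with runqsize == 10 and gomaxprocs == 4, n = 10/4 + 1 = 3, so the caller
// gets one G to run immediately and two more are moved into its local run
// queue.  The +1 rounds up so the global queue still drains when
// runqsize < gomaxprocs, and the nelem(p->runq)/2 cap keeps a single P
// from emptying the global queue into its own local queue.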
  2892	
  2893	// Put p on the pidle list.
  2894	// Sched must be locked.
  2895	static void
  2896	pidleput(P *p)
  2897	{
  2898		p->link = runtime·sched.pidle;
  2899		runtime·sched.pidle = p;
  2900		runtime·xadd(&runtime·sched.npidle, 1);  // TODO: fast atomic
  2901	}
  2902	
  2903	// Try to get a p from the pidle list.
  2904	// Sched must be locked.
  2905	static P*
  2906	pidleget(void)
  2907	{
  2908		P *p;
  2909	
  2910		p = runtime·sched.pidle;
  2911		if(p) {
  2912			runtime·sched.pidle = p->link;
  2913			runtime·xadd(&runtime·sched.npidle, -1);  // TODO: fast atomic
  2914		}
  2915		return p;
  2916	}
  2917	
  2918	// Try to put g on the local runnable queue.
  2919	// If the local queue is full, put g onto the global queue instead.
  2920	// Executed only by the owner P.
  2921	static void
  2922	runqput(P *p, G *gp)
  2923	{
  2924		uint32 h, t;
  2925	
  2926	retry:
  2927		h = runtime·atomicload(&p->runqhead);  // load-acquire, synchronize with consumers
  2928		t = p->runqtail;
  2929		if(t - h < nelem(p->runq)) {
  2930			p->runq[t%nelem(p->runq)] = gp;
  2931			runtime·atomicstore(&p->runqtail, t+1);  // store-release, makes the item available for consumption
  2932			return;
  2933		}
  2934		if(runqputslow(p, gp, h, t))
  2935			return;
  2936		// the queue is not full, now the put above must succeed
  2937		goto retry;
  2938	}
  2939	
  2940	// Put g and a batch of work from local runnable queue on global queue.
  2941	// Executed only by the owner P.
  2942	static bool
  2943	runqputslow(P *p, G *gp, uint32 h, uint32 t)
  2944	{
  2945		G *batch[nelem(p->runq)/2+1];
  2946		uint32 n, i;
  2947	
  2948		// First, grab a batch from local queue.
  2949		n = t-h;
  2950		n = n/2;
  2951		if(n != nelem(p->runq)/2)
  2952			runtime·throw("runqputslow: queue is not full");
  2953		for(i=0; i<n; i++)
  2954			batch[i] = p->runq[(h+i)%nelem(p->runq)];
  2955		if(!runtime·cas(&p->runqhead, h, h+n))  // cas-release, commits consume
  2956			return false;
  2957		batch[n] = gp;
  2958		// Link the goroutines.
  2959		for(i=0; i<n; i++)
  2960			batch[i]->schedlink = batch[i+1];
  2961		// Now put the batch on global queue.
  2962		runtime·lock(&runtime·sched);
  2963		globrunqputbatch(batch[0], batch[n], n+1);
  2964		runtime·unlock(&runtime·sched);
  2965		return true;
  2966	}
  2967	
  2968	// Get g from local runnable queue.
  2969	// Executed only by the owner P.
  2970	static G*
  2971	runqget(P *p)
  2972	{
  2973		G *gp;
  2974		uint32 t, h;
  2975	
  2976		for(;;) {
  2977			h = runtime·atomicload(&p->runqhead);  // load-acquire, synchronize with other consumers
  2978			t = p->runqtail;
  2979			if(t == h)
  2980				return nil;
  2981			gp = p->runq[h%nelem(p->runq)];
  2982			if(runtime·cas(&p->runqhead, h, h+1))  // cas-release, commits consume
  2983				return gp;
  2984		}
  2985	}
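
// Illustrative sketch (hypothetical helper, not defined in the runtime):
// p->runq is a fixed-size ring indexed by free-running 32-bit counters, so
// the current queue length is just the difference of tail and head, with
// unsigned wraparound handled automatically:
//
//	static uint32
//	runqlen(P *p)
//	{
//		return runtime·atomicload(&p->runqtail) - runtime·atomicload(&p->runqhead);
//	}
//
// runtime·schedtrace above prints exactly this t-h difference as the per-P
// run queue size.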
  2986	
  2987	// Grabs a batch of goroutines from local runnable queue.
  2988	// batch array must be of size nelem(p->runq)/2. Returns number of grabbed goroutines.
  2989	// Can be executed by any P.
  2990	static uint32
  2991	runqgrab(P *p, G **batch)
  2992	{
  2993		uint32 t, h, n, i;
  2994	
  2995		for(;;) {
  2996			h = runtime·atomicload(&p->runqhead);  // load-acquire, synchronize with other consumers
  2997			t = runtime·atomicload(&p->runqtail);  // load-acquire, synchronize with the producer
  2998			n = t-h;
  2999			n = n - n/2;
  3000			if(n == 0)
  3001				break;
  3002			if(n > nelem(p->runq)/2)  // read inconsistent h and t
  3003				continue;
  3004			for(i=0; i<n; i++)
  3005				batch[i] = p->runq[(h+i)%nelem(p->runq)];
  3006			if(runtime·cas(&p->runqhead, h, h+n))  // cas-release, commits consume
  3007				break;
  3008		}
  3009		return n;
  3010	}
  3011	
  3012	// Steal half of the elements from the local runnable queue of p2
  3013	// and put them onto the local runnable queue of p.
  3014	// Returns one of the stolen elements (or nil if the steal failed).
  3015	static G*
  3016	runqsteal(P *p, P *p2)
  3017	{
  3018		G *gp;
  3019		G *batch[nelem(p->runq)/2];
  3020		uint32 t, h, n, i;
  3021	
  3022		n = runqgrab(p2, batch);
  3023		if(n == 0)
  3024			return nil;
  3025		n--;
  3026		gp = batch[n];
  3027		if(n == 0)
  3028			return gp;
  3029		h = runtime·atomicload(&p->runqhead);  // load-acquire, synchronize with consumers
  3030		t = p->runqtail;
  3031		if(t - h + n >= nelem(p->runq))
  3032			runtime·throw("runqsteal: runq overflow");
  3033		for(i=0; i<n; i++, t++)
  3034			p->runq[t%nelem(p->runq)] = batch[i];
  3035		runtime·atomicstore(&p->runqtail, t);  // store-release, makes the item available for consumption
  3036		return gp;
  3037	}
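
// Worked example for runqsteal above (illustrative only): if p2's local
// queue holds 7 G's, runqgrab computes n = 7 - 7/2 = 4 and copies four G's
// out of p2's ring.  runqsteal then returns the last grabbed G to run
// immediately and appends the remaining three to p's own queue, so one
// successful steal moves roughly half of the victim's work.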
  3038	
  3039	void
  3040	runtime·testSchedLocalQueue(void)
  3041	{
  3042		P p;
  3043		G gs[nelem(p.runq)];
  3044		int32 i, j;
  3045	
  3046		runtime·memclr((byte*)&p, sizeof(p));
  3047	
  3048		for(i = 0; i < nelem(gs); i++) {
  3049			if(runqget(&p) != nil)
  3050				runtime·throw("runq is not empty initially");
  3051			for(j = 0; j < i; j++)
  3052				runqput(&p, &gs[i]);
  3053			for(j = 0; j < i; j++) {
  3054				if(runqget(&p) != &gs[i]) {
  3055					runtime·printf("bad element at iter %d/%d\n", i, j);
  3056					runtime·throw("bad element");
  3057				}
  3058			}
  3059			if(runqget(&p) != nil)
  3060				runtime·throw("runq is not empty afterwards");
  3061		}
  3062	}
  3063	
  3064	void
  3065	runtime·testSchedLocalQueueSteal(void)
  3066	{
  3067		P p1, p2;
  3068		G gs[nelem(p1.runq)], *gp;
  3069		int32 i, j, s;
  3070	
  3071		runtime·memclr((byte*)&p1, sizeof(p1));
  3072		runtime·memclr((byte*)&p2, sizeof(p2));
  3073	
  3074		for(i = 0; i < nelem(gs); i++) {
  3075			for(j = 0; j < i; j++) {
  3076				gs[j].sig = 0;
  3077				runqput(&p1, &gs[j]);
  3078			}
  3079			gp = runqsteal(&p2, &p1);
  3080			s = 0;
  3081			if(gp) {
  3082				s++;
  3083				gp->sig++;
  3084			}
  3085			while(gp = runqget(&p2)) {
  3086				s++;
  3087				gp->sig++;
  3088			}
  3089			while(gp = runqget(&p1))
  3090				gp->sig++;
  3091			for(j = 0; j < i; j++) {
  3092				if(gs[j].sig != 1) {
  3093					runtime·printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
  3094					runtime·throw("bad element");
  3095				}
  3096			}
  3097			if(s != i/2 && s != i/2+1) {
  3098				runtime·printf("bad steal %d, want %d or %d, iter %d\n",
  3099					s, i/2, i/2+1, i);
  3100				runtime·throw("bad steal");
  3101			}
  3102		}
  3103	}
  3104	
  3105	extern void runtime·morestack(void);
  3106	uintptr runtime·externalthreadhandlerp;
  3107	
  3108	// Does f mark the top of a goroutine stack?
  3109	bool
  3110	runtime·topofstack(Func *f)
  3111	{
  3112		return f->entry == (uintptr)runtime·goexit ||
  3113			f->entry == (uintptr)runtime·mstart ||
  3114			f->entry == (uintptr)runtime·mcall ||
  3115			f->entry == (uintptr)runtime·morestack ||
  3116			f->entry == (uintptr)runtime·lessstack ||
  3117			f->entry == (uintptr)_rt0_go ||
  3118			(runtime·externalthreadhandlerp != 0 && f->entry == runtime·externalthreadhandlerp);
  3119	}
  3120	
  3121	int32
  3122	runtime·setmaxthreads(int32 in)
  3123	{
  3124		int32 out;
  3125	
  3126		runtime·lock(&runtime·sched);
  3127		out = runtime·sched.maxmcount;
  3128		runtime·sched.maxmcount = in;
  3129		checkmcount();
  3130		runtime·unlock(&runtime·sched);
  3131		return out;
  3132	}
  3133	
  3134	static int8 experiment[] = GOEXPERIMENT; // defined in zaexperiment.h
  3135	
  3136	static bool
  3137	haveexperiment(int8 *name)
  3138	{
  3139		int32 i, j;
  3140		
  3141		for(i=0; i<sizeof(experiment); i++) {
  3142			if((i == 0 || experiment[i-1] == ',') && experiment[i] == name[0]) {
  3143				for(j=0; name[j]; j++)
  3144					if(experiment[i+j] != name[j])
  3145						goto nomatch;
  3146				if(experiment[i+j] != '\0' && experiment[i+j] != ',')
  3147					goto nomatch;
  3148				return 1;
  3149			}
  3150		nomatch:;
  3151		}
  3152		return 0;
  3153	}
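
// Illustrative example (hypothetical GOEXPERIMENT value): if zaexperiment.h
// defined GOEXPERIMENT as "fieldtrack,ssa", then haveexperiment("ssa") would
// scan the comma-separated list, match the second entry, and return 1, while
// haveexperiment("s") would return 0 because a match must be followed by
// '\0' or ',' to count.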