Text file src/pkg/runtime/proc.c

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "runtime.h"
     6	#include "arch_GOARCH.h"
     7	#include "zaexperiment.h"
     8	#include "malloc.h"
     9	#include "stack.h"
    10	#include "race.h"
    11	#include "type.h"
    12	#include "../../cmd/ld/textflag.h"
    13	
    14	// Goroutine scheduler
    15	// The scheduler's job is to distribute ready-to-run goroutines over worker threads.
    16	//
    17	// The main concepts are:
    18	// G - goroutine.
    19	// M - worker thread, or machine.
    20	// P - processor, a resource that is required to execute Go code.
     21	//     M must have an associated P to execute Go code; however, it can be
     22	//     blocked or in a syscall without an associated P.
    23	//
    24	// Design doc at http://golang.org/s/go11sched.
    25	
    26	typedef struct Sched Sched;
    27	struct Sched {
    28		Lock;
    29	
     30		uint64	goidgen;	// goroutine ID generator
    31	
    32		M*	midle;	 // idle m's waiting for work
    33		int32	nmidle;	 // number of idle m's waiting for work
    34		int32	nmidlelocked; // number of locked m's waiting for work
    35		int32	mcount;	 // number of m's that have been created
    36		int32	maxmcount;	// maximum number of m's allowed (or die)
    37	
    38		P*	pidle;  // idle P's
     39		uint32	npidle;	// number of idle P's
     40		uint32	nmspinning;	// number of spinning M's looking for work
    41	
    42		// Global runnable queue.
    43		G*	runqhead;
    44		G*	runqtail;
    45		int32	runqsize;
    46	
    47		// Global cache of dead G's.
    48		Lock	gflock;
    49		G*	gfree;
    50	
    51		uint32	gcwaiting;	// gc is waiting to run
    52		int32	stopwait;
    53		Note	stopnote;
    54		uint32	sysmonwait;
    55		Note	sysmonnote;
    56		uint64	lastpoll;
    57	
    58		int32	profilehz;	// cpu profiling rate
    59	};
    60	
    61	// The max value of GOMAXPROCS.
    62	// There are no fundamental restrictions on the value.
    63	enum { MaxGomaxprocs = 1<<8 };
    64	
    65	Sched	runtime·sched;
    66	int32	runtime·gomaxprocs;
    67	uint32	runtime·needextram;
    68	bool	runtime·iscgo;
    69	M	runtime·m0;
    70	G	runtime·g0;	 // idle goroutine for m0
    71	G*	runtime·allg;
    72	G*	runtime·lastg;
    73	M*	runtime·allm;
    74	M*	runtime·extram;
    75	int8*	runtime·goos;
    76	int32	runtime·ncpu;
    77	static int32	newprocs;
    78	
    79	void runtime·mstart(void);
    80	static void runqput(P*, G*);
    81	static G* runqget(P*);
    82	static void runqgrow(P*);
    83	static G* runqsteal(P*, P*);
    84	static void mput(M*);
    85	static M* mget(void);
    86	static void mcommoninit(M*);
    87	static void schedule(void);
    88	static void procresize(int32);
    89	static void acquirep(P*);
    90	static P* releasep(void);
    91	static void newm(void(*)(void), P*);
    92	static void stopm(void);
    93	static void startm(P*, bool);
    94	static void handoffp(P*);
    95	static void wakep(void);
    96	static void stoplockedm(void);
    97	static void startlockedm(G*);
    98	static void sysmon(void);
    99	static uint32 retake(int64);
   100	static void incidlelocked(int32);
   101	static void checkdead(void);
   102	static void exitsyscall0(G*);
   103	static void park0(G*);
   104	static void goexit0(G*);
   105	static void gfput(P*, G*);
   106	static G* gfget(P*);
   107	static void gfpurge(P*);
   108	static void globrunqput(G*);
   109	static G* globrunqget(P*, int32);
   110	static P* pidleget(void);
   111	static void pidleput(P*);
   112	static void injectglist(G*);
   113	static bool preemptall(void);
   114	static bool preemptone(P*);
   115	static bool exitsyscallfast(void);
   116	static bool haveexperiment(int8*);
   117	
   118	// The bootstrap sequence is:
   119	//
   120	//	call osinit
   121	//	call schedinit
   122	//	make & queue new G
   123	//	call runtime·mstart
   124	//
   125	// The new G calls runtime·main.
   126	void
   127	runtime·schedinit(void)
   128	{
   129		int32 n, procs;
   130		byte *p;
   131		Eface i;
   132	
   133		runtime·sched.maxmcount = 10000;
   134		runtime·precisestack = haveexperiment("precisestack");
   135	
   136		runtime·mprofinit();
   137		runtime·mallocinit();
   138		mcommoninit(m);
   139		
   140		// Initialize the itable value for newErrorCString,
   141		// so that the next time it gets called, possibly
   142		// in a fault during a garbage collection, it will not
    143		// need to allocate memory.
   144		runtime·newErrorCString(0, &i);
   145	
   146		runtime·goargs();
   147		runtime·goenvs();
   148		runtime·parsedebugvars();
   149	
    150		// Allocate the internal symbol table representation now; we need it for GC anyway.
   151		runtime·symtabinit();
   152	
   153		runtime·sched.lastpoll = runtime·nanotime();
   154		procs = 1;
   155		p = runtime·getenv("GOMAXPROCS");
   156		if(p != nil && (n = runtime·atoi(p)) > 0) {
   157			if(n > MaxGomaxprocs)
   158				n = MaxGomaxprocs;
   159			procs = n;
   160		}
   161		runtime·allp = runtime·malloc((MaxGomaxprocs+1)*sizeof(runtime·allp[0]));
   162		procresize(procs);
   163	
   164		mstats.enablegc = 1;
   165	
   166		if(raceenabled)
   167			g->racectx = runtime·raceinit();
   168	}
   169	
   170	extern void main·init(void);
   171	extern void main·main(void);
   172	
   173	static FuncVal scavenger = {runtime·MHeap_Scavenger};
   174	
   175	static FuncVal initDone = { runtime·unlockOSThread };
   176	
   177	// The main goroutine.
   178	void
   179	runtime·main(void)
   180	{
   181		Defer d;
   182		
   183		// Max stack size is 1 GB on 64-bit, 250 MB on 32-bit.
   184		// Using decimal instead of binary GB and MB because
   185		// they look nicer in the stack overflow failure message.
   186		if(sizeof(void*) == 8)
   187			runtime·maxstacksize = 1000000000;
   188		else
   189			runtime·maxstacksize = 250000000;
   190	
   191		newm(sysmon, nil);
   192	
   193		// Lock the main goroutine onto this, the main OS thread,
   194		// during initialization.  Most programs won't care, but a few
   195		// do require certain calls to be made by the main thread.
   196		// Those can arrange for main.main to run in the main thread
   197		// by calling runtime.LockOSThread during initialization
   198		// to preserve the lock.
   199		runtime·lockOSThread();
   200		
   201		// Defer unlock so that runtime.Goexit during init does the unlock too.
   202		d.fn = &initDone;
   203		d.siz = 0;
   204		d.link = g->defer;
   205		d.argp = (void*)-1;
   206		d.special = true;
   207		d.free = false;
   208		g->defer = &d;
   209	
   210		if(m != &runtime·m0)
   211			runtime·throw("runtime·main not on m0");
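       		// Start the heap scavenger in its own goroutine before running init.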
   212		runtime·newproc1(&scavenger, nil, 0, 0, runtime·main);
   213		main·init();
   214	
   215		if(g->defer != &d || d.fn != &initDone)
   216			runtime·throw("runtime: bad defer entry after init");
   217		g->defer = d.link;
   218		runtime·unlockOSThread();
   219	
   220		main·main();
   221		if(raceenabled)
   222			runtime·racefini();
   223	
   224		// Make racy client program work: if panicking on
   225		// another goroutine at the same time as main returns,
   226		// let the other goroutine finish printing the panic trace.
   227		// Once it does, it will exit. See issue 3934.
   228		if(runtime·panicking)
   229			runtime·park(nil, nil, "panicwait");
   230	
   231		runtime·exit(0);
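       		// runtime·exit should not return; if it somehow does, crash by writing to a function address.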
   232		for(;;)
   233			*(int32*)runtime·main = 0;
   234	}
   235	
   236	void
   237	runtime·goroutineheader(G *gp)
   238	{
   239		int8 *status;
   240	
   241		switch(gp->status) {
   242		case Gidle:
   243			status = "idle";
   244			break;
   245		case Grunnable:
   246			status = "runnable";
   247			break;
   248		case Grunning:
   249			status = "running";
   250			break;
   251		case Gsyscall:
   252			status = "syscall";
   253			break;
   254		case Gwaiting:
   255			if(gp->waitreason)
   256				status = gp->waitreason;
   257			else
   258				status = "waiting";
   259			break;
   260		default:
   261			status = "???";
   262			break;
   263		}
   264		runtime·printf("goroutine %D [%s]:\n", gp->goid, status);
   265	}
   266	
   267	void
   268	runtime·tracebackothers(G *me)
   269	{
   270		G *gp;
   271		int32 traceback;
   272	
   273		traceback = runtime·gotraceback(nil);
   274		
   275		// Show the current goroutine first, if we haven't already.
   276		if((gp = m->curg) != nil && gp != me) {
   277			runtime·printf("\n");
   278			runtime·goroutineheader(gp);
   279			runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, gp);
   280		}
   281	
   282		for(gp = runtime·allg; gp != nil; gp = gp->alllink) {
   283			if(gp == me || gp == m->curg || gp->status == Gdead)
   284				continue;
   285			if(gp->issystem && traceback < 2)
   286				continue;
   287			runtime·printf("\n");
   288			runtime·goroutineheader(gp);
   289			if(gp->status == Grunning) {
   290				runtime·printf("\tgoroutine running on other thread; stack unavailable\n");
   291				runtime·printcreatedby(gp);
   292			} else
   293				runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, gp);
   294		}
   295	}
   296	
   297	static void
   298	checkmcount(void)
   299	{
   300		// sched lock is held
   301		if(runtime·sched.mcount > runtime·sched.maxmcount) {
   302			runtime·printf("runtime: program exceeds %d-thread limit\n", runtime·sched.maxmcount);
   303			runtime·throw("thread exhaustion");
   304		}
   305	}
   306	
   307	static void
   308	mcommoninit(M *mp)
   309	{
    310		// If there is no mcache, runtime·callers() will crash;
    311		// we are most likely in the sysmon thread, so the stack is senseless anyway.
   312		if(m->mcache)
   313			runtime·callers(1, mp->createstack, nelem(mp->createstack));
   314	
   315		mp->fastrand = 0x49f6428aUL + mp->id + runtime·cputicks();
   316	
   317		runtime·lock(&runtime·sched);
   318		mp->id = runtime·sched.mcount++;
   319		checkmcount();
   320		runtime·mpreinit(mp);
   321	
   322		// Add to runtime·allm so garbage collector doesn't free m
   323		// when it is just in a register or thread-local storage.
   324		mp->alllink = runtime·allm;
    325		// runtime·NumCgoCall() iterates over allm without schedlock,
   326		// so we need to publish it safely.
   327		runtime·atomicstorep(&runtime·allm, mp);
   328		runtime·unlock(&runtime·sched);
   329	}
   330	
   331	// Mark gp ready to run.
   332	void
   333	runtime·ready(G *gp)
   334	{
   335		// Mark runnable.
   336		m->locks++;  // disable preemption because it can be holding p in a local var
   337		if(gp->status != Gwaiting) {
   338			runtime·printf("goroutine %D has status %d\n", gp->goid, gp->status);
   339			runtime·throw("bad g->status in ready");
   340		}
   341		gp->status = Grunnable;
   342		runqput(m->p, gp);
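       		// If there is an idle P and no M is already spinning, wake one to run the newly runnable G.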
   343		if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0)  // TODO: fast atomic
   344			wakep();
   345		m->locks--;
   346		if(m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
   347			g->stackguard0 = StackPreempt;
   348	}
   349	
   350	int32
   351	runtime·gcprocs(void)
   352	{
   353		int32 n;
   354	
   355		// Figure out how many CPUs to use during GC.
   356		// Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
   357		runtime·lock(&runtime·sched);
   358		n = runtime·gomaxprocs;
   359		if(n > runtime·ncpu)
   360			n = runtime·ncpu;
   361		if(n > MaxGcproc)
   362			n = MaxGcproc;
   363		if(n > runtime·sched.nmidle+1) // one M is currently running
   364			n = runtime·sched.nmidle+1;
   365		runtime·unlock(&runtime·sched);
   366		return n;
   367	}
   368	
   369	static bool
   370	needaddgcproc(void)
   371	{
   372		int32 n;
   373	
   374		runtime·lock(&runtime·sched);
   375		n = runtime·gomaxprocs;
   376		if(n > runtime·ncpu)
   377			n = runtime·ncpu;
   378		if(n > MaxGcproc)
   379			n = MaxGcproc;
   380		n -= runtime·sched.nmidle+1; // one M is currently running
   381		runtime·unlock(&runtime·sched);
   382		return n > 0;
   383	}
   384	
   385	void
   386	runtime·helpgc(int32 nproc)
   387	{
   388		M *mp;
   389		int32 n, pos;
   390	
   391		runtime·lock(&runtime·sched);
   392		pos = 0;
   393		for(n = 1; n < nproc; n++) {  // one M is currently running
   394			if(runtime·allp[pos]->mcache == m->mcache)
   395				pos++;
   396			mp = mget();
   397			if(mp == nil)
   398				runtime·throw("runtime·gcprocs inconsistency");
   399			mp->helpgc = n;
   400			mp->mcache = runtime·allp[pos]->mcache;
   401			pos++;
   402			runtime·notewakeup(&mp->park);
   403		}
   404		runtime·unlock(&runtime·sched);
   405	}
   406	
    407	// Similar to stoptheworld but best-effort; it can be called several times.
    408	// There is no reverse operation; it is used during crashing.
   409	// This function must not lock any mutexes.
   410	void
   411	runtime·freezetheworld(void)
   412	{
   413		int32 i;
   414	
   415		if(runtime·gomaxprocs == 1)
   416			return;
   417		// stopwait and preemption requests can be lost
   418		// due to races with concurrently executing threads,
   419		// so try several times
   420		for(i = 0; i < 5; i++) {
   421			// this should tell the scheduler to not start any new goroutines
   422			runtime·sched.stopwait = 0x7fffffff;
   423			runtime·atomicstore((uint32*)&runtime·sched.gcwaiting, 1);
   424			// this should stop running goroutines
   425			if(!preemptall())
   426				break;  // no running goroutines
   427			runtime·usleep(1000);
   428		}
   429		// to be sure
   430		runtime·usleep(1000);
   431		preemptall();
   432		runtime·usleep(1000);
   433	}
   434	
   435	void
   436	runtime·stoptheworld(void)
   437	{
   438		int32 i;
   439		uint32 s;
   440		P *p;
   441		bool wait;
   442	
   443		runtime·lock(&runtime·sched);
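       		// stopwait counts the P's that still need to stop; stopnote is signaled when it reaches zero.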
   444		runtime·sched.stopwait = runtime·gomaxprocs;
   445		runtime·atomicstore((uint32*)&runtime·sched.gcwaiting, 1);
   446		preemptall();
   447		// stop current P
   448		m->p->status = Pgcstop;
   449		runtime·sched.stopwait--;
   450		// try to retake all P's in Psyscall status
   451		for(i = 0; i < runtime·gomaxprocs; i++) {
   452			p = runtime·allp[i];
   453			s = p->status;
   454			if(s == Psyscall && runtime·cas(&p->status, s, Pgcstop))
   455				runtime·sched.stopwait--;
   456		}
   457		// stop idle P's
   458		while(p = pidleget()) {
   459			p->status = Pgcstop;
   460			runtime·sched.stopwait--;
   461		}
   462		wait = runtime·sched.stopwait > 0;
   463		runtime·unlock(&runtime·sched);
   464	
   465		// wait for remaining P's to stop voluntarily
   466		if(wait) {
   467			for(;;) {
   468				// wait for 100us, then try to re-preempt in case of any races
   469				if(runtime·notetsleep(&runtime·sched.stopnote, 100*1000)) {
   470					runtime·noteclear(&runtime·sched.stopnote);
   471					break;
   472				}
   473				preemptall();
   474			}
   475		}
   476		if(runtime·sched.stopwait)
   477			runtime·throw("stoptheworld: not stopped");
   478		for(i = 0; i < runtime·gomaxprocs; i++) {
   479			p = runtime·allp[i];
   480			if(p->status != Pgcstop)
   481				runtime·throw("stoptheworld: not stopped");
   482		}
   483	}
   484	
   485	static void
   486	mhelpgc(void)
   487	{
   488		m->helpgc = -1;
   489	}
   490	
   491	void
   492	runtime·starttheworld(void)
   493	{
   494		P *p, *p1;
   495		M *mp;
   496		G *gp;
   497		bool add;
   498	
   499		m->locks++;  // disable preemption because it can be holding p in a local var
   500		gp = runtime·netpoll(false);  // non-blocking
   501		injectglist(gp);
   502		add = needaddgcproc();
   503		runtime·lock(&runtime·sched);
   504		if(newprocs) {
   505			procresize(newprocs);
   506			newprocs = 0;
   507		} else
   508			procresize(runtime·gomaxprocs);
   509		runtime·sched.gcwaiting = 0;
   510	
   511		p1 = nil;
   512		while(p = pidleget()) {
   513			// procresize() puts p's with work at the beginning of the list.
   514			// Once we reach a p without a run queue, the rest don't have one either.
   515			if(p->runqhead == p->runqtail) {
   516				pidleput(p);
   517				break;
   518			}
   519			p->m = mget();
   520			p->link = p1;
   521			p1 = p;
   522		}
   523		if(runtime·sched.sysmonwait) {
   524			runtime·sched.sysmonwait = false;
   525			runtime·notewakeup(&runtime·sched.sysmonnote);
   526		}
   527		runtime·unlock(&runtime·sched);
   528	
   529		while(p1) {
   530			p = p1;
   531			p1 = p1->link;
   532			if(p->m) {
   533				mp = p->m;
   534				p->m = nil;
   535				if(mp->nextp)
   536					runtime·throw("starttheworld: inconsistent mp->nextp");
   537				mp->nextp = p;
   538				runtime·notewakeup(&mp->park);
   539			} else {
   540				// Start M to run P.  Do not start another M below.
   541				newm(nil, p);
   542				add = false;
   543			}
   544		}
   545	
   546		if(add) {
   547			// If GC could have used another helper proc, start one now,
   548			// in the hope that it will be available next time.
   549			// It would have been even better to start it before the collection,
   550			// but doing so requires allocating memory, so it's tricky to
   551			// coordinate.  This lazy approach works out in practice:
   552			// we don't mind if the first couple gc rounds don't have quite
   553			// the maximum number of procs.
   554			newm(mhelpgc, nil);
   555		}
   556		m->locks--;
   557		if(m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
   558			g->stackguard0 = StackPreempt;
   559	}
   560	
   561	// Called to start an M.
   562	void
   563	runtime·mstart(void)
   564	{
   565	#ifdef GOOS_windows
   566	#ifdef GOARCH_386
    567		// This is used by windows/386 only. Unfortunately, the SEH frame
    568		// needs to be located on the OS stack, and mstart runs on the OS
    569		// stack for both m0 and m.
   570		SEH seh;
   571	#endif
   572	#endif
   573	
   574		if(g != m->g0)
   575			runtime·throw("bad runtime·mstart");
   576	
   577		// Record top of stack for use by mcall.
   578		// Once we call schedule we're never coming back,
   579		// so other calls can reuse this stack space.
   580		runtime·gosave(&m->g0->sched);
   581		m->g0->sched.pc = (uintptr)-1;  // make sure it is never used
   582		m->g0->stackguard = m->g0->stackguard0;  // cgo sets only stackguard0, copy it to stackguard
   583	#ifdef GOOS_windows
   584	#ifdef GOARCH_386
   585		m->seh = &seh;
   586	#endif
   587	#endif
   588		runtime·asminit();
   589		runtime·minit();
   590	
   591		// Install signal handlers; after minit so that minit can
   592		// prepare the thread to be able to handle the signals.
   593		if(m == &runtime·m0)
   594			runtime·initsig();
   595		
   596		if(m->mstartfn)
   597			m->mstartfn();
   598	
   599		if(m->helpgc) {
   600			m->helpgc = 0;
   601			stopm();
   602		} else if(m != &runtime·m0) {
   603			acquirep(m->nextp);
   604			m->nextp = nil;
   605		}
   606		schedule();
   607	
   608		// TODO(brainman): This point is never reached, because scheduler
   609		// does not release os threads at the moment. But once this path
   610		// is enabled, we must remove our seh here.
   611	}
   612	
   613	// When running with cgo, we call _cgo_thread_start
   614	// to start threads for us so that we can play nicely with
   615	// foreign code.
   616	void (*_cgo_thread_start)(void*);
   617	
   618	typedef struct CgoThreadStart CgoThreadStart;
   619	struct CgoThreadStart
   620	{
   621		M *m;
   622		G *g;
   623		void (*fn)(void);
   624	};
   625	
   626	// Allocate a new m unassociated with any thread.
   627	// Can use p for allocation context if needed.
   628	M*
   629	runtime·allocm(P *p)
   630	{
   631		M *mp;
   632		static Type *mtype;  // The Go type M
   633	
   634		m->locks++;  // disable GC because it can be called from sysmon
   635		if(m->p == nil)
   636			acquirep(p);  // temporarily borrow p for mallocs in this function
   637		if(mtype == nil) {
   638			Eface e;
   639			runtime·gc_m_ptr(&e);
   640			mtype = ((PtrType*)e.type)->elem;
   641		}
   642	
   643		mp = runtime·cnew(mtype);
   644		mcommoninit(mp);
   645	
   646		// In case of cgo, pthread_create will make us a stack.
    647		// Windows will lay out the sched stack on the OS stack.
   648		if(runtime·iscgo || Windows)
   649			mp->g0 = runtime·malg(-1);
   650		else
   651			mp->g0 = runtime·malg(8192);
   652	
   653		if(p == m->p)
   654			releasep();
   655		m->locks--;
   656		if(m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
   657			g->stackguard0 = StackPreempt;
   658	
   659		return mp;
   660	}
   661	
   662	static M* lockextra(bool nilokay);
   663	static void unlockextra(M*);
   664	
   665	// needm is called when a cgo callback happens on a
   666	// thread without an m (a thread not created by Go).
   667	// In this case, needm is expected to find an m to use
   668	// and return with m, g initialized correctly.
   669	// Since m and g are not set now (likely nil, but see below)
   670	// needm is limited in what routines it can call. In particular
   671	// it can only call nosplit functions (textflag 7) and cannot
   672	// do any scheduling that requires an m.
   673	//
   674	// In order to avoid needing heavy lifting here, we adopt
   675	// the following strategy: there is a stack of available m's
   676	// that can be stolen. Using compare-and-swap
   677	// to pop from the stack has ABA races, so we simulate
   678	// a lock by doing an exchange (via casp) to steal the stack
   679	// head and replace the top pointer with MLOCKED (1).
   680	// This serves as a simple spin lock that we can use even
   681	// without an m. The thread that locks the stack in this way
   682	// unlocks the stack by storing a valid stack head pointer.
   683	//
   684	// In order to make sure that there is always an m structure
   685	// available to be stolen, we maintain the invariant that there
   686	// is always one more than needed. At the beginning of the
   687	// program (if cgo is in use) the list is seeded with a single m.
   688	// If needm finds that it has taken the last m off the list, its job
   689	// is - once it has installed its own m so that it can do things like
   690	// allocate memory - to create a spare m and put it on the list.
   691	//
   692	// Each of these extra m's also has a g0 and a curg that are
   693	// pressed into service as the scheduling stack and current
   694	// goroutine for the duration of the cgo callback.
   695	//
   696	// When the callback is done with the m, it calls dropm to
   697	// put the m back on the list.
   698	#pragma textflag NOSPLIT
   699	void
   700	runtime·needm(byte x)
   701	{
   702		M *mp;
   703	
   704		if(runtime·needextram) {
    705			// Can happen if C/C++ code calls Go from a global constructor.
    706			// Cannot throw, because the scheduler is not initialized yet.
   707			runtime·write(2, "fatal error: cgo callback before cgo call\n",
   708				sizeof("fatal error: cgo callback before cgo call\n")-1);
   709			runtime·exit(1);
   710		}
   711	
   712		// Lock extra list, take head, unlock popped list.
   713		// nilokay=false is safe here because of the invariant above,
   714		// that the extra list always contains or will soon contain
   715		// at least one m.
   716		mp = lockextra(false);
   717	
   718		// Set needextram when we've just emptied the list,
   719		// so that the eventual call into cgocallbackg will
   720		// allocate a new m for the extra list. We delay the
   721		// allocation until then so that it can be done
   722		// after exitsyscall makes sure it is okay to be
   723		// running at all (that is, there's no garbage collection
   724		// running right now).
   725		mp->needextram = mp->schedlink == nil;
   726		unlockextra(mp->schedlink);
   727	
   728		// Install m and g (= m->g0) and set the stack bounds
   729		// to match the current stack. We don't actually know
   730		// how big the stack is, like we don't know how big any
   731		// scheduling stack is, but we assume there's at least 32 kB,
   732		// which is more than enough for us.
   733		runtime·setmg(mp, mp->g0);
   734		g->stackbase = (uintptr)(&x + 1024);
   735		g->stackguard = (uintptr)(&x - 32*1024);
   736		g->stackguard0 = g->stackguard;
   737	
   738	#ifdef GOOS_windows
   739	#ifdef GOARCH_386
   740		// On windows/386, we need to put an SEH frame (two words)
   741		// somewhere on the current stack. We are called from cgocallback_gofunc
   742		// and we know that it will leave two unused words below m->curg->sched.sp.
   743		// Use those.
   744		m->seh = (SEH*)((uintptr*)&x + 1);
   745	#endif
   746	#endif
   747	
   748		// Initialize this thread to use the m.
   749		runtime·asminit();
   750		runtime·minit();
   751	}
   752	
   753	// newextram allocates an m and puts it on the extra list.
   754	// It is called with a working local m, so that it can do things
   755	// like call schedlock and allocate.
   756	void
   757	runtime·newextram(void)
   758	{
   759		M *mp, *mnext;
   760		G *gp;
   761	
   762		// Create extra goroutine locked to extra m.
   763		// The goroutine is the context in which the cgo callback will run.
   764		// The sched.pc will never be returned to, but setting it to
   765		// runtime.goexit makes clear to the traceback routines where
   766		// the goroutine stack ends.
   767		mp = runtime·allocm(nil);
   768		gp = runtime·malg(4096);
   769		gp->sched.pc = (uintptr)runtime·goexit;
   770		gp->sched.sp = gp->stackbase;
   771		gp->sched.lr = 0;
   772		gp->sched.g = gp;
   773		gp->syscallpc = gp->sched.pc;
   774		gp->syscallsp = gp->sched.sp;
   775		gp->syscallstack = gp->stackbase;
   776		gp->syscallguard = gp->stackguard;
   777		gp->status = Gsyscall;
   778		mp->curg = gp;
   779		mp->locked = LockInternal;
   780		mp->lockedg = gp;
   781		gp->lockedm = mp;
   782		gp->goid = runtime·xadd64(&runtime·sched.goidgen, 1);
   783		if(raceenabled)
   784			gp->racectx = runtime·racegostart(runtime·newextram);
   785		// put on allg for garbage collector
   786		runtime·lock(&runtime·sched);
   787		if(runtime·lastg == nil)
   788			runtime·allg = gp;
   789		else
   790			runtime·lastg->alllink = gp;
   791		runtime·lastg = gp;
   792		runtime·unlock(&runtime·sched);
   793	
   794		// Add m to the extra list.
   795		mnext = lockextra(true);
   796		mp->schedlink = mnext;
   797		unlockextra(mp);
   798	}
   799	
   800	// dropm is called when a cgo callback has called needm but is now
   801	// done with the callback and returning back into the non-Go thread.
   802	// It puts the current m back onto the extra list.
   803	//
   804	// The main expense here is the call to signalstack to release the
   805	// m's signal stack, and then the call to needm on the next callback
   806	// from this thread. It is tempting to try to save the m for next time,
   807	// which would eliminate both these costs, but there might not be
   808	// a next time: the current thread (which Go does not control) might exit.
   809	// If we saved the m for that thread, there would be an m leak each time
   810	// such a thread exited. Instead, we acquire and release an m on each
   811	// call. These should typically not be scheduling operations, just a few
   812	// atomics, so the cost should be small.
   813	//
   814	// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
   815	// variable using pthread_key_create. Unlike the pthread keys we already use
   816	// on OS X, this dummy key would never be read by Go code. It would exist
    817	// only so that we could register a thread-exit-time destructor.
   818	// That destructor would put the m back onto the extra list.
   819	// This is purely a performance optimization. The current version,
   820	// in which dropm happens on each cgo call, is still correct too.
   821	// We may have to keep the current version on systems with cgo
   822	// but without pthreads, like Windows.
   823	void
   824	runtime·dropm(void)
   825	{
   826		M *mp, *mnext;
   827	
   828		// Undo whatever initialization minit did during needm.
   829		runtime·unminit();
   830	
   831	#ifdef GOOS_windows
   832	#ifdef GOARCH_386
   833		m->seh = nil;  // reset dangling typed pointer
   834	#endif
   835	#endif
   836	
   837		// Clear m and g, and return m to the extra list.
   838		// After the call to setmg we can only call nosplit functions.
   839		mp = m;
   840		runtime·setmg(nil, nil);
   841	
   842		mnext = lockextra(true);
   843		mp->schedlink = mnext;
   844		unlockextra(mp);
   845	}
   846	
   847	#define MLOCKED ((M*)1)
   848	
   849	// lockextra locks the extra list and returns the list head.
   850	// The caller must unlock the list by storing a new list head
   851	// to runtime.extram. If nilokay is true, then lockextra will
   852	// return a nil list head if that's what it finds. If nilokay is false,
   853	// lockextra will keep waiting until the list head is no longer nil.
   854	#pragma textflag NOSPLIT
   855	static M*
   856	lockextra(bool nilokay)
   857	{
   858		M *mp;
   859		void (*yield)(void);
   860	
   861		for(;;) {
   862			mp = runtime·atomicloadp(&runtime·extram);
   863			if(mp == MLOCKED) {
   864				yield = runtime·osyield;
   865				yield();
   866				continue;
   867			}
   868			if(mp == nil && !nilokay) {
   869				runtime·usleep(1);
   870				continue;
   871			}
   872			if(!runtime·casp(&runtime·extram, mp, MLOCKED)) {
   873				yield = runtime·osyield;
   874				yield();
   875				continue;
   876			}
   877			break;
   878		}
   879		return mp;
   880	}
   881	
   882	#pragma textflag NOSPLIT
   883	static void
   884	unlockextra(M *mp)
   885	{
   886		runtime·atomicstorep(&runtime·extram, mp);
   887	}
   888	
   889	
   890	// Create a new m.  It will start off with a call to fn, or else the scheduler.
   891	static void
   892	newm(void(*fn)(void), P *p)
   893	{
   894		M *mp;
   895	
   896		mp = runtime·allocm(p);
   897		mp->nextp = p;
   898		mp->mstartfn = fn;
   899	
   900		if(runtime·iscgo) {
   901			CgoThreadStart ts;
   902	
   903			if(_cgo_thread_start == nil)
   904				runtime·throw("_cgo_thread_start missing");
   905			ts.m = mp;
   906			ts.g = mp->g0;
   907			ts.fn = runtime·mstart;
   908			runtime·asmcgocall(_cgo_thread_start, &ts);
   909			return;
   910		}
   911		runtime·newosproc(mp, (byte*)mp->g0->stackbase);
   912	}
   913	
   914	// Stops execution of the current m until new work is available.
   915	// Returns with acquired P.
   916	static void
   917	stopm(void)
   918	{
   919		if(m->locks)
   920			runtime·throw("stopm holding locks");
   921		if(m->p)
   922			runtime·throw("stopm holding p");
   923		if(m->spinning) {
   924			m->spinning = false;
   925			runtime·xadd(&runtime·sched.nmspinning, -1);
   926		}
   927	
   928	retry:
   929		runtime·lock(&runtime·sched);
   930		mput(m);
   931		runtime·unlock(&runtime·sched);
   932		runtime·notesleep(&m->park);
   933		runtime·noteclear(&m->park);
   934		if(m->helpgc) {
   935			runtime·gchelper();
   936			m->helpgc = 0;
   937			m->mcache = nil;
   938			goto retry;
   939		}
   940		acquirep(m->nextp);
   941		m->nextp = nil;
   942	}
   943	
   944	static void
   945	mspinning(void)
   946	{
   947		m->spinning = true;
   948	}
   949	
    950	// Schedules some M to run the p (creates an M if necessary).
    951	// If p==nil, tries to get an idle P; if there are no idle P's, it returns without starting anything.
   952	static void
   953	startm(P *p, bool spinning)
   954	{
   955		M *mp;
   956		void (*fn)(void);
   957	
   958		runtime·lock(&runtime·sched);
   959		if(p == nil) {
   960			p = pidleget();
   961			if(p == nil) {
   962				runtime·unlock(&runtime·sched);
   963				if(spinning)
   964					runtime·xadd(&runtime·sched.nmspinning, -1);
   965				return;
   966			}
   967		}
   968		mp = mget();
   969		runtime·unlock(&runtime·sched);
   970		if(mp == nil) {
   971			fn = nil;
   972			if(spinning)
   973				fn = mspinning;
   974			newm(fn, p);
   975			return;
   976		}
   977		if(mp->spinning)
   978			runtime·throw("startm: m is spinning");
   979		if(mp->nextp)
   980			runtime·throw("startm: m has p");
   981		mp->spinning = spinning;
   982		mp->nextp = p;
   983		runtime·notewakeup(&mp->park);
   984	}
   985	
   986	// Hands off P from syscall or locked M.
   987	static void
   988	handoffp(P *p)
   989	{
   990		// if it has local work, start it straight away
   991		if(p->runqhead != p->runqtail || runtime·sched.runqsize) {
   992			startm(p, false);
   993			return;
   994		}
   995		// no local work, check that there are no spinning/idle M's,
   996		// otherwise our help is not required
   997		if(runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) == 0 &&  // TODO: fast atomic
   998			runtime·cas(&runtime·sched.nmspinning, 0, 1)) {
   999			startm(p, true);
  1000			return;
  1001		}
  1002		runtime·lock(&runtime·sched);
  1003		if(runtime·sched.gcwaiting) {
  1004			p->status = Pgcstop;
  1005			if(--runtime·sched.stopwait == 0)
  1006				runtime·notewakeup(&runtime·sched.stopnote);
  1007			runtime·unlock(&runtime·sched);
  1008			return;
  1009		}
  1010		if(runtime·sched.runqsize) {
  1011			runtime·unlock(&runtime·sched);
  1012			startm(p, false);
  1013			return;
  1014		}
   1015		// If this is the last running P and nobody is polling the network,
   1016		// we need to wake up another M to poll the network.
  1017		if(runtime·sched.npidle == runtime·gomaxprocs-1 && runtime·atomicload64(&runtime·sched.lastpoll) != 0) {
  1018			runtime·unlock(&runtime·sched);
  1019			startm(p, false);
  1020			return;
  1021		}
  1022		pidleput(p);
  1023		runtime·unlock(&runtime·sched);
  1024	}
  1025	
  1026	// Tries to add one more P to execute G's.
  1027	// Called when a G is made runnable (newproc, ready).
  1028	static void
  1029	wakep(void)
  1030	{
  1031		// be conservative about spinning threads
  1032		if(!runtime·cas(&runtime·sched.nmspinning, 0, 1))
  1033			return;
  1034		startm(nil, true);
  1035	}
  1036	
  1037	// Stops execution of the current m that is locked to a g until the g is runnable again.
  1038	// Returns with acquired P.
  1039	static void
  1040	stoplockedm(void)
  1041	{
  1042		P *p;
  1043	
  1044		if(m->lockedg == nil || m->lockedg->lockedm != m)
  1045			runtime·throw("stoplockedm: inconsistent locking");
  1046		if(m->p) {
  1047			// Schedule another M to run this p.
  1048			p = releasep();
  1049			handoffp(p);
  1050		}
  1051		incidlelocked(1);
  1052		// Wait until another thread schedules lockedg again.
  1053		runtime·notesleep(&m->park);
  1054		runtime·noteclear(&m->park);
  1055		if(m->lockedg->status != Grunnable)
  1056			runtime·throw("stoplockedm: not runnable");
  1057		acquirep(m->nextp);
  1058		m->nextp = nil;
  1059	}
  1060	
  1061	// Schedules the locked m to run the locked gp.
  1062	static void
  1063	startlockedm(G *gp)
  1064	{
  1065		M *mp;
  1066		P *p;
  1067	
  1068		mp = gp->lockedm;
  1069		if(mp == m)
  1070			runtime·throw("startlockedm: locked to me");
  1071		if(mp->nextp)
  1072			runtime·throw("startlockedm: m has p");
   1073		// directly hand off the current P to the locked m
  1074		incidlelocked(-1);
  1075		p = releasep();
  1076		mp->nextp = p;
  1077		runtime·notewakeup(&mp->park);
  1078		stopm();
  1079	}
  1080	
  1081	// Stops the current m for stoptheworld.
  1082	// Returns when the world is restarted.
  1083	static void
  1084	gcstopm(void)
  1085	{
  1086		P *p;
  1087	
  1088		if(!runtime·sched.gcwaiting)
  1089			runtime·throw("gcstopm: not waiting for gc");
  1090		if(m->spinning) {
  1091			m->spinning = false;
  1092			runtime·xadd(&runtime·sched.nmspinning, -1);
  1093		}
  1094		p = releasep();
  1095		runtime·lock(&runtime·sched);
  1096		p->status = Pgcstop;
  1097		if(--runtime·sched.stopwait == 0)
  1098			runtime·notewakeup(&runtime·sched.stopnote);
  1099		runtime·unlock(&runtime·sched);
  1100		stopm();
  1101	}
  1102	
  1103	// Schedules gp to run on the current M.
  1104	// Never returns.
  1105	static void
  1106	execute(G *gp)
  1107	{
  1108		int32 hz;
  1109	
  1110		if(gp->status != Grunnable) {
  1111			runtime·printf("execute: bad g status %d\n", gp->status);
  1112			runtime·throw("execute: bad g status");
  1113		}
  1114		gp->status = Grunning;
  1115		gp->preempt = false;
  1116		gp->stackguard0 = gp->stackguard;
  1117		m->p->schedtick++;
  1118		m->curg = gp;
  1119		gp->m = m;
  1120	
  1121		// Check whether the profiler needs to be turned on or off.
  1122		hz = runtime·sched.profilehz;
  1123		if(m->profilehz != hz)
  1124			runtime·resetcpuprofiler(hz);
  1125	
  1126		runtime·gogo(&gp->sched);
  1127	}
  1128	
  1129	// Finds a runnable goroutine to execute.
  1130	// Tries to steal from other P's, get g from global queue, poll network.
  1131	static G*
  1132	findrunnable(void)
  1133	{
  1134		G *gp;
  1135		P *p;
  1136		int32 i;
  1137	
  1138	top:
  1139		if(runtime·sched.gcwaiting) {
  1140			gcstopm();
  1141			goto top;
  1142		}
  1143		// local runq
  1144		gp = runqget(m->p);
  1145		if(gp)
  1146			return gp;
  1147		// global runq
  1148		if(runtime·sched.runqsize) {
  1149			runtime·lock(&runtime·sched);
  1150			gp = globrunqget(m->p, 0);
  1151			runtime·unlock(&runtime·sched);
  1152			if(gp)
  1153				return gp;
  1154		}
  1155		// poll network
  1156		gp = runtime·netpoll(false);  // non-blocking
  1157		if(gp) {
  1158			injectglist(gp->schedlink);
  1159			gp->status = Grunnable;
  1160			return gp;
  1161		}
  1162		// If number of spinning M's >= number of busy P's, block.
  1163		// This is necessary to prevent excessive CPU consumption
  1164		// when GOMAXPROCS>>1 but the program parallelism is low.
  1165		if(!m->spinning && 2 * runtime·atomicload(&runtime·sched.nmspinning) >= runtime·gomaxprocs - runtime·atomicload(&runtime·sched.npidle))  // TODO: fast atomic
  1166			goto stop;
  1167		if(!m->spinning) {
  1168			m->spinning = true;
  1169			runtime·xadd(&runtime·sched.nmspinning, 1);
  1170		}
  1171		// random steal from other P's
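       		// Make up to 2*GOMAXPROCS attempts, picking a random victim P each time.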
  1172		for(i = 0; i < 2*runtime·gomaxprocs; i++) {
  1173			if(runtime·sched.gcwaiting)
  1174				goto top;
  1175			p = runtime·allp[runtime·fastrand1()%runtime·gomaxprocs];
  1176			if(p == m->p)
  1177				gp = runqget(p);
  1178			else
  1179				gp = runqsteal(m->p, p);
  1180			if(gp)
  1181				return gp;
  1182		}
  1183	stop:
  1184		// return P and block
  1185		runtime·lock(&runtime·sched);
  1186		if(runtime·sched.gcwaiting) {
  1187			runtime·unlock(&runtime·sched);
  1188			goto top;
  1189		}
  1190		if(runtime·sched.runqsize) {
  1191			gp = globrunqget(m->p, 0);
  1192			runtime·unlock(&runtime·sched);
  1193			return gp;
  1194		}
  1195		p = releasep();
  1196		pidleput(p);
  1197		runtime·unlock(&runtime·sched);
  1198		if(m->spinning) {
  1199			m->spinning = false;
  1200			runtime·xadd(&runtime·sched.nmspinning, -1);
  1201		}
  1202		// check all runqueues once again
  1203		for(i = 0; i < runtime·gomaxprocs; i++) {
  1204			p = runtime·allp[i];
  1205			if(p && p->runqhead != p->runqtail) {
  1206				runtime·lock(&runtime·sched);
  1207				p = pidleget();
  1208				runtime·unlock(&runtime·sched);
  1209				if(p) {
  1210					acquirep(p);
  1211					goto top;
  1212				}
  1213				break;
  1214			}
  1215		}
  1216		// poll network
  1217		if(runtime·xchg64(&runtime·sched.lastpoll, 0) != 0) {
  1218			if(m->p)
  1219				runtime·throw("findrunnable: netpoll with p");
  1220			if(m->spinning)
  1221				runtime·throw("findrunnable: netpoll with spinning");
  1222			gp = runtime·netpoll(true);  // block until new work is available
  1223			runtime·atomicstore64(&runtime·sched.lastpoll, runtime·nanotime());
  1224			if(gp) {
  1225				runtime·lock(&runtime·sched);
  1226				p = pidleget();
  1227				runtime·unlock(&runtime·sched);
  1228				if(p) {
  1229					acquirep(p);
  1230					injectglist(gp->schedlink);
  1231					gp->status = Grunnable;
  1232					return gp;
  1233				}
  1234				injectglist(gp);
  1235			}
  1236		}
  1237		stopm();
  1238		goto top;
  1239	}
  1240	
  1241	static void
  1242	resetspinning(void)
  1243	{
  1244		int32 nmspinning;
  1245	
  1246		if(m->spinning) {
  1247			m->spinning = false;
  1248			nmspinning = runtime·xadd(&runtime·sched.nmspinning, -1);
  1249			if(nmspinning < 0)
  1250				runtime·throw("findrunnable: negative nmspinning");
  1251		} else
  1252			nmspinning = runtime·atomicload(&runtime·sched.nmspinning);
  1253	
   1254		// M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
   1255		// so see if we need to wake up another P here.
  1256		if (nmspinning == 0 && runtime·atomicload(&runtime·sched.npidle) > 0)
  1257			wakep();
  1258	}
  1259	
  1260	// Injects the list of runnable G's into the scheduler.
  1261	// Can run concurrently with GC.
  1262	static void
  1263	injectglist(G *glist)
  1264	{
  1265		int32 n;
  1266		G *gp;
  1267	
  1268		if(glist == nil)
  1269			return;
  1270		runtime·lock(&runtime·sched);
  1271		for(n = 0; glist; n++) {
  1272			gp = glist;
  1273			glist = gp->schedlink;
  1274			gp->status = Grunnable;
  1275			globrunqput(gp);
  1276		}
  1277		runtime·unlock(&runtime·sched);
  1278	
  1279		for(; n && runtime·sched.npidle; n--)
  1280			startm(nil, false);
  1281	}
  1282	
  1283	// One round of scheduler: find a runnable goroutine and execute it.
  1284	// Never returns.
  1285	static void
  1286	schedule(void)
  1287	{
  1288		G *gp;
  1289		uint32 tick;
  1290	
  1291		if(m->locks)
  1292			runtime·throw("schedule: holding locks");
  1293	
  1294	top:
  1295		if(runtime·sched.gcwaiting) {
  1296			gcstopm();
  1297			goto top;
  1298		}
  1299	
  1300		gp = nil;
  1301		// Check the global runnable queue once in a while to ensure fairness.
  1302		// Otherwise two goroutines can completely occupy the local runqueue
  1303		// by constantly respawning each other.
  1304		tick = m->p->schedtick;
   1305		// This is a fancy way to say tick%61==0;
   1306		// it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
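       		// (0x4325c53f is about 2^36/61, so ((uint64)tick*0x4325c53f)>>36 computes tick/61 for any 32-bit tick.)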
  1307		if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime·sched.runqsize > 0) {
  1308			runtime·lock(&runtime·sched);
  1309			gp = globrunqget(m->p, 1);
  1310			runtime·unlock(&runtime·sched);
  1311			if(gp)
  1312				resetspinning();
  1313		}
  1314		if(gp == nil) {
  1315			gp = runqget(m->p);
  1316			if(gp && m->spinning)
  1317				runtime·throw("schedule: spinning with local work");
  1318		}
  1319		if(gp == nil) {
  1320			gp = findrunnable();  // blocks until work is available
  1321			resetspinning();
  1322		}
  1323	
  1324		if(gp->lockedm) {
  1325			// Hands off own p to the locked m,
  1326			// then blocks waiting for a new p.
  1327			startlockedm(gp);
  1328			goto top;
  1329		}
  1330	
  1331		execute(gp);
  1332	}
  1333	
  1334	// Puts the current goroutine into a waiting state and unlocks the lock.
  1335	// The goroutine can be made runnable again by calling runtime·ready(gp).
  1336	void
  1337	runtime·park(void(*unlockf)(Lock*), Lock *lock, int8 *reason)
  1338	{
  1339		m->waitlock = lock;
  1340		m->waitunlockf = unlockf;
  1341		g->waitreason = reason;
  1342		runtime·mcall(park0);
  1343	}
  1344	
  1345	// runtime·park continuation on g0.
  1346	static void
  1347	park0(G *gp)
  1348	{
  1349		gp->status = Gwaiting;
  1350		gp->m = nil;
  1351		m->curg = nil;
  1352		if(m->waitunlockf) {
  1353			m->waitunlockf(m->waitlock);
  1354			m->waitunlockf = nil;
  1355			m->waitlock = nil;
  1356		}
  1357		if(m->lockedg) {
  1358			stoplockedm();
  1359			execute(gp);  // Never returns.
  1360		}
  1361		schedule();
  1362	}
  1363	
  1364	// Scheduler yield.
  1365	void
  1366	runtime·gosched(void)
  1367	{
  1368		runtime·mcall(runtime·gosched0);
  1369	}
  1370	
  1371	// runtime·gosched continuation on g0.
  1372	void
  1373	runtime·gosched0(G *gp)
  1374	{
  1375		gp->status = Grunnable;
  1376		gp->m = nil;
  1377		m->curg = nil;
  1378		runtime·lock(&runtime·sched);
  1379		globrunqput(gp);
  1380		runtime·unlock(&runtime·sched);
  1381		if(m->lockedg) {
  1382			stoplockedm();
  1383			execute(gp);  // Never returns.
  1384		}
  1385		schedule();
  1386	}
  1387	
  1388	// Finishes execution of the current goroutine.
   1389	// It must be marked nosplit because it runs with sp > stackbase (like runtime·lessstack).
   1390	// Since it does not return, that would not normally matter.  But if it is preempted
  1391	// at the split stack check, GC will complain about inconsistent sp.
  1392	#pragma textflag NOSPLIT
  1393	void
  1394	runtime·goexit(void)
  1395	{
  1396		if(raceenabled)
  1397			runtime·racegoend();
  1398		runtime·mcall(goexit0);
  1399	}
  1400	
  1401	// runtime·goexit continuation on g0.
  1402	static void
  1403	goexit0(G *gp)
  1404	{
  1405		gp->status = Gdead;
  1406		gp->m = nil;
  1407		gp->lockedm = nil;
  1408		m->curg = nil;
  1409		m->lockedg = nil;
  1410		if(m->locked & ~LockExternal) {
  1411			runtime·printf("invalid m->locked = %d\n", m->locked);
  1412			runtime·throw("internal lockOSThread error");
  1413		}	
  1414		m->locked = 0;
  1415		runtime·unwindstack(gp, nil);
  1416		gfput(m->p, gp);
  1417		schedule();
  1418	}
  1419	
  1420	#pragma textflag NOSPLIT
  1421	static void
  1422	save(void *pc, uintptr sp)
  1423	{
  1424		g->sched.pc = (uintptr)pc;
  1425		g->sched.sp = sp;
  1426		g->sched.lr = 0;
  1427		g->sched.ret = 0;
  1428		g->sched.ctxt = 0;
  1429		g->sched.g = g;
  1430	}
  1431	
  1432	// The goroutine g is about to enter a system call.
  1433	// Record that it's not using the cpu anymore.
  1434	// This is called only from the go syscall library and cgocall,
  1435	// not from the low-level system calls used by the runtime.
  1436	//
  1437	// Entersyscall cannot split the stack: the runtime·gosave must
  1438	// make g->sched refer to the caller's stack segment, because
  1439	// entersyscall is going to return immediately after.
  1440	#pragma textflag NOSPLIT
  1441	void
  1442	·entersyscall(int32 dummy)
  1443	{
   1444		// Disable preemption: during this function g is in Gsyscall status
   1445		// but can have an inconsistent g->sched, and we must not let the GC observe it.
  1446		m->locks++;
  1447	
  1448		// Leave SP around for GC and traceback.
  1449		save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
  1450		g->syscallsp = g->sched.sp;
  1451		g->syscallpc = g->sched.pc;
  1452		g->syscallstack = g->stackbase;
  1453		g->syscallguard = g->stackguard;
  1454		g->status = Gsyscall;
  1455		if(g->syscallsp < g->syscallguard-StackGuard || g->syscallstack < g->syscallsp) {
  1456			// runtime·printf("entersyscall inconsistent %p [%p,%p]\n",
  1457			//	g->syscallsp, g->syscallguard-StackGuard, g->syscallstack);
  1458			runtime·throw("entersyscall");
  1459		}
  1460	
  1461		if(runtime·atomicload(&runtime·sched.sysmonwait)) {  // TODO: fast atomic
  1462			runtime·lock(&runtime·sched);
  1463			if(runtime·atomicload(&runtime·sched.sysmonwait)) {
  1464				runtime·atomicstore(&runtime·sched.sysmonwait, 0);
  1465				runtime·notewakeup(&runtime·sched.sysmonnote);
  1466			}
  1467			runtime·unlock(&runtime·sched);
  1468			save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
  1469		}
  1470	
  1471		m->mcache = nil;
  1472		m->p->m = nil;
  1473		runtime·atomicstore(&m->p->status, Psyscall);
  1474		if(runtime·sched.gcwaiting) {
  1475			runtime·lock(&runtime·sched);
  1476			if (runtime·sched.stopwait > 0 && runtime·cas(&m->p->status, Psyscall, Pgcstop)) {
  1477				if(--runtime·sched.stopwait == 0)
  1478					runtime·notewakeup(&runtime·sched.stopnote);
  1479			}
  1480			runtime·unlock(&runtime·sched);
  1481			save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
  1482		}
  1483	
  1484		// Goroutines must not split stacks in Gsyscall status (it would corrupt g->sched).
  1485		// We set stackguard to StackPreempt so that first split stack check calls morestack.
  1486		// Morestack detects this case and throws.
  1487		g->stackguard0 = StackPreempt;
  1488		m->locks--;
  1489	}
  1490	
  1491	// The same as runtime·entersyscall(), but with a hint that the syscall is blocking.
  1492	#pragma textflag NOSPLIT
  1493	void
  1494	·entersyscallblock(int32 dummy)
  1495	{
  1496		P *p;
  1497	
  1498		m->locks++;  // see comment in entersyscall
  1499	
  1500		// Leave SP around for GC and traceback.
  1501		save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
  1502		g->syscallsp = g->sched.sp;
  1503		g->syscallpc = g->sched.pc;
  1504		g->syscallstack = g->stackbase;
  1505		g->syscallguard = g->stackguard;
  1506		g->status = Gsyscall;
  1507		if(g->syscallsp < g->syscallguard-StackGuard || g->syscallstack < g->syscallsp) {
  1508			// runtime·printf("entersyscall inconsistent %p [%p,%p]\n",
  1509			//	g->syscallsp, g->syscallguard-StackGuard, g->syscallstack);
  1510			runtime·throw("entersyscallblock");
  1511		}
  1512	
  1513		p = releasep();
  1514		handoffp(p);
  1515		if(g->isbackground)  // do not consider blocked scavenger for deadlock detection
  1516			incidlelocked(1);
  1517	
  1518		// Resave for traceback during blocked call.
  1519		save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
  1520	
  1521		g->stackguard0 = StackPreempt;  // see comment in entersyscall
  1522		m->locks--;
  1523	}
  1524	
  1525	// The goroutine g exited its system call.
  1526	// Arrange for it to run on a cpu again.
  1527	// This is called only from the go syscall library, not
  1528	// from the low-level system calls used by the runtime.
  1529	#pragma textflag NOSPLIT
  1530	void
  1531	runtime·exitsyscall(void)
  1532	{
  1533		m->locks++;  // see comment in entersyscall
  1534	
  1535		if(g->isbackground)  // do not consider blocked scavenger for deadlock detection
  1536			incidlelocked(-1);
  1537	
  1538		if(exitsyscallfast()) {
  1539			// There's a cpu for us, so we can run.
  1540			m->p->syscalltick++;
  1541			g->status = Grunning;
  1542			// Garbage collector isn't running (since we are),
  1543			// so okay to clear gcstack and gcsp.
  1544			g->syscallstack = (uintptr)nil;
  1545			g->syscallsp = (uintptr)nil;
  1546			m->locks--;
  1547			if(g->preempt) {
  1548				// restore the preemption request in case we've cleared it in newstack
  1549				g->stackguard0 = StackPreempt;
  1550			} else {
   1551				// otherwise restore the real stackguard; we spoiled it in entersyscall/entersyscallblock
  1552				g->stackguard0 = g->stackguard;
  1553			}
  1554			return;
  1555		}
  1556	
  1557		m->locks--;
  1558	
  1559		// Call the scheduler.
  1560		runtime·mcall(exitsyscall0);
  1561	
  1562		// Scheduler returned, so we're allowed to run now.
  1563		// Delete the gcstack information that we left for
  1564		// the garbage collector during the system call.
  1565		// Must wait until now because until gosched returns
  1566		// we don't know for sure that the garbage collector
  1567		// is not running.
  1568		g->syscallstack = (uintptr)nil;
  1569		g->syscallsp = (uintptr)nil;
  1570		m->p->syscalltick++;
  1571	}
  1572	
  1573	#pragma textflag NOSPLIT
  1574	static bool
  1575	exitsyscallfast(void)
  1576	{
  1577		P *p;
  1578	
  1579		// Freezetheworld sets stopwait but does not retake P's.
  1580		if(runtime·sched.stopwait) {
  1581			m->p = nil;
  1582			return false;
  1583		}
  1584	
  1585		// Try to re-acquire the last P.
  1586		if(m->p && m->p->status == Psyscall && runtime·cas(&m->p->status, Psyscall, Prunning)) {
  1587			// There's a cpu for us, so we can run.
  1588			m->mcache = m->p->mcache;
  1589			m->p->m = m;
  1590			return true;
  1591		}
  1592		// Try to get any other idle P.
  1593		m->p = nil;
  1594		if(runtime·sched.pidle) {
  1595			runtime·lock(&runtime·sched);
  1596			p = pidleget();
  1597			if(p && runtime·atomicload(&runtime·sched.sysmonwait)) {
  1598				runtime·atomicstore(&runtime·sched.sysmonwait, 0);
  1599				runtime·notewakeup(&runtime·sched.sysmonnote);
  1600			}
  1601			runtime·unlock(&runtime·sched);
  1602			if(p) {
  1603				acquirep(p);
  1604				return true;
  1605			}
  1606		}
  1607		return false;
  1608	}
  1609	
  1610	// runtime·exitsyscall slow path on g0.
  1611	// Failed to acquire P, enqueue gp as runnable.
  1612	static void
  1613	exitsyscall0(G *gp)
  1614	{
  1615		P *p;
  1616	
  1617		gp->status = Grunnable;
  1618		gp->m = nil;
  1619		m->curg = nil;
  1620		runtime·lock(&runtime·sched);
  1621		p = pidleget();
  1622		if(p == nil)
  1623			globrunqput(gp);
  1624		else if(runtime·atomicload(&runtime·sched.sysmonwait)) {
  1625			runtime·atomicstore(&runtime·sched.sysmonwait, 0);
  1626			runtime·notewakeup(&runtime·sched.sysmonnote);
  1627		}
  1628		runtime·unlock(&runtime·sched);
  1629		if(p) {
  1630			acquirep(p);
  1631			execute(gp);  // Never returns.
  1632		}
  1633		if(m->lockedg) {
  1634			// Wait until another thread schedules gp and so m again.
  1635			stoplockedm();
  1636			execute(gp);  // Never returns.
  1637		}
  1638		stopm();
  1639		schedule();  // Never returns.
  1640	}
  1641	
  1642	// Called from syscall package before fork.
  1643	void
  1644	syscall·runtime_BeforeFork(void)
  1645	{
  1646		// Fork can hang if preempted with signals frequently enough (see issue 5517).
  1647		// Ensure that we stay on the same M where we disable profiling.
  1648		m->locks++;
  1649		if(m->profilehz != 0)
  1650			runtime·resetcpuprofiler(0);
  1651	}
  1652	
  1653	// Called from syscall package after fork in parent.
  1654	void
  1655	syscall·runtime_AfterFork(void)
  1656	{
  1657		int32 hz;
  1658	
  1659		hz = runtime·sched.profilehz;
  1660		if(hz != 0)
  1661			runtime·resetcpuprofiler(hz);
  1662		m->locks--;
  1663	}
  1664	
  1665	// Hook used by runtime·malg to call runtime·stackalloc on the
  1666	// scheduler stack.  This exists because runtime·stackalloc insists
  1667	// on being called on the scheduler stack, to avoid trying to grow
  1668	// the stack while allocating a new stack segment.
  1669	static void
  1670	mstackalloc(G *gp)
  1671	{
  1672		gp->param = runtime·stackalloc((uintptr)gp->param);
  1673		runtime·gogo(&gp->sched);
  1674	}
  1675	
  1676	// Allocate a new g, with a stack big enough for stacksize bytes.
  1677	G*
  1678	runtime·malg(int32 stacksize)
  1679	{
  1680		G *newg;
  1681		byte *stk;
  1682	
  1683		if(StackTop < sizeof(Stktop)) {
  1684			runtime·printf("runtime: SizeofStktop=%d, should be >=%d\n", (int32)StackTop, (int32)sizeof(Stktop));
  1685			runtime·throw("runtime: bad stack.h");
  1686		}
  1687	
  1688		newg = runtime·malloc(sizeof(G));
  1689		if(stacksize >= 0) {
  1690			if(g == m->g0) {
  1691				// running on scheduler stack already.
  1692				stk = runtime·stackalloc(StackSystem + stacksize);
  1693			} else {
  1694				// have to call stackalloc on scheduler stack.
  1695				g->param = (void*)(StackSystem + stacksize);
  1696				runtime·mcall(mstackalloc);
  1697				stk = g->param;
  1698				g->param = nil;
  1699			}
  1700			newg->stacksize = StackSystem + stacksize;
  1701			newg->stack0 = (uintptr)stk;
  1702			newg->stackguard = (uintptr)stk + StackGuard;
  1703			newg->stackguard0 = newg->stackguard;
  1704			newg->stackbase = (uintptr)stk + StackSystem + stacksize - sizeof(Stktop);
  1705			runtime·memclr((byte*)newg->stackbase, sizeof(Stktop));
  1706		}
  1707		return newg;
  1708	}
  1709	
  1710	// Create a new g running fn with siz bytes of arguments.
  1711	// Put it on the queue of g's waiting to run.
  1712	// The compiler turns a go statement into a call to this.
  1713	// Cannot split the stack because it assumes that the arguments
  1714	// are available sequentially after &fn; they would not be
  1715	// copied if a stack split occurred.  It's OK for this to call
  1716	// functions that split the stack.
  1717	#pragma textflag NOSPLIT
  1718	void
  1719	runtime·newproc(int32 siz, FuncVal* fn, ...)
  1720	{
  1721		byte *argp;
  1722	
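       		// thechar '5' identifies ARM, where the caller's saved LR sits on the stack between &fn and the arguments.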
  1723		if(thechar == '5')
  1724			argp = (byte*)(&fn+2);  // skip caller's saved LR
  1725		else
  1726			argp = (byte*)(&fn+1);
  1727		runtime·newproc1(fn, argp, siz, 0, runtime·getcallerpc(&siz));
  1728	}
  1729	
  1730	// Create a new g running fn with narg bytes of arguments starting
  1731	// at argp and returning nret bytes of results.  callerpc is the
  1732	// address of the go statement that created this.  The new g is put
  1733	// on the queue of g's waiting to run.
  1734	G*
  1735	runtime·newproc1(FuncVal *fn, byte *argp, int32 narg, int32 nret, void *callerpc)
  1736	{
  1737		byte *sp;
  1738		G *newg;
  1739		int32 siz;
  1740	
  1741	//runtime·printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret);
  1742		m->locks++;  // disable preemption because it can be holding p in a local var
  1743		siz = narg + nret;
  1744		siz = (siz+7) & ~7;
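	// E.g. narg+nret = 20 is rounded up to 24, keeping the argument area a
	// multiple of 8 bytes.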
  1745	
  1746		// We could instead create a secondary stack frame
  1747		// and make it look like goexit was on the original but
  1748		// the call to the actual goroutine function was split.
  1749		// Not worth it: this is almost always an error.
  1750		if(siz > StackMin - 1024)
  1751			runtime·throw("runtime.newproc: function arguments too large for new goroutine");
  1752	
  1753		if((newg = gfget(m->p)) != nil) {
  1754			if(newg->stackguard - StackGuard != newg->stack0)
  1755				runtime·throw("invalid stack in newg");
  1756		} else {
  1757			newg = runtime·malg(StackMin);
  1758			runtime·lock(&runtime·sched);
  1759			if(runtime·lastg == nil)
  1760				runtime·allg = newg;
  1761			else
  1762				runtime·lastg->alllink = newg;
  1763			runtime·lastg = newg;
  1764			runtime·unlock(&runtime·sched);
  1765		}
  1766	
  1767		sp = (byte*)newg->stackbase;
  1768		sp -= siz;
  1769		runtime·memmove(sp, argp, narg);
  1770		if(thechar == '5') {
  1771			// caller's LR
  1772			sp -= sizeof(void*);
  1773			*(void**)sp = nil;
  1774		}
  1775	
  1776		runtime·memclr((byte*)&newg->sched, sizeof newg->sched);
  1777		newg->sched.sp = (uintptr)sp;
  1778		newg->sched.pc = (uintptr)runtime·goexit;
  1779		newg->sched.g = newg;
  1780		runtime·gostartcallfn(&newg->sched, fn);
  1781		newg->gopc = (uintptr)callerpc;
  1782		newg->status = Grunnable;
  1783		newg->goid = runtime·xadd64(&runtime·sched.goidgen, 1);
  1784		newg->panicwrap = 0;
  1785		if(raceenabled)
  1786			newg->racectx = runtime·racegostart((void*)callerpc);
  1787		runqput(m->p, newg);
  1788	
  1789		if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0 && fn->fn != runtime·main)  // TODO: fast atomic
  1790			wakep();
  1791		m->locks--;
  1792		if(m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
  1793			g->stackguard0 = StackPreempt;
  1794		return newg;
  1795	}
  1796	
  1797	// Put on gfree list.
  1798	// If local list is too long, transfer a batch to the global list.
  1799	static void
  1800	gfput(P *p, G *gp)
  1801	{
  1802		if(gp->stackguard - StackGuard != gp->stack0)
  1803			runtime·throw("invalid stack in gfput");
  1804		gp->schedlink = p->gfree;
  1805		p->gfree = gp;
  1806		p->gfreecnt++;
  1807		if(p->gfreecnt >= 64) {
  1808			runtime·lock(&runtime·sched.gflock);
  1809			while(p->gfreecnt >= 32) {
  1810				p->gfreecnt--;
  1811				gp = p->gfree;
  1812				p->gfree = gp->schedlink;
  1813				gp->schedlink = runtime·sched.gfree;
  1814				runtime·sched.gfree = gp;
  1815			}
  1816			runtime·unlock(&runtime·sched.gflock);
  1817		}
  1818	}
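// gfput caps the local free list at 64 G's by spilling half of it (down to 32)
// into the global list; gfget below refills the local list up to 32 from the
// global one when the local list is empty.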
  1819	
  1820	// Get from gfree list.
  1821	// If local list is empty, grab a batch from global list.
  1822	static G*
  1823	gfget(P *p)
  1824	{
  1825		G *gp;
  1826	
  1827	retry:
  1828		gp = p->gfree;
  1829		if(gp == nil && runtime·sched.gfree) {
  1830			runtime·lock(&runtime·sched.gflock);
  1831			while(p->gfreecnt < 32 && runtime·sched.gfree) {
  1832				p->gfreecnt++;
  1833				gp = runtime·sched.gfree;
  1834				runtime·sched.gfree = gp->schedlink;
  1835				gp->schedlink = p->gfree;
  1836				p->gfree = gp;
  1837			}
  1838			runtime·unlock(&runtime·sched.gflock);
  1839			goto retry;
  1840		}
  1841		if(gp) {
  1842			p->gfree = gp->schedlink;
  1843			p->gfreecnt--;
  1844		}
  1845		return gp;
  1846	}
  1847	
  1848	// Purge all cached G's from gfree list to the global list.
  1849	static void
  1850	gfpurge(P *p)
  1851	{
  1852		G *gp;
  1853	
  1854		runtime·lock(&runtime·sched.gflock);
  1855		while(p->gfreecnt) {
  1856			p->gfreecnt--;
  1857			gp = p->gfree;
  1858			p->gfree = gp->schedlink;
  1859			gp->schedlink = runtime·sched.gfree;
  1860			runtime·sched.gfree = gp;
  1861		}
  1862		runtime·unlock(&runtime·sched.gflock);
  1863	}
  1864	
  1865	void
  1866	runtime·Breakpoint(void)
  1867	{
  1868		runtime·breakpoint();
  1869	}
  1870	
  1871	void
  1872	runtime·Gosched(void)
  1873	{
  1874		runtime·gosched();
  1875	}
  1876	
  1877	// Implementation of runtime.GOMAXPROCS.
  1878	// delete when scheduler is even stronger
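// A call with n <= 0 or n equal to the current setting only reads the value;
// otherwise the world is stopped, the new value is recorded in newprocs, and
// procresize applies it when the world is restarted.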
  1879	int32
  1880	runtime·gomaxprocsfunc(int32 n)
  1881	{
  1882		int32 ret;
  1883	
  1884		if(n > MaxGomaxprocs)
  1885			n = MaxGomaxprocs;
  1886		runtime·lock(&runtime·sched);
  1887		ret = runtime·gomaxprocs;
  1888		if(n <= 0 || n == ret) {
  1889			runtime·unlock(&runtime·sched);
  1890			return ret;
  1891		}
  1892		runtime·unlock(&runtime·sched);
  1893	
  1894		runtime·semacquire(&runtime·worldsema, false);
  1895		m->gcing = 1;
  1896		runtime·stoptheworld();
  1897		newprocs = n;
  1898		m->gcing = 0;
  1899		runtime·semrelease(&runtime·worldsema);
  1900		runtime·starttheworld();
  1901	
  1902		return ret;
  1903	}
  1904	
  1905	// lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
  1906	// after they modify m->locked. Do not allow preemption during this call,
  1907	// or else the m might be different in this function than in the caller.
  1908	#pragma textflag NOSPLIT
  1909	static void
  1910	lockOSThread(void)
  1911	{
  1912		m->lockedg = g;
  1913		g->lockedm = m;
  1914	}
  1915	
  1916	void
  1917	runtime·LockOSThread(void)
  1918	{
  1919		m->locked |= LockExternal;
  1920		lockOSThread();
  1921	}
  1922	
  1923	void
  1924	runtime·lockOSThread(void)
  1925	{
  1926		m->locked += LockInternal;
  1927		lockOSThread();
  1928	}
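// Note the asymmetry above: LockExternal is a single flag bit, so user-level
// LockOSThread calls do not nest, while LockInternal is maintained as a counter,
// so runtime-internal lock/unlock pairs may nest.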
  1929	
  1930	
  1931	// unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
  1932	// after they update m->locked. Do not allow preemption during this call,
  1933	// or else the m might be different in this function than in the caller.
  1934	#pragma textflag NOSPLIT
  1935	static void
  1936	unlockOSThread(void)
  1937	{
  1938		if(m->locked != 0)
  1939			return;
  1940		m->lockedg = nil;
  1941		g->lockedm = nil;
  1942	}
  1943	
  1944	void
  1945	runtime·UnlockOSThread(void)
  1946	{
  1947		m->locked &= ~LockExternal;
  1948		unlockOSThread();
  1949	}
  1950	
  1951	void
  1952	runtime·unlockOSThread(void)
  1953	{
  1954		if(m->locked < LockInternal)
  1955			runtime·throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
  1956		m->locked -= LockInternal;
  1957		unlockOSThread();
  1958	}
  1959	
  1960	bool
  1961	runtime·lockedOSThread(void)
  1962	{
  1963		return g->lockedm != nil && m->lockedg != nil;
  1964	}
  1965	
  1966	// for testing of callbacks
  1967	void
  1968	runtime·golockedOSThread(bool ret)
  1969	{
  1970		ret = runtime·lockedOSThread();
  1971		FLUSH(&ret);
  1972	}
  1973	
  1974	void
  1975	runtime·NumGoroutine(intgo ret)
  1976	{
  1977		ret = runtime·gcount();
  1978		FLUSH(&ret);
  1979	}
  1980	
  1981	int32
  1982	runtime·gcount(void)
  1983	{
  1984		G *gp;
  1985		int32 n, s;
  1986	
  1987		n = 0;
  1988		runtime·lock(&runtime·sched);
  1989		// TODO(dvyukov): runtime.NumGoroutine() is O(N).
  1990		// We do not want to increment/decrement centralized counter in newproc/goexit,
  1991		// just to make runtime.NumGoroutine() faster.
  1992		// Compromise solution is to introduce per-P counters of active goroutines.
  1993		for(gp = runtime·allg; gp; gp = gp->alllink) {
  1994			s = gp->status;
  1995			if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting)
  1996				n++;
  1997		}
  1998		runtime·unlock(&runtime·sched);
  1999		return n;
  2000	}
  2001	
  2002	int32
  2003	runtime·mcount(void)
  2004	{
  2005		return runtime·sched.mcount;
  2006	}
  2007	
  2008	void
  2009	runtime·badmcall(void (*fn)(G*))  // called from assembly
  2010	{
  2011		USED(fn); // TODO: print fn?
  2012		runtime·throw("runtime: mcall called on m->g0 stack");
  2013	}
  2014	
  2015	void
  2016	runtime·badmcall2(void (*fn)(G*))  // called from assembly
  2017	{
  2018		USED(fn);
  2019		runtime·throw("runtime: mcall function returned");
  2020	}
  2021	
  2022	void
  2023	runtime·badreflectcall(void) // called from assembly
  2024	{
  2025		runtime·panicstring("runtime: arg size to reflect.call more than 1GB");
  2026	}
  2027	
  2028	static struct {
  2029		Lock;
  2030		void (*fn)(uintptr*, int32);
  2031		int32 hz;
  2032		uintptr pcbuf[100];
  2033	} prof;
  2034	
  2035	static void
  2036	System(void)
  2037	{
  2038	}
  2039	
  2040	// Called if we receive a SIGPROF signal.
  2041	void
  2042	runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp)
  2043	{
  2044		int32 n;
  2045		bool traceback;
  2046	
  2047		if(prof.fn == nil || prof.hz == 0)
  2048			return;
  2049		traceback = true;
  2050		// Windows does profiling in a dedicated thread w/o m.
  2051		if(!Windows && (m == nil || m->mcache == nil))
  2052			traceback = false;
  2053		
  2054		// Define that a "user g" is a user-created goroutine, and a "system g"
  2055		// is one that is m->g0 or m->gsignal. We've only made sure that we
  2056		// can unwind user g's, so exclude the system g's.
  2057		//
  2058		// It is not quite as easy as testing gp == m->curg (the current user g)
  2059		// because we might be interrupted for profiling halfway through a
  2060		// goroutine switch. The switch involves updating three (or four) values:
  2061		// g, PC, SP, and (on arm) LR. The PC must be the last to be updated,
  2062		// because once it gets updated the new g is running.
  2063		//
  2064		// When switching from a user g to a system g, LR is not considered live,
  2065	// so the update only affects g, SP, and PC. Since PC must be last,
  2066		// the possible partial transitions in ordinary execution are (1) g alone is updated,
  2067		// (2) both g and SP are updated, and (3) SP alone is updated.
  2068		// If g is updated, we'll see a system g and not look closer.
  2069		// If SP alone is updated, we can detect the partial transition by checking
  2070		// whether the SP is within g's stack bounds. (We could also require that SP
  2071		// be changed only after g, but the stack bounds check is needed by other
  2072		// cases, so there is no need to impose an additional requirement.)
  2073		//
  2074		// There is one exceptional transition to a system g, not in ordinary execution.
  2075		// When a signal arrives, the operating system starts the signal handler running
  2076		// with an updated PC and SP. The g is updated last, at the beginning of the
  2077		// handler. There are two reasons this is okay. First, until g is updated the
  2078		// g and SP do not match, so the stack bounds check detects the partial transition.
  2079		// Second, signal handlers currently run with signals disabled, so a profiling
  2080		// signal cannot arrive during the handler.
  2081		//
  2082		// When switching from a system g to a user g, there are three possibilities.
  2083		//
  2084		// First, it may be that the g switch has no PC update, because the SP
  2085		// either corresponds to a user g throughout (as in runtime.asmcgocall)
  2086		// or because it has been arranged to look like a user g frame
  2087		// (as in runtime.cgocallback_gofunc). In this case, since the entire
  2088		// transition is a g+SP update, a partial transition updating just one of 
  2089		// those will be detected by the stack bounds check.
  2090		//
  2091		// Second, when returning from a signal handler, the PC and SP updates
  2092		// are performed by the operating system in an atomic update, so the g
  2093		// update must be done before them. The stack bounds check detects
  2094		// the partial transition here, and (again) signal handlers run with signals
  2095		// disabled, so a profiling signal cannot arrive then anyway.
  2096		//
  2097		// Third, the common case: it may be that the switch updates g, SP, and PC
  2098		// separately, as in runtime.gogo.
  2099		//
  2100		// Because runtime.gogo is the only instance, we check whether the PC lies
  2101	// within that function, and if so, do not ask for a traceback. This approach
  2102		// requires knowing the size of the runtime.gogo function, which we
  2103		// record in arch_*.h and check in runtime_test.go.
  2104		//
  2105		// There is another apparently viable approach, recorded here in case
  2106		// the "PC within runtime.gogo" check turns out not to be usable.
  2107		// It would be possible to delay the update of either g or SP until immediately
  2108		// before the PC update instruction. Then, because of the stack bounds check,
  2109		// the only problematic interrupt point is just before that PC update instruction,
  2110		// and the sigprof handler can detect that instruction and simulate stepping past
  2111		// it in order to reach a consistent state. On ARM, the update of g must be made
  2112		// in two places (in R10 and also in a TLS slot), so the delayed update would
  2113		// need to be the SP update. The sigprof handler must read the instruction at
  2114		// the current PC and if it was the known instruction (for example, JMP BX or 
  2115		// MOV R2, PC), use that other register in place of the PC value.
  2116		// The biggest drawback to this solution is that it requires that we can tell
  2117		// whether it's safe to read from the memory pointed at by PC.
  2118		// In a correct program, we can test PC == nil and otherwise read,
  2119		// but if a profiling signal happens at the instant that a program executes
  2120		// a bad jump (before the program manages to handle the resulting fault)
  2121		// the profiling handler could fault trying to read nonexistent memory.
  2122		//
  2123		// To recap, there are no constraints on the assembly being used for the
  2124		// transition. We simply require that g and SP match and that the PC is not
  2125		// in runtime.gogo.
  2126		//
  2127		// On Windows, one m is sending reports about all the g's, so gp == m->curg
  2128		// is not a useful comparison. The profilem function in os_windows.c has
  2129		// already checked that gp is a user g.
  2130		if(gp == nil ||
  2131		   (!Windows && gp != m->curg) ||
  2132		   (uintptr)sp < gp->stackguard - StackGuard || gp->stackbase < (uintptr)sp ||
  2133		   ((uint8*)runtime·gogo <= pc && pc < (uint8*)runtime·gogo + RuntimeGogoBytes))
  2134			traceback = false;
  2135	
  2136	// Race detector calls asmcgocall w/o entersyscall/exitsyscall;
  2137	// we cannot currently unwind through asmcgocall.
  2138		if(m != nil && m->racecall)
  2139			traceback = false;
  2140	
  2141		runtime·lock(&prof);
  2142		if(prof.fn == nil) {
  2143			runtime·unlock(&prof);
  2144			return;
  2145		}
  2146		n = 0;
  2147		if(traceback)
  2148			n = runtime·gentraceback((uintptr)pc, (uintptr)sp, (uintptr)lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil, false);
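	// If no traceback could be taken, record a two-frame sample that attributes
	// the tick to the interrupted PC with the placeholder System function as its
	// caller.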
  2149		if(!traceback || n <= 0) {
  2150			n = 2;
  2151			prof.pcbuf[0] = (uintptr)pc;
  2152			prof.pcbuf[1] = (uintptr)System + 1;
  2153		}
  2154		prof.fn(prof.pcbuf, n);
  2155		runtime·unlock(&prof);
  2156	}
  2157	
  2158	// Arrange to call fn with a traceback hz times a second.
  2159	void
  2160	runtime·setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
  2161	{
  2162		// Force sane arguments.
  2163		if(hz < 0)
  2164			hz = 0;
  2165		if(hz == 0)
  2166			fn = nil;
  2167		if(fn == nil)
  2168			hz = 0;
  2169	
  2170		// Disable preemption, otherwise we can be rescheduled to another thread
  2171		// that has profiling enabled.
  2172		m->locks++;
  2173	
  2174		// Stop profiler on this thread so that it is safe to lock prof.
  2175	// If a profiling signal came in while we had prof locked,
  2176		// it would deadlock.
  2177		runtime·resetcpuprofiler(0);
  2178	
  2179		runtime·lock(&prof);
  2180		prof.fn = fn;
  2181		prof.hz = hz;
  2182		runtime·unlock(&prof);
  2183		runtime·lock(&runtime·sched);
  2184		runtime·sched.profilehz = hz;
  2185		runtime·unlock(&runtime·sched);
  2186	
  2187		if(hz != 0)
  2188			runtime·resetcpuprofiler(hz);
  2189	
  2190		m->locks--;
  2191	}
  2192	
  2193	// Change number of processors.  The world is stopped, sched is locked.
  2194	static void
  2195	procresize(int32 new)
  2196	{
  2197		int32 i, old;
  2198		G *gp;
  2199		P *p;
  2200	
  2201		old = runtime·gomaxprocs;
  2202		if(old < 0 || old > MaxGomaxprocs || new <= 0 || new > MaxGomaxprocs)
  2203			runtime·throw("procresize: invalid arg");
  2204		// initialize new P's
  2205		for(i = 0; i < new; i++) {
  2206			p = runtime·allp[i];
  2207			if(p == nil) {
  2208				p = (P*)runtime·mallocgc(sizeof(*p), 0, FlagNoInvokeGC);
  2209				p->id = i;
  2210				p->status = Pgcstop;
  2211				runtime·atomicstorep(&runtime·allp[i], p);
  2212			}
  2213			if(p->mcache == nil) {
  2214				if(old==0 && i==0)
  2215					p->mcache = m->mcache;  // bootstrap
  2216				else
  2217					p->mcache = runtime·allocmcache();
  2218			}
  2219			if(p->runq == nil) {
  2220				p->runqsize = 128;
  2221				p->runq = (G**)runtime·mallocgc(p->runqsize*sizeof(G*), 0, FlagNoInvokeGC);
  2222			}
  2223		}
  2224	
  2225		// redistribute runnable G's evenly
  2226		for(i = 0; i < old; i++) {
  2227			p = runtime·allp[i];
  2228			while(gp = runqget(p))
  2229				globrunqput(gp);
  2230		}
  2231		// start at 1 because current M already executes some G and will acquire allp[0] below,
  2232		// so if we have a spare G we want to put it into allp[1].
  2233		for(i = 1; runtime·sched.runqhead; i++) {
  2234			gp = runtime·sched.runqhead;
  2235			runtime·sched.runqhead = gp->schedlink;
  2236			runqput(runtime·allp[i%new], gp);
  2237		}
  2238		runtime·sched.runqtail = nil;
  2239		runtime·sched.runqsize = 0;
  2240	
  2241		// free unused P's
  2242		for(i = new; i < old; i++) {
  2243			p = runtime·allp[i];
  2244			runtime·freemcache(p->mcache);
  2245			p->mcache = nil;
  2246			gfpurge(p);
  2247			p->status = Pdead;
  2248			// can't free P itself because it can be referenced by an M in syscall
  2249		}
  2250	
  2251		if(m->p)
  2252			m->p->m = nil;
  2253		m->p = nil;
  2254		m->mcache = nil;
  2255		p = runtime·allp[0];
  2256		p->m = nil;
  2257		p->status = Pidle;
  2258		acquirep(p);
  2259		for(i = new-1; i > 0; i--) {
  2260			p = runtime·allp[i];
  2261			p->status = Pidle;
  2262			pidleput(p);
  2263		}
  2264		runtime·atomicstore((uint32*)&runtime·gomaxprocs, new);
  2265	}
  2266	
  2267	// Associate p and the current m.
  2268	static void
  2269	acquirep(P *p)
  2270	{
  2271		if(m->p || m->mcache)
  2272			runtime·throw("acquirep: already in go");
  2273		if(p->m || p->status != Pidle) {
  2274			runtime·printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? p->m->id : 0, p->status);
  2275			runtime·throw("acquirep: invalid p state");
  2276		}
  2277		m->mcache = p->mcache;
  2278		m->p = p;
  2279		p->m = m;
  2280		p->status = Prunning;
  2281	}
  2282	
  2283	// Disassociate p and the current m.
  2284	static P*
  2285	releasep(void)
  2286	{
  2287		P *p;
  2288	
  2289		if(m->p == nil || m->mcache == nil)
  2290			runtime·throw("releasep: invalid arg");
  2291		p = m->p;
  2292		if(p->m != m || p->mcache != m->mcache || p->status != Prunning) {
  2293			runtime·printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
  2294				m, m->p, p->m, m->mcache, p->mcache, p->status);
  2295			runtime·throw("releasep: invalid p state");
  2296		}
  2297		m->p = nil;
  2298		m->mcache = nil;
  2299		p->m = nil;
  2300		p->status = Pidle;
  2301		return p;
  2302	}
  2303	
  2304	static void
  2305	incidlelocked(int32 v)
  2306	{
  2307		runtime·lock(&runtime·sched);
  2308		runtime·sched.nmidlelocked += v;
  2309		if(v > 0)
  2310			checkdead();
  2311		runtime·unlock(&runtime·sched);
  2312	}
  2313	
  2314	// Check for deadlock situation.
  2315	// The check is based on the number of running M's; if it is 0, we have a deadlock.
  2316	static void
  2317	checkdead(void)
  2318	{
  2319		G *gp;
  2320		int32 run, grunning, s;
  2321	
  2322		// -1 for sysmon
  2323		run = runtime·sched.mcount - runtime·sched.nmidle - runtime·sched.nmidlelocked - 1;
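	// run counts m's that are busy: all created m's except the idle ones,
	// the idle locked ones, and the sysmon thread.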
  2324		if(run > 0)
  2325			return;
  2326		if(run < 0) {
  2327			runtime·printf("checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
  2328				runtime·sched.nmidle, runtime·sched.nmidlelocked, runtime·sched.mcount);
  2329			runtime·throw("checkdead: inconsistent counts");
  2330		}
  2331		grunning = 0;
  2332		for(gp = runtime·allg; gp; gp = gp->alllink) {
  2333			if(gp->isbackground)
  2334				continue;
  2335			s = gp->status;
  2336			if(s == Gwaiting)
  2337				grunning++;
  2338			else if(s == Grunnable || s == Grunning || s == Gsyscall) {
  2339				runtime·printf("checkdead: find g %D in status %d\n", gp->goid, s);
  2340				runtime·throw("checkdead: runnable g");
  2341			}
  2342		}
  2343		if(grunning == 0)  // possible if main goroutine calls runtime·Goexit()
  2344			runtime·exit(0);
  2345		m->throwing = -1;  // do not dump full stacks
  2346		runtime·throw("all goroutines are asleep - deadlock!");
  2347	}
  2348	
  2349	static void
  2350	sysmon(void)
  2351	{
  2352		uint32 idle, delay;
  2353		int64 now, lastpoll, lasttrace;
  2354		G *gp;
  2355	
  2356		lasttrace = 0;
  2357		idle = 0;  // how many cycles in succession we have not woken anybody up
  2358		delay = 0;
  2359		for(;;) {
  2360			if(idle == 0)  // start with 20us sleep...
  2361				delay = 20;
  2362			else if(idle > 50)  // start doubling the sleep after 1ms...
  2363				delay *= 2;
  2364			if(delay > 10*1000)  // up to 10ms
  2365				delay = 10*1000;
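		// Net effect: sysmon sleeps 20us per iteration for the first 50 idle
		// iterations (about 1ms in total), then doubles the sleep each idle
		// iteration (40us, 80us, ...) up to the 10ms cap.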
  2366			runtime·usleep(delay);
  2367			if(runtime·debug.schedtrace <= 0 &&
  2368				(runtime·sched.gcwaiting || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs)) {  // TODO: fast atomic
  2369				runtime·lock(&runtime·sched);
  2370				if(runtime·atomicload(&runtime·sched.gcwaiting) || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs) {
  2371					runtime·atomicstore(&runtime·sched.sysmonwait, 1);
  2372					runtime·unlock(&runtime·sched);
  2373					runtime·notesleep(&runtime·sched.sysmonnote);
  2374					runtime·noteclear(&runtime·sched.sysmonnote);
  2375					idle = 0;
  2376					delay = 20;
  2377				} else
  2378					runtime·unlock(&runtime·sched);
  2379			}
  2380			// poll network if not polled for more than 10ms
  2381			lastpoll = runtime·atomicload64(&runtime·sched.lastpoll);
  2382			now = runtime·nanotime();
  2383			if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
  2384				runtime·cas64(&runtime·sched.lastpoll, lastpoll, now);
  2385				gp = runtime·netpoll(false);  // non-blocking
  2386				if(gp) {
  2387					// Need to decrement number of idle locked M's
  2388					// (pretending that one more is running) before injectglist.
  2389					// Otherwise it can lead to the following situation:
  2390					// injectglist grabs all P's but before it starts M's to run the P's,
  2391					// another M returns from syscall, finishes running its G,
  2392					// observes that there is no work to do and no other running M's
  2393					// and reports deadlock.
  2394					incidlelocked(-1);
  2395					injectglist(gp);
  2396					incidlelocked(1);
  2397				}
  2398			}
  2399			// retake P's blocked in syscalls
  2400			// and preempt long running G's
  2401			if(retake(now))
  2402				idle = 0;
  2403			else
  2404				idle++;
  2405	
  2406			if(runtime·debug.schedtrace > 0 && lasttrace + runtime·debug.schedtrace*1000000ll <= now) {
  2407				lasttrace = now;
  2408				runtime·schedtrace(runtime·debug.scheddetail);
  2409			}
  2410		}
  2411	}
  2412	
  2413	typedef struct Pdesc Pdesc;
  2414	struct Pdesc
  2415	{
  2416		uint32	schedtick;
  2417		int64	schedwhen;
  2418		uint32	syscalltick;
  2419		int64	syscallwhen;
  2420	};
  2421	static Pdesc pdesc[MaxGomaxprocs];
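// pdesc[i] remembers the schedtick/syscalltick values last observed for
// runtime·allp[i] and when they were observed, so retake below can tell whether
// that P has made progress since the previous sysmon inspection.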
  2422	
  2423	static uint32
  2424	retake(int64 now)
  2425	{
  2426		uint32 i, s, n;
  2427		int64 t;
  2428		P *p;
  2429		Pdesc *pd;
  2430	
  2431		n = 0;
  2432		for(i = 0; i < runtime·gomaxprocs; i++) {
  2433			p = runtime·allp[i];
  2434			if(p==nil)
  2435				continue;
  2436			pd = &pdesc[i];
  2437			s = p->status;
  2438			if(s == Psyscall) {
  2439				// Retake P from syscall if it's there for more than 1 sysmon tick (20us).
  2440				// But only if there is other work to do.
  2441				t = p->syscalltick;
  2442				if(pd->syscalltick != t) {
  2443					pd->syscalltick = t;
  2444					pd->syscallwhen = now;
  2445					continue;
  2446				}
  2447				if(p->runqhead == p->runqtail &&
  2448					runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) > 0)
  2449					continue;
  2450				// Need to decrement number of idle locked M's
  2451				// (pretending that one more is running) before the CAS.
  2452				// Otherwise the M from which we retake can exit the syscall,
  2453				// increment nmidle and report deadlock.
  2454				incidlelocked(-1);
  2455				if(runtime·cas(&p->status, s, Pidle)) {
  2456					n++;
  2457					handoffp(p);
  2458				}
  2459				incidlelocked(1);
  2460			} else if(s == Prunning) {
  2461				// Preempt G if it's running for more than 10ms.
  2462				t = p->schedtick;
  2463				if(pd->schedtick != t) {
  2464					pd->schedtick = t;
  2465					pd->schedwhen = now;
  2466					continue;
  2467				}
  2468				if(pd->schedwhen + 10*1000*1000 > now)
  2469					continue;
  2470				preemptone(p);
  2471			}
  2472		}
  2473		return n;
  2474	}
  2475	
  2476	// Tell all goroutines that they have been preempted and they should stop.
  2477	// This function is purely best-effort.  It can fail to inform a goroutine if a
  2478	// processor just started running it.
  2479	// No locks need to be held.
  2480	// Returns true if preemption request was issued to at least one goroutine.
  2481	static bool
  2482	preemptall(void)
  2483	{
  2484		P *p;
  2485		int32 i;
  2486		bool res;
  2487	
  2488		res = false;
  2489		for(i = 0; i < runtime·gomaxprocs; i++) {
  2490			p = runtime·allp[i];
  2491			if(p == nil || p->status != Prunning)
  2492				continue;
  2493			res |= preemptone(p);
  2494		}
  2495		return res;
  2496	}
  2497	
  2498	// Tell the goroutine running on processor P to stop.
  2499	// This function is purely best-effort.  It can incorrectly fail to inform the
  2500	// goroutine.  It can inform the wrong goroutine.  Even if it informs the
  2501	// correct goroutine, that goroutine might ignore the request if it is
  2502	// simultaneously executing runtime·newstack.
  2503	// No lock needs to be held.
  2504	// Returns true if preemption request was issued.
  2505	static bool
  2506	preemptone(P *p)
  2507	{
  2508		M *mp;
  2509		G *gp;
  2510	
  2511		mp = p->m;
  2512		if(mp == nil || mp == m)
  2513			return false;
  2514		gp = mp->curg;
  2515		if(gp == nil || gp == mp->g0)
  2516			return false;
  2517		gp->preempt = true;
  2518		gp->stackguard0 = StackPreempt;
  2519		return true;
  2520	}
  2521	
  2522	void
  2523	runtime·schedtrace(bool detailed)
  2524	{
  2525		static int64 starttime;
  2526		int64 now;
  2527		int64 id1, id2, id3;
  2528		int32 i, q, t, h, s;
  2529		int8 *fmt;
  2530		M *mp, *lockedm;
  2531		G *gp, *lockedg;
  2532		P *p;
  2533	
  2534		now = runtime·nanotime();
  2535		if(starttime == 0)
  2536			starttime = now;
  2537	
  2538		runtime·lock(&runtime·sched);
  2539		runtime·printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idlethreads=%d runqueue=%d",
  2540			(now-starttime)/1000000, runtime·gomaxprocs, runtime·sched.npidle, runtime·sched.mcount,
  2541			runtime·sched.nmidle, runtime·sched.runqsize);
  2542		if(detailed) {
  2543			runtime·printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stopwait=%d sysmonwait=%d\n",
  2544				runtime·sched.gcwaiting, runtime·sched.nmidlelocked, runtime·sched.nmspinning,
  2545				runtime·sched.stopwait, runtime·sched.sysmonwait);
  2546		}
  2547		// We must be careful while reading data from P's, M's and G's.
  2548		// Even if we hold schedlock, most data can be changed concurrently.
  2549		// E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
  2550		for(i = 0; i < runtime·gomaxprocs; i++) {
  2551			p = runtime·allp[i];
  2552			if(p == nil)
  2553				continue;
  2554			mp = p->m;
  2555			t = p->runqtail;
  2556			h = p->runqhead;
  2557			s = p->runqsize;
  2558			q = t - h;
  2559			if(q < 0)
  2560				q += s;
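		// q is the run queue length adjusted for wrap-around:
		// e.g. h=120, t=3, s=128 gives q = 3-120+128 = 11.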
  2561			if(detailed)
  2562				runtime·printf("  P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d/%d gfreecnt=%d\n",
  2563					i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, q, s, p->gfreecnt);
  2564			else {
  2565				// In non-detailed mode format lengths of per-P run queues as:
  2566				// [len1 len2 len3 len4]
  2567				fmt = " %d";
  2568				if(runtime·gomaxprocs == 1)
  2569					fmt = " [%d]\n";
  2570				else if(i == 0)
  2571					fmt = " [%d";
  2572				else if(i == runtime·gomaxprocs-1)
  2573					fmt = " %d]\n";
  2574				runtime·printf(fmt, q);
  2575			}
  2576		}
  2577		if(!detailed) {
  2578			runtime·unlock(&runtime·sched);
  2579			return;
  2580		}
  2581		for(mp = runtime·allm; mp; mp = mp->alllink) {
  2582			p = mp->p;
  2583			gp = mp->curg;
  2584			lockedg = mp->lockedg;
  2585			id1 = -1;
  2586			if(p)
  2587				id1 = p->id;
  2588			id2 = -1;
  2589			if(gp)
  2590				id2 = gp->goid;
  2591			id3 = -1;
  2592			if(lockedg)
  2593				id3 = lockedg->goid;
  2594			runtime·printf("  M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d"
  2595				" locks=%d dying=%d helpgc=%d spinning=%d lockedg=%D\n",
  2596				mp->id, id1, id2,
  2597				mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
  2598				mp->spinning, id3);
  2599		}
  2600		for(gp = runtime·allg; gp; gp = gp->alllink) {
  2601			mp = gp->m;
  2602			lockedm = gp->lockedm;
  2603			runtime·printf("  G%D: status=%d(%s) m=%d lockedm=%d\n",
  2604				gp->goid, gp->status, gp->waitreason, mp ? mp->id : -1,
  2605				lockedm ? lockedm->id : -1);
  2606		}
  2607		runtime·unlock(&runtime·sched);
  2608	}
  2609	
  2610	// Put mp on midle list.
  2611	// Sched must be locked.
  2612	static void
  2613	mput(M *mp)
  2614	{
  2615		mp->schedlink = runtime·sched.midle;
  2616		runtime·sched.midle = mp;
  2617		runtime·sched.nmidle++;
  2618		checkdead();
  2619	}
  2620	
  2621	// Try to get an m from midle list.
  2622	// Sched must be locked.
  2623	static M*
  2624	mget(void)
  2625	{
  2626		M *mp;
  2627	
  2628		if((mp = runtime·sched.midle) != nil){
  2629			runtime·sched.midle = mp->schedlink;
  2630			runtime·sched.nmidle--;
  2631		}
  2632		return mp;
  2633	}
  2634	
  2635	// Put gp on the global runnable queue.
  2636	// Sched must be locked.
  2637	static void
  2638	globrunqput(G *gp)
  2639	{
  2640		gp->schedlink = nil;
  2641		if(runtime·sched.runqtail)
  2642			runtime·sched.runqtail->schedlink = gp;
  2643		else
  2644			runtime·sched.runqhead = gp;
  2645		runtime·sched.runqtail = gp;
  2646		runtime·sched.runqsize++;
  2647	}
  2648	
  2649	// Try to get a batch of G's from the global runnable queue.
  2650	// Sched must be locked.
  2651	static G*
  2652	globrunqget(P *p, int32 max)
  2653	{
  2654		G *gp, *gp1;
  2655		int32 n;
  2656	
  2657		if(runtime·sched.runqsize == 0)
  2658			return nil;
  2659		n = runtime·sched.runqsize/runtime·gomaxprocs+1;
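	// Grab a fair share: e.g. with runqsize=10 and gomaxprocs=4 this P takes
	// 10/4+1 = 3 G's, so no single P drains the whole global queue at once.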
  2660		if(n > runtime·sched.runqsize)
  2661			n = runtime·sched.runqsize;
  2662		if(max > 0 && n > max)
  2663			n = max;
  2664		runtime·sched.runqsize -= n;
  2665		if(runtime·sched.runqsize == 0)
  2666			runtime·sched.runqtail = nil;
  2667		gp = runtime·sched.runqhead;
  2668		runtime·sched.runqhead = gp->schedlink;
  2669		n--;
  2670		while(n--) {
  2671			gp1 = runtime·sched.runqhead;
  2672			runtime·sched.runqhead = gp1->schedlink;
  2673			runqput(p, gp1);
  2674		}
  2675		return gp;
  2676	}
  2677	
  2678	// Put p on the pidle list.
  2679	// Sched must be locked.
  2680	static void
  2681	pidleput(P *p)
  2682	{
  2683		p->link = runtime·sched.pidle;
  2684		runtime·sched.pidle = p;
  2685		runtime·xadd(&runtime·sched.npidle, 1);  // TODO: fast atomic
  2686	}
  2687	
  2688	// Try to get a p from the pidle list.
  2689	// Sched must be locked.
  2690	static P*
  2691	pidleget(void)
  2692	{
  2693		P *p;
  2694	
  2695		p = runtime·sched.pidle;
  2696		if(p) {
  2697			runtime·sched.pidle = p->link;
  2698			runtime·xadd(&runtime·sched.npidle, -1);  // TODO: fast atomic
  2699		}
  2700		return p;
  2701	}
  2702	
  2703	// Put g on local runnable queue.
  2704	// TODO(dvyukov): consider using lock-free queue.
  2705	static void
  2706	runqput(P *p, G *gp)
  2707	{
  2708		int32 h, t, s;
  2709	
  2710		runtime·lock(p);
  2711	retry:
  2712		h = p->runqhead;
  2713		t = p->runqtail;
  2714		s = p->runqsize;
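	// The queue is full when the tail is one slot behind the head (mod s);
	// one slot is always left empty so a full queue can be told from an empty
	// one. E.g. with s=128, h=5, t=4 the queue already holds 127 G's.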
  2715		if(t == h-1 || (h == 0 && t == s-1)) {
  2716			runqgrow(p);
  2717			goto retry;
  2718		}
  2719		p->runq[t++] = gp;
  2720		if(t == s)
  2721			t = 0;
  2722		p->runqtail = t;
  2723		runtime·unlock(p);
  2724	}
  2725	
  2726	// Get g from local runnable queue.
  2727	static G*
  2728	runqget(P *p)
  2729	{
  2730		G *gp;
  2731		int32 t, h, s;
  2732	
  2733		if(p->runqhead == p->runqtail)
  2734			return nil;
  2735		runtime·lock(p);
  2736		h = p->runqhead;
  2737		t = p->runqtail;
  2738		s = p->runqsize;
  2739		if(t == h) {
  2740			runtime·unlock(p);
  2741			return nil;
  2742		}
  2743		gp = p->runq[h++];
  2744		if(h == s)
  2745			h = 0;
  2746		p->runqhead = h;
  2747		runtime·unlock(p);
  2748		return gp;
  2749	}
  2750	
  2751	// Grow local runnable queue.
  2752	// TODO(dvyukov): consider using fixed-size array
  2753	// and transfer excess to the global list (local queue can grow way too big).
  2754	static void
  2755	runqgrow(P *p)
  2756	{
  2757		G **q;
  2758		int32 s, t, h, t2;
  2759	
  2760		h = p->runqhead;
  2761		t = p->runqtail;
  2762		s = p->runqsize;
  2763		t2 = 0;
  2764		q = runtime·malloc(2*s*sizeof(*q));
  2765		while(t != h) {
  2766			q[t2++] = p->runq[h++];
  2767			if(h == s)
  2768				h = 0;
  2769		}
  2770		runtime·free(p->runq);
  2771		p->runq = q;
  2772		p->runqhead = 0;
  2773		p->runqtail = t2;
  2774		p->runqsize = 2*s;
  2775	}
  2776	
  2777	// Steal half of elements from local runnable queue of p2
  2778	// and put onto local runnable queue of p.
  2779	// Returns one of the stolen elements (or nil if failed).
  2780	static G*
  2781	runqsteal(P *p, P *p2)
  2782	{
  2783		G *gp, *gp1;
  2784		int32 t, h, s, t2, h2, s2, c, i;
  2785	
  2786		if(p2->runqhead == p2->runqtail)
  2787			return nil;
  2788		// sort locks to prevent deadlocks
  2789		if(p < p2)
  2790			runtime·lock(p);
  2791		runtime·lock(p2);
  2792		if(p2->runqhead == p2->runqtail) {
  2793			runtime·unlock(p2);
  2794			if(p < p2)
  2795				runtime·unlock(p);
  2796			return nil;
  2797		}
  2798		if(p >= p2)
  2799			runtime·lock(p);
  2800		// now we've locked both queues and know the victim is not empty
  2801		h = p->runqhead;
  2802		t = p->runqtail;
  2803		s = p->runqsize;
  2804		h2 = p2->runqhead;
  2805		t2 = p2->runqtail;
  2806		s2 = p2->runqsize;
  2807		gp = p2->runq[h2++];  // return value
  2808		if(h2 == s2)
  2809			h2 = 0;
  2810		// steal roughly half
  2811		if(t2 > h2)
  2812			c = (t2 - h2) / 2;
  2813		else
  2814			c = (s2 - h2 + t2) / 2;
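	// E.g. a wrapped victim queue with s2=128, h2=120, t2=8 still holds
	// 128-120+8 = 16 G's after the one taken above, so c = 8 more are copied.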
  2815		// copy
  2816		for(i = 0; i != c; i++) {
  2817			// the target queue is full?
  2818			if(t == h-1 || (h == 0 && t == s-1))
  2819				break;
  2820			// the victim queue is empty?
  2821			if(t2 == h2)
  2822				break;
  2823			gp1 = p2->runq[h2++];
  2824			if(h2 == s2)
  2825				h2 = 0;
  2826			p->runq[t++] = gp1;
  2827			if(t == s)
  2828				t = 0;
  2829		}
  2830		p->runqtail = t;
  2831		p2->runqhead = h2;
  2832		runtime·unlock(p2);
  2833		runtime·unlock(p);
  2834		return gp;
  2835	}
  2836	
  2837	void
  2838	runtime·testSchedLocalQueue(void)
  2839	{
  2840		P p;
  2841		G gs[1000];
  2842		int32 i, j;
  2843	
  2844		runtime·memclr((byte*)&p, sizeof(p));
  2845		p.runqsize = 1;
  2846		p.runqhead = 0;
  2847		p.runqtail = 0;
  2848		p.runq = runtime·malloc(p.runqsize*sizeof(*p.runq));
  2849	
  2850		for(i = 0; i < nelem(gs); i++) {
  2851			if(runqget(&p) != nil)
  2852				runtime·throw("runq is not empty initially");
  2853			for(j = 0; j < i; j++)
  2854				runqput(&p, &gs[i]);
  2855			for(j = 0; j < i; j++) {
  2856				if(runqget(&p) != &gs[i]) {
  2857					runtime·printf("bad element at iter %d/%d\n", i, j);
  2858					runtime·throw("bad element");
  2859				}
  2860			}
  2861			if(runqget(&p) != nil)
  2862				runtime·throw("runq is not empty afterwards");
  2863		}
  2864	}
  2865	
  2866	void
  2867	runtime·testSchedLocalQueueSteal(void)
  2868	{
  2869		P p1, p2;
  2870		G gs[1000], *gp;
  2871		int32 i, j, s;
  2872	
  2873		runtime·memclr((byte*)&p1, sizeof(p1));
  2874		p1.runqsize = 1;
  2875		p1.runqhead = 0;
  2876		p1.runqtail = 0;
  2877		p1.runq = runtime·malloc(p1.runqsize*sizeof(*p1.runq));
  2878	
  2879		runtime·memclr((byte*)&p2, sizeof(p2));
  2880		p2.runqsize = nelem(gs);
  2881		p2.runqhead = 0;
  2882		p2.runqtail = 0;
  2883		p2.runq = runtime·malloc(p2.runqsize*sizeof(*p2.runq));
  2884	
  2885		for(i = 0; i < nelem(gs); i++) {
  2886			for(j = 0; j < i; j++) {
  2887				gs[j].sig = 0;
  2888				runqput(&p1, &gs[j]);
  2889			}
  2890			gp = runqsteal(&p2, &p1);
  2891			s = 0;
  2892			if(gp) {
  2893				s++;
  2894				gp->sig++;
  2895			}
  2896			while(gp = runqget(&p2)) {
  2897				s++;
  2898				gp->sig++;
  2899			}
  2900			while(gp = runqget(&p1))
  2901				gp->sig++;
  2902			for(j = 0; j < i; j++) {
  2903				if(gs[j].sig != 1) {
  2904					runtime·printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
  2905					runtime·throw("bad element");
  2906				}
  2907			}
  2908			if(s != i/2 && s != i/2+1) {
  2909				runtime·printf("bad steal %d, want %d or %d, iter %d\n",
  2910					s, i/2, i/2+1, i);
  2911				runtime·throw("bad steal");
  2912			}
  2913		}
  2914	}
  2915	
  2916	extern void runtime·morestack(void);
  2917	
  2918	// Does f mark the top of a goroutine stack?
  2919	bool
  2920	runtime·topofstack(Func *f)
  2921	{
  2922		return f->entry == (uintptr)runtime·goexit ||
  2923			f->entry == (uintptr)runtime·mstart ||
  2924			f->entry == (uintptr)runtime·mcall ||
  2925			f->entry == (uintptr)runtime·morestack ||
  2926			f->entry == (uintptr)runtime·lessstack ||
  2927			f->entry == (uintptr)_rt0_go;
  2928	}
  2929	
  2930	void
  2931	runtime∕debug·setMaxThreads(intgo in, intgo out)
  2932	{
  2933		runtime·lock(&runtime·sched);
  2934		out = runtime·sched.maxmcount;
  2935		runtime·sched.maxmcount = in;
  2936		checkmcount();
  2937		runtime·unlock(&runtime·sched);
  2938		FLUSH(&out);
  2939	}
  2940	
  2941	static int8 experiment[] = GOEXPERIMENT; // defined in zaexperiment.h
  2942	
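// haveexperiment reports whether name appears as one of the comma-separated
// entries in GOEXPERIMENT. For example, with GOEXPERIMENT "fieldtrack,precisestack",
// haveexperiment("precisestack") returns 1 while haveexperiment("precise") returns 0.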
  2943	static bool
  2944	haveexperiment(int8 *name)
  2945	{
  2946		int32 i, j;
  2947		
  2948		for(i=0; i<sizeof(experiment); i++) {
  2949			if((i == 0 || experiment[i-1] == ',') && experiment[i] == name[0]) {
  2950				for(j=0; name[j]; j++)
  2951					if(experiment[i+j] != name[j])
  2952						goto nomatch;
  2953				if(experiment[i+j] != '\0' && experiment[i+j] != ',')
  2954					goto nomatch;
  2955				return 1;
  2956			}
  2957		nomatch:;
  2958		}
  2959		return 0;
  2960	}
