The Go Programming Language

Text file src/cmd/8g/reg.c

     1	// Derived from Inferno utils/6c/reg.c
     2	// http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
     3	//
     4	//	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5	//	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6	//	Portions Copyright © 1997-1999 Vita Nuova Limited
     7	//	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8	//	Portions Copyright © 2004,2006 Bruce Ellis
     9	//	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10	//	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11	//	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12	//
    13	// Permission is hereby granted, free of charge, to any person obtaining a copy
    14	// of this software and associated documentation files (the "Software"), to deal
    15	// in the Software without restriction, including without limitation the rights
    16	// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17	// copies of the Software, and to permit persons to whom the Software is
    18	// furnished to do so, subject to the following conditions:
    19	//
    20	// The above copyright notice and this permission notice shall be included in
    21	// all copies or substantial portions of the Software.
    22	//
    23	// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24	// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25	// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26	// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27	// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28	// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29	// THE SOFTWARE.
    30	
    31	#include "gg.h"
    32	#undef	EXTERN
    33	#define	EXTERN
    34	#include "opt.h"
    35	
    36	#define	NREGVAR	8
    37	#define	REGBITS	((uint32)0xff)
    38	#define	P2R(p)	(Reg*)(p->reg)
    39	
    40	static	int	first	= 1;
    41	
    42	Reg*
    43	rega(void)
    44	{
    45		Reg *r;
    46	
    47		r = freer;
    48		if(r == R) {
    49			r = mal(sizeof(*r));
    50		} else
    51			freer = r->link;
    52	
    53		*r = zreg;
    54		return r;
    55	}
    56	
    57	int
    58	rcmp(const void *a1, const void *a2)
    59	{
    60		Rgn *p1, *p2;
    61		int c1, c2;
    62	
    63		p1 = (Rgn*)a1;
    64		p2 = (Rgn*)a2;
    65		c1 = p2->cost;
    66		c2 = p1->cost;
    67		if(c1 -= c2)
    68			return c1;
    69		return p2->varno - p1->varno;
    70	}
    71	
    72	static void
    73	setoutvar(void)
    74	{
    75		Type *t;
    76		Node *n;
    77		Addr a;
    78		Iter save;
    79		Bits bit;
    80		int z;
    81	
    82		t = structfirst(&save, getoutarg(curfn->type));
    83		while(t != T) {
    84			n = nodarg(t, 1);
    85			a = zprog.from;
    86			naddr(n, &a, 0);
    87			bit = mkvar(R, &a);
    88			for(z=0; z<BITS; z++)
    89				ovar.b[z] |= bit.b[z];
    90			t = structnext(&save);
    91		}
    92	//if(bany(b))
    93	//print("ovars = %Q\n", &ovar);
    94	}
    95	
    96	static void
    97	setaddrs(Bits bit)
    98	{
    99		int i, n;
   100		Var *v;
   101		Sym *s;
   102	
   103		while(bany(&bit)) {
   104			// convert each bit to a variable
   105			i = bnum(bit);
   106			s = var[i].sym;
   107			n = var[i].name;
   108			bit.b[i/32] &= ~(1L<<(i%32));
   109	
   110			// disable all pieces of that variable
   111			for(i=0; i<nvar; i++) {
   112				v = var+i;
   113				if(v->sym == s && v->name == n)
   114					v->addr = 2;
   115			}
   116		}
   117	}
   118	
   119	static char* regname[] = { ".ax", ".cx", ".dx", ".bx", ".sp", ".bp", ".si", ".di" };
   120	
   121	void
   122	regopt(Prog *firstp)
   123	{
   124		Reg *r, *r1;
   125		Prog *p;
   126		int i, z, nr;
   127		uint32 vreg;
   128		Bits bit;
   129	
   130		if(first) {
   131			fmtinstall('Q', Qconv);
   132			exregoffset = D_DI;	// no externals
   133			first = 0;
   134		}
   135	
   136		// count instructions
   137		nr = 0;
   138		for(p=firstp; p!=P; p=p->link)
   139			nr++;
   140		// if too big dont bother
   141		if(nr >= 10000) {
   142	//		print("********** %S is too big (%d)\n", curfn->nname->sym, nr);
   143			return;
   144		}
   145	
   146		r1 = R;
   147		firstr = R;
   148		lastr = R;
   149		
   150		/*
   151		 * control flow is more complicated in generated go code
   152		 * than in generated c code.  define pseudo-variables for
   153		 * registers, so we have complete register usage information.
   154		 */
   155		nvar = NREGVAR;
   156		memset(var, 0, NREGVAR*sizeof var[0]);
   157		for(i=0; i<NREGVAR; i++)
   158			var[i].sym = lookup(regname[i]);
   159	
   160		regbits = RtoB(D_SP);
   161		for(z=0; z<BITS; z++) {
   162			externs.b[z] = 0;
   163			params.b[z] = 0;
   164			consts.b[z] = 0;
   165			addrs.b[z] = 0;
   166			ovar.b[z] = 0;
   167		}
   168	
   169		// build list of return variables
   170		setoutvar();
   171	
   172		/*
   173		 * pass 1
   174		 * build aux data structure
   175		 * allocate pcs
   176		 * find use and set of variables
   177		 */
   178		nr = 0;
   179		for(p=firstp; p!=P; p=p->link) {
   180			switch(p->as) {
   181			case ADATA:
   182			case AGLOBL:
   183			case ANAME:
   184			case ASIGNAME:
   185				continue;
   186			}
   187			r = rega();
   188			nr++;
   189			if(firstr == R) {
   190				firstr = r;
   191				lastr = r;
   192			} else {
   193				lastr->link = r;
   194				r->p1 = lastr;
   195				lastr->s1 = r;
   196				lastr = r;
   197			}
   198			r->prog = p;
   199			p->reg = r;
   200	
   201			r1 = r->p1;
   202			if(r1 != R) {
   203				switch(r1->prog->as) {
   204				case ARET:
   205				case AJMP:
   206				case AIRETL:
   207					r->p1 = R;
   208					r1->s1 = R;
   209				}
   210			}
   211	
   212			bit = mkvar(r, &p->from);
   213			if(bany(&bit))
   214			switch(p->as) {
   215			/*
   216			 * funny
   217			 */
   218			case ALEAL:
   219				setaddrs(bit);
   220				break;
   221	
   222			/*
   223			 * left side read
   224			 */
   225			default:
   226				for(z=0; z<BITS; z++)
   227					r->use1.b[z] |= bit.b[z];
   228				break;
   229	
   230			/*
   231			 * left side read+write
   232			 */
   233			case AXCHGB:
   234			case AXCHGW:
   235			case AXCHGL:
   236				for(z=0; z<BITS; z++) {
   237					r->use1.b[z] |= bit.b[z];
   238					r->set.b[z] |= bit.b[z];
   239				}
   240				break;
   241			}
   242	
   243			bit = mkvar(r, &p->to);
   244			if(bany(&bit))
   245			switch(p->as) {
   246			default:
   247				yyerror("reg: unknown op: %A", p->as);
   248				break;
   249	
   250			/*
   251			 * right side read
   252			 */
   253			case ACMPB:
   254			case ACMPL:
   255			case ACMPW:
   256			case ATESTB:
   257			case ATESTL:
   258			case ATESTW:
   259				for(z=0; z<BITS; z++)
   260					r->use2.b[z] |= bit.b[z];
   261				break;
   262	
   263			/*
   264			 * right side write
   265			 */
   266			case AFSTSW:
   267			case ALEAL:
   268			case ANOP:
   269			case AMOVL:
   270			case AMOVB:
   271			case AMOVW:
   272			case AMOVBLSX:
   273			case AMOVBLZX:
   274			case AMOVBWSX:
   275			case AMOVBWZX:
   276			case AMOVWLSX:
   277			case AMOVWLZX:
   278			case APOPL:
   279				for(z=0; z<BITS; z++)
   280					r->set.b[z] |= bit.b[z];
   281				break;
   282	
   283			/*
   284			 * right side read+write
   285			 */
   286			case AINCB:
   287			case AINCL:
   288			case AINCW:
   289			case ADECB:
   290			case ADECL:
   291			case ADECW:
   292	
   293			case AADDB:
   294			case AADDL:
   295			case AADDW:
   296			case AANDB:
   297			case AANDL:
   298			case AANDW:
   299			case ASUBB:
   300			case ASUBL:
   301			case ASUBW:
   302			case AORB:
   303			case AORL:
   304			case AORW:
   305			case AXORB:
   306			case AXORL:
   307			case AXORW:
   308			case ASALB:
   309			case ASALL:
   310			case ASALW:
   311			case ASARB:
   312			case ASARL:
   313			case ASARW:
   314			case ARCLB:
   315			case ARCLL:
   316			case ARCLW:
   317			case ARCRB:
   318			case ARCRL:
   319			case ARCRW:
   320			case AROLB:
   321			case AROLL:
   322			case AROLW:
   323			case ARORB:
   324			case ARORL:
   325			case ARORW:
   326			case ASHLB:
   327			case ASHLL:
   328			case ASHLW:
   329			case ASHRB:
   330			case ASHRL:
   331			case ASHRW:
   332			case AIMULL:
   333			case AIMULW:
   334			case ANEGB:
   335			case ANEGL:
   336			case ANEGW:
   337			case ANOTB:
   338			case ANOTL:
   339			case ANOTW:
   340			case AADCL:
   341			case ASBBL:
   342	
   343			case ASETCC:
   344			case ASETCS:
   345			case ASETEQ:
   346			case ASETGE:
   347			case ASETGT:
   348			case ASETHI:
   349			case ASETLE:
   350			case ASETLS:
   351			case ASETLT:
   352			case ASETMI:
   353			case ASETNE:
   354			case ASETOC:
   355			case ASETOS:
   356			case ASETPC:
   357			case ASETPL:
   358			case ASETPS:
   359	
   360			case AXCHGB:
   361			case AXCHGW:
   362			case AXCHGL:
   363				for(z=0; z<BITS; z++) {
   364					r->set.b[z] |= bit.b[z];
   365					r->use2.b[z] |= bit.b[z];
   366				}
   367				break;
   368	
   369			/*
   370			 * funny
   371			 */
   372			case AFMOVDP:
   373			case AFMOVFP:
   374			case AFMOVLP:
   375			case AFMOVVP:
   376			case AFMOVWP:
   377			case ACALL:
   378				setaddrs(bit);
   379				break;
   380			}
   381	
   382			switch(p->as) {
   383			case AIMULL:
   384			case AIMULW:
   385				if(p->to.type != D_NONE)
   386					break;
   387	
   388			case AIDIVL:
   389			case AIDIVW:
   390			case ADIVL:
   391			case ADIVW:
   392			case AMULL:
   393			case AMULW:
   394				r->set.b[0] |= RtoB(D_AX) | RtoB(D_DX);
   395				r->use1.b[0] |= RtoB(D_AX) | RtoB(D_DX);
   396				break;
   397	
   398			case AIDIVB:
   399			case AIMULB:
   400			case ADIVB:
   401			case AMULB:
   402				r->set.b[0] |= RtoB(D_AX);
   403				r->use1.b[0] |= RtoB(D_AX);
   404				break;
   405	
   406			case ACWD:
   407				r->set.b[0] |= RtoB(D_AX) | RtoB(D_DX);
   408				r->use1.b[0] |= RtoB(D_AX);
   409				break;
   410	
   411			case ACDQ:
   412				r->set.b[0] |= RtoB(D_DX);
   413				r->use1.b[0] |= RtoB(D_AX);
   414				break;
   415	
   416			case AREP:
   417			case AREPN:
   418			case ALOOP:
   419			case ALOOPEQ:
   420			case ALOOPNE:
   421				r->set.b[0] |= RtoB(D_CX);
   422				r->use1.b[0] |= RtoB(D_CX);
   423				break;
   424	
   425			case AMOVSB:
   426			case AMOVSL:
   427			case AMOVSW:
   428			case ACMPSB:
   429			case ACMPSL:
   430			case ACMPSW:
   431				r->set.b[0] |= RtoB(D_SI) | RtoB(D_DI);
   432				r->use1.b[0] |= RtoB(D_SI) | RtoB(D_DI);
   433				break;
   434	
   435			case ASTOSB:
   436			case ASTOSL:
   437			case ASTOSW:
   438			case ASCASB:
   439			case ASCASL:
   440			case ASCASW:
   441				r->set.b[0] |= RtoB(D_DI);
   442				r->use1.b[0] |= RtoB(D_AX) | RtoB(D_DI);
   443				break;
   444	
   445			case AINSB:
   446			case AINSL:
   447			case AINSW:
   448				r->set.b[0] |= RtoB(D_DX) | RtoB(D_DI);
   449				r->use1.b[0] |= RtoB(D_DI);
   450				break;
   451	
   452			case AOUTSB:
   453			case AOUTSL:
   454			case AOUTSW:
   455				r->set.b[0] |= RtoB(D_DI);
   456				r->use1.b[0] |= RtoB(D_DX) | RtoB(D_DI);
   457				break;
   458			}
   459		}
   460		if(firstr == R)
   461			return;
   462	
   463		for(i=0; i<nvar; i++) {
   464			Var *v = var+i;
   465			if(v->addr) {
   466				bit = blsh(i);
   467				for(z=0; z<BITS; z++)
   468					addrs.b[z] |= bit.b[z];
   469			}
   470	
   471	//		print("bit=%2d addr=%d et=%-6E w=%-2d s=%S + %lld\n",
   472	//			i, v->addr, v->etype, v->width, v->sym, v->offset);
   473		}
   474	
   475		if(debug['R'] && debug['v'])
   476			dumpit("pass1", firstr);
   477	
   478		/*
   479		 * pass 2
   480		 * turn branch references to pointers
   481		 * build back pointers
   482		 */
   483		for(r=firstr; r!=R; r=r->link) {
   484			p = r->prog;
   485			if(p->to.type == D_BRANCH) {
   486				if(p->to.branch == P)
   487					fatal("pnil %P", p);
   488				r1 = p->to.branch->reg;
   489				if(r1 == R)
   490					fatal("rnil %P", p);
   491				if(r1 == r) {
   492					//fatal("ref to self %P", p);
   493					continue;
   494				}
   495				r->s2 = r1;
   496				r->p2link = r1->p2;
   497				r1->p2 = r;
   498			}
   499		}
   500	
   501		if(debug['R'] && debug['v'])
   502			dumpit("pass2", firstr);
   503	
   504		/*
   505		 * pass 2.5
   506		 * find looping structure
   507		 */
   508		for(r = firstr; r != R; r = r->link)
   509			r->active = 0;
   510		change = 0;
   511		loopit(firstr, nr);
   512	
   513		if(debug['R'] && debug['v'])
   514			dumpit("pass2.5", firstr);
   515	
   516		/*
   517		 * pass 3
   518		 * iterate propagating usage
   519		 * 	back until flow graph is complete
   520		 */
   521	loop1:
   522		change = 0;
   523		for(r = firstr; r != R; r = r->link)
   524			r->active = 0;
   525		for(r = firstr; r != R; r = r->link)
   526			if(r->prog->as == ARET)
   527				prop(r, zbits, zbits);
   528	loop11:
   529		/* pick up unreachable code */
   530		i = 0;
   531		for(r = firstr; r != R; r = r1) {
   532			r1 = r->link;
   533			if(r1 && r1->active && !r->active) {
   534				prop(r, zbits, zbits);
   535				i = 1;
   536			}
   537		}
   538		if(i)
   539			goto loop11;
   540		if(change)
   541			goto loop1;
   542	
   543		if(debug['R'] && debug['v'])
   544			dumpit("pass3", firstr);
   545	
   546		/*
   547		 * pass 4
   548		 * iterate propagating register/variable synchrony
   549		 * 	forward until graph is complete
   550		 */
   551	loop2:
   552		change = 0;
   553		for(r = firstr; r != R; r = r->link)
   554			r->active = 0;
   555		synch(firstr, zbits);
   556		if(change)
   557			goto loop2;
   558	
   559		if(debug['R'] && debug['v'])
   560			dumpit("pass4", firstr);
   561	
   562		/*
   563		 * pass 4.5
   564		 * move register pseudo-variables into regu.
   565		 */
   566		for(r = firstr; r != R; r = r->link) {
   567			r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
   568	
   569			r->set.b[0] &= ~REGBITS;
   570			r->use1.b[0] &= ~REGBITS;
   571			r->use2.b[0] &= ~REGBITS;
   572			r->refbehind.b[0] &= ~REGBITS;
   573			r->refahead.b[0] &= ~REGBITS;
   574			r->calbehind.b[0] &= ~REGBITS;
   575			r->calahead.b[0] &= ~REGBITS;
   576			r->regdiff.b[0] &= ~REGBITS;
   577			r->act.b[0] &= ~REGBITS;
   578		}
   579	
   580		/*
   581		 * pass 5
   582		 * isolate regions
   583		 * calculate costs (paint1)
   584		 */
   585		r = firstr;
   586		if(r) {
   587			for(z=0; z<BITS; z++)
   588				bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
   589				  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
   590			if(bany(&bit) && !r->refset) {
   591				// should never happen - all variables are preset
   592				if(debug['w'])
   593					print("%L: used and not set: %Q\n", r->prog->lineno, bit);
   594				r->refset = 1;
   595			}
   596		}
   597		for(r = firstr; r != R; r = r->link)
   598			r->act = zbits;
   599		rgp = region;
   600		nregion = 0;
   601		for(r = firstr; r != R; r = r->link) {
   602			for(z=0; z<BITS; z++)
   603				bit.b[z] = r->set.b[z] &
   604				  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
   605			if(bany(&bit) && !r->refset) {
   606				if(debug['w'])
   607					print("%L: set and not used: %Q\n", r->prog->lineno, bit);
   608				r->refset = 1;
   609				excise(r);
   610			}
   611			for(z=0; z<BITS; z++)
   612				bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
   613			while(bany(&bit)) {
   614				i = bnum(bit);
   615				rgp->enter = r;
   616				rgp->varno = i;
   617				change = 0;
   618				paint1(r, i);
   619				bit.b[i/32] &= ~(1L<<(i%32));
   620				if(change <= 0)
   621					continue;
   622				rgp->cost = change;
   623				nregion++;
   624				if(nregion >= NRGN) {
   625					if(debug['R'] && debug['v'])
   626						print("too many regions\n");
   627					goto brk;
   628				}
   629				rgp++;
   630			}
   631		}
   632	brk:
   633		qsort(region, nregion, sizeof(region[0]), rcmp);
   634	
   635		/*
   636		 * pass 6
   637		 * determine used registers (paint2)
   638		 * replace code (paint3)
   639		 */
   640		rgp = region;
   641		for(i=0; i<nregion; i++) {
   642			bit = blsh(rgp->varno);
   643			vreg = paint2(rgp->enter, rgp->varno);
   644			vreg = allreg(vreg, rgp);
   645			if(rgp->regno != 0)
   646				paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
   647			rgp++;
   648		}
   649	
   650		if(debug['R'] && debug['v'])
   651			dumpit("pass6", firstr);
   652	
   653		/*
   654		 * pass 7
   655		 * peep-hole on basic block
   656		 */
   657		if(!debug['R'] || debug['P']) {
   658			peep();
   659		}
   660	
   661		/*
   662		 * eliminate nops
   663		 * free aux structures
   664		 */
   665		for(p=firstp; p!=P; p=p->link) {
   666			while(p->link != P && p->link->as == ANOP)
   667				p->link = p->link->link;
   668			if(p->to.type == D_BRANCH)
   669				while(p->to.branch != P && p->to.branch->as == ANOP)
   670					p->to.branch = p->to.branch->link;
   671		}
   672	
   673		if(r1 != R) {
   674			r1->link = freer;
   675			freer = firstr;
   676		}
   677	
   678		if(debug['R']) {
   679			if(ostats.ncvtreg ||
   680			   ostats.nspill ||
   681			   ostats.nreload ||
   682			   ostats.ndelmov ||
   683			   ostats.nvar ||
   684			   ostats.naddr ||
   685			   0)
   686				print("\nstats\n");
   687	
   688			if(ostats.ncvtreg)
   689				print("	%4d cvtreg\n", ostats.ncvtreg);
   690			if(ostats.nspill)
   691				print("	%4d spill\n", ostats.nspill);
   692			if(ostats.nreload)
   693				print("	%4d reload\n", ostats.nreload);
   694			if(ostats.ndelmov)
   695				print("	%4d delmov\n", ostats.ndelmov);
   696			if(ostats.nvar)
   697				print("	%4d delmov\n", ostats.nvar);
   698			if(ostats.naddr)
   699				print("	%4d delmov\n", ostats.naddr);
   700	
   701			memset(&ostats, 0, sizeof(ostats));
   702		}
   703	}
   704	
   705	/*
   706	 * add mov b,rn
   707	 * just after r
   708	 */
   709	void
   710	addmove(Reg *r, int bn, int rn, int f)
   711	{
   712		Prog *p, *p1;
   713		Adr *a;
   714		Var *v;
   715	
   716		p1 = mal(sizeof(*p1));
   717		clearp(p1);
   718		p1->loc = 9999;
   719	
   720		p = r->prog;
   721		p1->link = p->link;
   722		p->link = p1;
   723		p1->lineno = p->lineno;
   724	
   725		v = var + bn;
   726	
   727		a = &p1->to;
   728		a->sym = v->sym;
   729		a->offset = v->offset;
   730		a->etype = v->etype;
   731		a->type = v->name;
   732		a->gotype = v->gotype;
   733		a->node = v->node;
   734	
   735		// need to clean this up with wptr and
   736		// some of the defaults
   737		p1->as = AMOVL;
   738		switch(v->etype) {
   739		default:
   740			fatal("unknown type\n");
   741		case TINT8:
   742		case TUINT8:
   743		case TBOOL:
   744			p1->as = AMOVB;
   745			break;
   746		case TINT16:
   747		case TUINT16:
   748			p1->as = AMOVW;
   749			break;
   750		case TINT:
   751		case TUINT:
   752		case TINT32:
   753		case TUINT32:
   754		case TPTR32:
   755			break;
   756		}
   757	
   758		p1->from.type = rn;
   759		if(!f) {
   760			p1->from = *a;
   761			*a = zprog.from;
   762			a->type = rn;
   763			if(v->etype == TUINT8)
   764				p1->as = AMOVB;
   765			if(v->etype == TUINT16)
   766				p1->as = AMOVW;
   767		}
   768		if(debug['R'] && debug['v'])
   769			print("%P ===add=== %P\n", p, p1);
   770		ostats.nspill++;
   771	}
   772	
   773	uint32
   774	doregbits(int r)
   775	{
   776		uint32 b;
   777	
   778		b = 0;
   779		if(r >= D_INDIR)
   780			r -= D_INDIR;
   781		if(r >= D_AX && r <= D_DI)
   782			b |= RtoB(r);
   783		else
   784		if(r >= D_AL && r <= D_BL)
   785			b |= RtoB(r-D_AL+D_AX);
   786		else
   787		if(r >= D_AH && r <= D_BH)
   788			b |= RtoB(r-D_AH+D_AX);
   789		return b;
   790	}
   791	
   792	static int
   793	overlap(int32 o1, int w1, int32 o2, int w2)
   794	{
   795		int32 t1, t2;
   796	
   797		t1 = o1+w1;
   798		t2 = o2+w2;
   799	
   800		if(!(t1 > o2 && t2 > o1))
   801			return 0;
   802	
   803		return 1;
   804	}
   805	
   806	Bits
   807	mkvar(Reg *r, Adr *a)
   808	{
   809		Var *v;
   810		int i, t, n, et, z, w, flag, regu;
   811		int32 o;
   812		Bits bit;
   813		Sym *s;
   814	
   815		/*
   816		 * mark registers used
   817		 */
   818		t = a->type;
   819		if(t == D_NONE)
   820			goto none;
   821	
   822		if(r != R)
   823			r->use1.b[0] |= doregbits(a->index);
   824	
   825		switch(t) {
   826		default:
   827			regu = doregbits(t);
   828			if(regu == 0)
   829				goto none;
   830			bit = zbits;
   831			bit.b[0] = regu;
   832			return bit;
   833	
   834		case D_ADDR:
   835			a->type = a->index;
   836			bit = mkvar(r, a);
   837			setaddrs(bit);
   838			a->type = t;
   839			ostats.naddr++;
   840			goto none;
   841	
   842		case D_EXTERN:
   843		case D_STATIC:
   844		case D_PARAM:
   845		case D_AUTO:
   846			n = t;
   847			break;
   848		}
   849	
   850		s = a->sym;
   851		if(s == S)
   852			goto none;
   853		if(s->name[0] == '.')
   854			goto none;
   855		et = a->etype;
   856		o = a->offset;
   857		w = a->width;
   858	
   859		flag = 0;
   860		for(i=0; i<nvar; i++) {
   861			v = var+i;
   862			if(v->sym == s && v->name == n) {
   863				if(v->offset == o)
   864				if(v->etype == et)
   865				if(v->width == w)
   866					return blsh(i);
   867	
   868				// if they overlap, disable both
   869				if(overlap(v->offset, v->width, o, w)) {
   870					if(debug['R'])
   871						print("disable %s\n", v->sym->name);
   872					v->addr = 1;
   873					flag = 1;
   874				}
   875			}
   876		}
   877	
   878		switch(et) {
   879		case 0:
   880		case TFUNC:
   881			goto none;
   882		}
   883	
   884		if(nvar >= NVAR) {
   885			if(debug['w'] > 1 && s)
   886				fatal("variable not optimized: %D", a);
   887			goto none;
   888		}
   889	
   890		i = nvar;
   891		nvar++;
   892		v = var+i;
   893		v->sym = s;
   894		v->offset = o;
   895		v->name = n;
   896		v->gotype = a->gotype;
   897		v->etype = et;
   898		v->width = w;
   899		v->addr = flag;		// funny punning
   900		v->node = a->node;
   901	
   902		if(debug['R'])
   903			print("bit=%2d et=%2d w=%d+%d %S %D flag=%d\n", i, et, o, w, s, a, v->addr);
   904		ostats.nvar++;
   905	
   906		bit = blsh(i);
   907		if(n == D_EXTERN || n == D_STATIC)
   908			for(z=0; z<BITS; z++)
   909				externs.b[z] |= bit.b[z];
   910		if(n == D_PARAM)
   911			for(z=0; z<BITS; z++)
   912				params.b[z] |= bit.b[z];
   913	
   914		return bit;
   915	
   916	none:
   917		return zbits;
   918	}
   919	
   920	void
   921	prop(Reg *r, Bits ref, Bits cal)
   922	{
   923		Reg *r1, *r2;
   924		int z;
   925	
   926		for(r1 = r; r1 != R; r1 = r1->p1) {
   927			for(z=0; z<BITS; z++) {
   928				ref.b[z] |= r1->refahead.b[z];
   929				if(ref.b[z] != r1->refahead.b[z]) {
   930					r1->refahead.b[z] = ref.b[z];
   931					change++;
   932				}
   933				cal.b[z] |= r1->calahead.b[z];
   934				if(cal.b[z] != r1->calahead.b[z]) {
   935					r1->calahead.b[z] = cal.b[z];
   936					change++;
   937				}
   938			}
   939			switch(r1->prog->as) {
   940			case ACALL:
   941				if(noreturn(r1->prog))
   942					break;
   943				for(z=0; z<BITS; z++) {
   944					cal.b[z] |= ref.b[z] | externs.b[z];
   945					ref.b[z] = 0;
   946				}
   947				break;
   948	
   949			case ATEXT:
   950				for(z=0; z<BITS; z++) {
   951					cal.b[z] = 0;
   952					ref.b[z] = 0;
   953				}
   954				break;
   955	
   956			case ARET:
   957				for(z=0; z<BITS; z++) {
   958					cal.b[z] = externs.b[z] | ovar.b[z];
   959					ref.b[z] = 0;
   960				}
   961				break;
   962			}
   963			for(z=0; z<BITS; z++) {
   964				ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
   965					r1->use1.b[z] | r1->use2.b[z];
   966				cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
   967				r1->refbehind.b[z] = ref.b[z];
   968				r1->calbehind.b[z] = cal.b[z];
   969			}
   970			if(r1->active)
   971				break;
   972			r1->active = 1;
   973		}
   974		for(; r != r1; r = r->p1)
   975			for(r2 = r->p2; r2 != R; r2 = r2->p2link)
   976				prop(r2, r->refbehind, r->calbehind);
   977	}
   978	
   979	/*
   980	 * find looping structure
   981	 *
   982	 * 1) find reverse postordering
   983	 * 2) find approximate dominators,
   984	 *	the actual dominators if the flow graph is reducible
   985	 *	otherwise, dominators plus some other non-dominators.
   986	 *	See Matthew S. Hecht and Jeffrey D. Ullman,
   987	 *	"Analysis of a Simple Algorithm for Global Data Flow Problems",
   988	 *	Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
   989	 *	Oct. 1-3, 1973, pp.  207-217.
   990	 * 3) find all nodes with a predecessor dominated by the current node.
   991	 *	such a node is a loop head.
   992	 *	recursively, all preds with a greater rpo number are in the loop
   993	 */
   994	int32
   995	postorder(Reg *r, Reg **rpo2r, int32 n)
   996	{
   997		Reg *r1;
   998	
   999		r->rpo = 1;
  1000		r1 = r->s1;
  1001		if(r1 && !r1->rpo)
  1002			n = postorder(r1, rpo2r, n);
  1003		r1 = r->s2;
  1004		if(r1 && !r1->rpo)
  1005			n = postorder(r1, rpo2r, n);
  1006		rpo2r[n] = r;
  1007		n++;
  1008		return n;
  1009	}
  1010	
  1011	int32
  1012	rpolca(int32 *idom, int32 rpo1, int32 rpo2)
  1013	{
  1014		int32 t;
  1015	
  1016		if(rpo1 == -1)
  1017			return rpo2;
  1018		while(rpo1 != rpo2){
  1019			if(rpo1 > rpo2){
  1020				t = rpo2;
  1021				rpo2 = rpo1;
  1022				rpo1 = t;
  1023			}
  1024			while(rpo1 < rpo2){
  1025				t = idom[rpo2];
  1026				if(t >= rpo2)
  1027					fatal("bad idom");
  1028				rpo2 = t;
  1029			}
  1030		}
  1031		return rpo1;
  1032	}
  1033	
  1034	int
  1035	doms(int32 *idom, int32 r, int32 s)
  1036	{
  1037		while(s > r)
  1038			s = idom[s];
  1039		return s == r;
  1040	}
  1041	
  1042	int
  1043	loophead(int32 *idom, Reg *r)
  1044	{
  1045		int32 src;
  1046	
  1047		src = r->rpo;
  1048		if(r->p1 != R && doms(idom, src, r->p1->rpo))
  1049			return 1;
  1050		for(r = r->p2; r != R; r = r->p2link)
  1051			if(doms(idom, src, r->rpo))
  1052				return 1;
  1053		return 0;
  1054	}
  1055	
  1056	void
  1057	loopmark(Reg **rpo2r, int32 head, Reg *r)
  1058	{
  1059		if(r->rpo < head || r->active == head)
  1060			return;
  1061		r->active = head;
  1062		r->loop += LOOP;
  1063		if(r->p1 != R)
  1064			loopmark(rpo2r, head, r->p1);
  1065		for(r = r->p2; r != R; r = r->p2link)
  1066			loopmark(rpo2r, head, r);
  1067	}
  1068	
  1069	void
  1070	loopit(Reg *r, int32 nr)
  1071	{
  1072		Reg *r1;
  1073		int32 i, d, me;
  1074	
  1075		if(nr > maxnr) {
  1076			rpo2r = mal(nr * sizeof(Reg*));
  1077			idom = mal(nr * sizeof(int32));
  1078			maxnr = nr;
  1079		}
  1080	
  1081		d = postorder(r, rpo2r, 0);
  1082		if(d > nr)
  1083			fatal("too many reg nodes %d %d", d, nr);
  1084		nr = d;
  1085		for(i = 0; i < nr / 2; i++) {
  1086			r1 = rpo2r[i];
  1087			rpo2r[i] = rpo2r[nr - 1 - i];
  1088			rpo2r[nr - 1 - i] = r1;
  1089		}
  1090		for(i = 0; i < nr; i++)
  1091			rpo2r[i]->rpo = i;
  1092	
  1093		idom[0] = 0;
  1094		for(i = 0; i < nr; i++) {
  1095			r1 = rpo2r[i];
  1096			me = r1->rpo;
  1097			d = -1;
  1098			if(r1->p1 != R && r1->p1->rpo < me)
  1099				d = r1->p1->rpo;
  1100			for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
  1101				if(r1->rpo < me)
  1102					d = rpolca(idom, d, r1->rpo);
  1103			idom[i] = d;
  1104		}
  1105	
  1106		for(i = 0; i < nr; i++) {
  1107			r1 = rpo2r[i];
  1108			r1->loop++;
  1109			if(r1->p2 != R && loophead(idom, r1))
  1110				loopmark(rpo2r, i, r1);
  1111		}
  1112	}
  1113	
  1114	void
  1115	synch(Reg *r, Bits dif)
  1116	{
  1117		Reg *r1;
  1118		int z;
  1119	
  1120		for(r1 = r; r1 != R; r1 = r1->s1) {
  1121			for(z=0; z<BITS; z++) {
  1122				dif.b[z] = (dif.b[z] &
  1123					~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
  1124						r1->set.b[z] | r1->regdiff.b[z];
  1125				if(dif.b[z] != r1->regdiff.b[z]) {
  1126					r1->regdiff.b[z] = dif.b[z];
  1127					change++;
  1128				}
  1129			}
  1130			if(r1->active)
  1131				break;
  1132			r1->active = 1;
  1133			for(z=0; z<BITS; z++)
  1134				dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
  1135			if(r1->s2 != R)
  1136				synch(r1->s2, dif);
  1137		}
  1138	}
  1139	
  1140	uint32
  1141	allreg(uint32 b, Rgn *r)
  1142	{
  1143		Var *v;
  1144		int i;
  1145	
  1146		v = var + r->varno;
  1147		r->regno = 0;
  1148		switch(v->etype) {
  1149	
  1150		default:
  1151			fatal("unknown etype %d/%E", bitno(b), v->etype);
  1152			break;
  1153	
  1154		case TINT8:
  1155		case TUINT8:
  1156		case TINT16:
  1157		case TUINT16:
  1158		case TINT32:
  1159		case TUINT32:
  1160		case TINT64:
  1161		case TINT:
  1162		case TUINT:
  1163		case TUINTPTR:
  1164		case TBOOL:
  1165		case TPTR32:
  1166			i = BtoR(~b);
  1167			if(i && r->cost > 0) {
  1168				r->regno = i;
  1169				return RtoB(i);
  1170			}
  1171			break;
  1172	
  1173		case TFLOAT32:
  1174		case TFLOAT64:
  1175			break;
  1176		}
  1177		return 0;
  1178	}
  1179	
  1180	void
  1181	paint1(Reg *r, int bn)
  1182	{
  1183		Reg *r1;
  1184		Prog *p;
  1185		int z;
  1186		uint32 bb;
  1187	
  1188		z = bn/32;
  1189		bb = 1L<<(bn%32);
  1190		if(r->act.b[z] & bb)
  1191			return;
  1192		for(;;) {
  1193			if(!(r->refbehind.b[z] & bb))
  1194				break;
  1195			r1 = r->p1;
  1196			if(r1 == R)
  1197				break;
  1198			if(!(r1->refahead.b[z] & bb))
  1199				break;
  1200			if(r1->act.b[z] & bb)
  1201				break;
  1202			r = r1;
  1203		}
  1204	
  1205		if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
  1206			change -= CLOAD * r->loop;
  1207		}
  1208		for(;;) {
  1209			r->act.b[z] |= bb;
  1210			p = r->prog;
  1211	
  1212			if(r->use1.b[z] & bb) {
  1213				change += CREF * r->loop;
  1214				if(p->as == AFMOVL || p->as == AFMOVW)
  1215					if(BtoR(bb) != D_F0)
  1216						change = -CINF;
  1217			}
  1218	
  1219			if((r->use2.b[z]|r->set.b[z]) & bb) {
  1220				change += CREF * r->loop;
  1221				if(p->as == AFMOVL || p->as == AFMOVW)
  1222					if(BtoR(bb) != D_F0)
  1223						change = -CINF;
  1224			}
  1225	
  1226			if(STORE(r) & r->regdiff.b[z] & bb) {
  1227				change -= CLOAD * r->loop;
  1228				if(p->as == AFMOVL || p->as == AFMOVW)
  1229					if(BtoR(bb) != D_F0)
  1230						change = -CINF;
  1231			}
  1232	
  1233			if(r->refbehind.b[z] & bb)
  1234				for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  1235					if(r1->refahead.b[z] & bb)
  1236						paint1(r1, bn);
  1237	
  1238			if(!(r->refahead.b[z] & bb))
  1239				break;
  1240			r1 = r->s2;
  1241			if(r1 != R)
  1242				if(r1->refbehind.b[z] & bb)
  1243					paint1(r1, bn);
  1244			r = r->s1;
  1245			if(r == R)
  1246				break;
  1247			if(r->act.b[z] & bb)
  1248				break;
  1249			if(!(r->refbehind.b[z] & bb))
  1250				break;
  1251		}
  1252	}
  1253	
  1254	uint32
  1255	regset(Reg *r, uint32 bb)
  1256	{
  1257		uint32 b, set;
  1258		Adr v;
  1259		int c;
  1260	
  1261		set = 0;
  1262		v = zprog.from;
  1263		while(b = bb & ~(bb-1)) {
  1264			v.type = BtoR(b);
  1265			c = copyu(r->prog, &v, A);
  1266			if(c == 3)
  1267				set |= b;
  1268			bb &= ~b;
  1269		}
  1270		return set;
  1271	}
  1272	
  1273	uint32
  1274	reguse(Reg *r, uint32 bb)
  1275	{
  1276		uint32 b, set;
  1277		Adr v;
  1278		int c;
  1279	
  1280		set = 0;
  1281		v = zprog.from;
  1282		while(b = bb & ~(bb-1)) {
  1283			v.type = BtoR(b);
  1284			c = copyu(r->prog, &v, A);
  1285			if(c == 1 || c == 2 || c == 4)
  1286				set |= b;
  1287			bb &= ~b;
  1288		}
  1289		return set;
  1290	}
  1291	
  1292	uint32
  1293	paint2(Reg *r, int bn)
  1294	{
  1295		Reg *r1;
  1296		int z;
  1297		uint32 bb, vreg, x;
  1298	
  1299		z = bn/32;
  1300		bb = 1L << (bn%32);
  1301		vreg = regbits;
  1302		if(!(r->act.b[z] & bb))
  1303			return vreg;
  1304		for(;;) {
  1305			if(!(r->refbehind.b[z] & bb))
  1306				break;
  1307			r1 = r->p1;
  1308			if(r1 == R)
  1309				break;
  1310			if(!(r1->refahead.b[z] & bb))
  1311				break;
  1312			if(!(r1->act.b[z] & bb))
  1313				break;
  1314			r = r1;
  1315		}
  1316		for(;;) {
  1317			r->act.b[z] &= ~bb;
  1318	
  1319			vreg |= r->regu;
  1320	
  1321			if(r->refbehind.b[z] & bb)
  1322				for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  1323					if(r1->refahead.b[z] & bb)
  1324						vreg |= paint2(r1, bn);
  1325	
  1326			if(!(r->refahead.b[z] & bb))
  1327				break;
  1328			r1 = r->s2;
  1329			if(r1 != R)
  1330				if(r1->refbehind.b[z] & bb)
  1331					vreg |= paint2(r1, bn);
  1332			r = r->s1;
  1333			if(r == R)
  1334				break;
  1335			if(!(r->act.b[z] & bb))
  1336				break;
  1337			if(!(r->refbehind.b[z] & bb))
  1338				break;
  1339		}
  1340	
  1341		bb = vreg;
  1342		for(; r; r=r->s1) {
  1343			x = r->regu & ~bb;
  1344			if(x) {
  1345				vreg |= reguse(r, x);
  1346				bb |= regset(r, x);
  1347			}
  1348		}
  1349		return vreg;
  1350	}
  1351	
  1352	void
  1353	paint3(Reg *r, int bn, int32 rb, int rn)
  1354	{
  1355		Reg *r1;
  1356		Prog *p;
  1357		int z;
  1358		uint32 bb;
  1359	
  1360		z = bn/32;
  1361		bb = 1L << (bn%32);
  1362		if(r->act.b[z] & bb)
  1363			return;
  1364		for(;;) {
  1365			if(!(r->refbehind.b[z] & bb))
  1366				break;
  1367			r1 = r->p1;
  1368			if(r1 == R)
  1369				break;
  1370			if(!(r1->refahead.b[z] & bb))
  1371				break;
  1372			if(r1->act.b[z] & bb)
  1373				break;
  1374			r = r1;
  1375		}
  1376	
  1377		if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  1378			addmove(r, bn, rn, 0);
  1379		for(;;) {
  1380			r->act.b[z] |= bb;
  1381			p = r->prog;
  1382	
  1383			if(r->use1.b[z] & bb) {
  1384				if(debug['R'] && debug['v'])
  1385					print("%P", p);
  1386				addreg(&p->from, rn);
  1387				if(debug['R'] && debug['v'])
  1388					print(" ===change== %P\n", p);
  1389			}
  1390			if((r->use2.b[z]|r->set.b[z]) & bb) {
  1391				if(debug['R'] && debug['v'])
  1392					print("%P", p);
  1393				addreg(&p->to, rn);
  1394				if(debug['R'] && debug['v'])
  1395					print(" ===change== %P\n", p);
  1396			}
  1397	
  1398			if(STORE(r) & r->regdiff.b[z] & bb)
  1399				addmove(r, bn, rn, 1);
  1400			r->regu |= rb;
  1401	
  1402			if(r->refbehind.b[z] & bb)
  1403				for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  1404					if(r1->refahead.b[z] & bb)
  1405						paint3(r1, bn, rb, rn);
  1406	
  1407			if(!(r->refahead.b[z] & bb))
  1408				break;
  1409			r1 = r->s2;
  1410			if(r1 != R)
  1411				if(r1->refbehind.b[z] & bb)
  1412					paint3(r1, bn, rb, rn);
  1413			r = r->s1;
  1414			if(r == R)
  1415				break;
  1416			if(r->act.b[z] & bb)
  1417				break;
  1418			if(!(r->refbehind.b[z] & bb))
  1419				break;
  1420		}
  1421	}
  1422	
  1423	void
  1424	addreg(Adr *a, int rn)
  1425	{
  1426	
  1427		a->sym = 0;
  1428		a->offset = 0;
  1429		a->type = rn;
  1430	
  1431		ostats.ncvtreg++;
  1432	}
  1433	
  1434	int32
  1435	RtoB(int r)
  1436	{
  1437	
  1438		if(r < D_AX || r > D_DI)
  1439			return 0;
  1440		return 1L << (r-D_AX);
  1441	}
  1442	
  1443	int
  1444	BtoR(int32 b)
  1445	{
  1446	
  1447		b &= 0xffL;
  1448		if(b == 0)
  1449			return 0;
  1450		return bitno(b) + D_AX;
  1451	}
  1452	
  1453	void
  1454	dumpone(Reg *r)
  1455	{
  1456		int z;
  1457		Bits bit;
  1458	
  1459		print("%d:%P", r->loop, r->prog);
  1460		for(z=0; z<BITS; z++)
  1461			bit.b[z] =
  1462				r->set.b[z] |
  1463				r->use1.b[z] |
  1464				r->use2.b[z] |
  1465				r->refbehind.b[z] |
  1466				r->refahead.b[z] |
  1467				r->calbehind.b[z] |
  1468				r->calahead.b[z] |
  1469				r->regdiff.b[z] |
  1470				r->act.b[z] |
  1471					0;
  1472		if(bany(&bit)) {
  1473			print("\t");
  1474			if(bany(&r->set))
  1475				print(" s:%Q", r->set);
  1476			if(bany(&r->use1))
  1477				print(" u1:%Q", r->use1);
  1478			if(bany(&r->use2))
  1479				print(" u2:%Q", r->use2);
  1480			if(bany(&r->refbehind))
  1481				print(" rb:%Q ", r->refbehind);
  1482			if(bany(&r->refahead))
  1483				print(" ra:%Q ", r->refahead);
  1484			if(bany(&r->calbehind))
  1485				print("cb:%Q ", r->calbehind);
  1486			if(bany(&r->calahead))
  1487				print(" ca:%Q ", r->calahead);
  1488			if(bany(&r->regdiff))
  1489				print(" d:%Q ", r->regdiff);
  1490			if(bany(&r->act))
  1491				print(" a:%Q ", r->act);
  1492		}
  1493		print("\n");
  1494	}
  1495	
  1496	void
  1497	dumpit(char *str, Reg *r0)
  1498	{
  1499		Reg *r, *r1;
  1500	
  1501		print("\n%s\n", str);
  1502		for(r = r0; r != R; r = r->link) {
  1503			dumpone(r);
  1504			r1 = r->p2;
  1505			if(r1 != R) {
  1506				print("	pred:");
  1507				for(; r1 != R; r1 = r1->p2link)
  1508					print(" %.4ud", r1->prog->loc);
  1509				print("\n");
  1510			}
  1511	//		r1 = r->s1;
  1512	//		if(r1 != R) {
  1513	//			print("	succ:");
  1514	//			for(; r1 != R; r1 = r1->s1)
  1515	//				print(" %.4ud", r1->prog->loc);
  1516	//			print("\n");
  1517	//		}
  1518		}
  1519	}
  1520	
  1521	static Sym*	symlist[10];
  1522	
  1523	int
  1524	noreturn(Prog *p)
  1525	{
  1526		Sym *s;
  1527		int i;
  1528	
  1529		if(symlist[0] == S) {
  1530			symlist[0] = pkglookup("panicindex", runtimepkg);
  1531			symlist[1] = pkglookup("panicslice", runtimepkg);
  1532			symlist[2] = pkglookup("throwinit", runtimepkg);
  1533			symlist[3] = pkglookup("panic", runtimepkg);
  1534			symlist[4] = pkglookup("panicwrap", runtimepkg);
  1535		}
  1536	
  1537		s = p->to.sym;
  1538		if(s == S)
  1539			return 0;
  1540		for(i=0; symlist[i]!=S; i++)
  1541			if(s == symlist[i])
  1542				return 1;
  1543		return 0;
  1544	}

release.r60.3. Except as noted, this content is licensed under a Creative Commons Attribution 3.0 License.