The Go Programming Language

Text file src/cmd/6l/span.c

     1	// Inferno utils/6l/span.c
     2	// http://code.google.com/p/inferno-os/source/browse/utils/6l/span.c
     3	//
     4	//	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5	//	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6	//	Portions Copyright © 1997-1999 Vita Nuova Limited
     7	//	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8	//	Portions Copyright © 2004,2006 Bruce Ellis
     9	//	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10	//	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11	//	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12	//
    13	// Permission is hereby granted, free of charge, to any person obtaining a copy
    14	// of this software and associated documentation files (the "Software"), to deal
    15	// in the Software without restriction, including without limitation the rights
    16	// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17	// copies of the Software, and to permit persons to whom the Software is
    18	// furnished to do so, subject to the following conditions:
    19	//
    20	// The above copyright notice and this permission notice shall be included in
    21	// all copies or substantial portions of the Software.
    22	//
    23	// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24	// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25	// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26	// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27	// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28	// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29	// THE SOFTWARE.
    30	
    31	// Instruction layout.
    32	
    33	#include	"l.h"
    34	#include	"../ld/lib.h"
    35	
      	// Prefix flag bits (Rxr/Rxx/Rxb, Pw, 0x40) collected while one instruction
      	// is being encoded; asmandsz and doasm OR bits into it.
      	// NOTE(review): presumably turned into the REX prefix by asmins, which is
      	// outside this excerpt -- confirm.
    36	static int	rexflag;
      	// Assembly mode. Code in this file only compares it against 64 to decide
      	// whether R8-R15 and the SPL..DIL/R8B..R15B byte registers are accepted.
      	// It is assigned outside this excerpt.
    37	static int	asmode;
      	// Forward declaration: relput4 and asmandsz use vaddr before its definition.
    38	static vlong	vaddr(Adr*, Reloc*);
    39	
      	// span1 lays out the instructions of text symbol s.
      	// Each Prog is given a pc, assembled with asmins, and its bytes are copied
      	// into s->p.  The whole pass is repeated for as long as some short forward
      	// branch turned out not to reach its target.  On return s->size holds the
      	// number of bytes emitted.  Nothing is done if s->p is already non-nil.
    40	void
    41	span1(Sym *s)
    42	{
    43		Prog *p, *q;
    44		int32 c, v, loop;
    45		uchar *bp;
    46		int n, m, i;
    47	
    48		cursym = s;
    49		
    50		if(s->p != nil)
    51			return;
    52	
    53		for(p = s->text; p != P; p = p->link) {
    54			p->back = 2;	// use short branches first time through
      			// Earlier Progs already carry bit 2 from this loop, so a target
      			// with that bit set lies behind p.
    55			if((q = p->pcond) != P && (q->back & 2))
    56				p->back |= 1;	// backward jump
    57	
      			// Rewrite the AADJSP pseudo-op as an add or subtract on SP
      			// (or as ANOP when the adjustment is zero).
    58			if(p->as == AADJSP) {
    59				p->to.type = D_SP;
    60				v = -p->from.offset;
    61				p->from.offset = v;
    62				p->as = p->mode != 64? AADDL: AADDQ;
    63				if(v < 0) {
    64					p->as = p->mode != 64? ASUBL: ASUBQ;
    65					v = -v;
    66					p->from.offset = v;
    67				}
    68				if(v == 0)
    69					p->as = ANOP;
    70			}
    71		}
    72		
      		// n counts layout passes; loop counts short branches that had to be
      		// widened during the current pass.
    73		n = 0;
    74		do {
    75			loop = 0;
      			// Start the pass with no relocations and no emitted bytes.
    76			memset(s->r, 0, s->nr*sizeof s->r[0]);
    77			s->nr = 0;
    78			s->np = 0;
    79			c = 0;
    80			for(p = s->text; p != P; p = p->link) {
    81				p->pc = c;
    82	
    83				// process forward jumps to p
      				// NOTE(review): the comefrom/forwd list is built outside this
      				// function (presumably while the branch is assembled) -- confirm.
    84				for(q = p->comefrom; q != P; q = q->forwd) {
      					// Displacement measured from the end of q (pc + encoded length).
    85					v = p->pc - (q->pc + q->mark);
    86					if(q->back & 2)	{	// short
    87						if(v > 127) {
      							// Does not fit in a signed byte: clear the short
      							// bit and request another pass.
    88							loop++;
    89							q->back ^= 2;
    90						}
      						// Patch the byte following q's first byte.
    91						s->p[q->pc+1] = v;
    92					} else {
      						// Patch the last four bytes of q, little-endian.
    93						bp = s->p + q->pc + q->mark - 4;
    94						*bp++ = v;
    95						*bp++ = v>>8;
    96						*bp++ = v>>16;
    97						*bp = v>>24;
    98					}	
    99				}
   100				p->comefrom = P;
   101	
      				// asmins leaves the encoding in and[0 .. andptr-and).
   102				asmins(p);
   103				p->pc = c;
   104				m = andptr-and;
   105				symgrow(s, p->pc+m);
   106				memmove(s->p+p->pc, and, m);
      				// Remember the encoded length; the jump patching above uses it.
   107				p->mark = m;
   108				c += m;
   109			}
      			// Give up if the layout has not settled after 20 passes.
   110			if(++n > 20) {
   111				diag("span must be looping");
   112				errorexit();
   113			}
   114		} while(loop);
   115		s->size = c;
   116	
      		// With debug['a'] > 1, dump the bytes 16 per row, then the relocations.
   117		if(debug['a'] > 1) {
   118			print("span1 %s %lld (%d tries)\n %.6ux", s->name, s->size, n, 0);
   119			for(i=0; i<s->np; i++) {
   120				print(" %.2ux", s->p[i]);
   121				if(i%16 == 15)
   122					print("\n  %.6ux", i+1);
   123			}
   124			if(i%16)
   125				print("\n");
   126		
   127			for(i=0; i<s->nr; i++) {
   128				Reloc *r;
   129				
   130				r = &s->r[i];
   131				print(" rel %#.4ux/%d %s%+lld\n", r->off, r->siz, r->sym->name, r->add);
   132			}
   133		}
   134	}
   135	
   136	void
   137	span(void)
   138	{
   139		Prog *p, *q;
   140		int32 v;
   141		int n;
   142	
   143		if(debug['v'])
   144			Bprint(&bso, "%5.2f span\n", cputime());
   145	
   146		// NOTE(rsc): If we get rid of the globals we should
   147		// be able to parallelize these iterations.
   148		for(cursym = textp; cursym != nil; cursym = cursym->next) {
   149			if(cursym->p != nil)
   150				continue;
   151			// TODO: move into span1
   152			for(p = cursym->text; p != P; p = p->link) {
   153				n = 0;
   154				if(p->to.type == D_BRANCH)
   155					if(p->pcond == P)
   156						p->pcond = p;
   157				if((q = p->pcond) != P)
   158					if(q->back != 2)
   159						n = 1;
   160				p->back = n;
   161				if(p->as == AADJSP) {
   162					p->to.type = D_SP;
   163					v = -p->from.offset;
   164					p->from.offset = v;
   165					p->as = p->mode != 64? AADDL: AADDQ;
   166					if(v < 0) {
   167						p->as = p->mode != 64? ASUBL: ASUBQ;
   168						v = -v;
   169						p->from.offset = v;
   170					}
   171					if(v == 0)
   172						p->as = ANOP;
   173				}
   174			}
   175			span1(cursym);
   176		}
   177	}
   178	
   179	void
   180	xdefine(char *p, int t, vlong v)
   181	{
   182		Sym *s;
   183	
   184		s = lookup(p, 0);
   185		s->type = t;
   186		s->value = v;
   187		s->reachable = 1;
   188		s->special = 1;
   189	}
   190	
   191	void
   192	instinit(void)
   193	{
   194		int c, i;
   195	
   196		for(i=1; optab[i].as; i++) {
   197			c = optab[i].as;
   198			if(opindex[c] != nil) {
   199				diag("phase error in optab: %d (%A)", i, c);
   200				errorexit();
   201			}
   202			opindex[c] = &optab[i];
   203		}
   204	
   205		for(i=0; i<Ymax; i++)
   206			ycover[i*Ymax + i] = 1;
   207	
   208		ycover[Yi0*Ymax + Yi8] = 1;
   209		ycover[Yi1*Ymax + Yi8] = 1;
   210	
   211		ycover[Yi0*Ymax + Ys32] = 1;
   212		ycover[Yi1*Ymax + Ys32] = 1;
   213		ycover[Yi8*Ymax + Ys32] = 1;
   214	
   215		ycover[Yi0*Ymax + Yi32] = 1;
   216		ycover[Yi1*Ymax + Yi32] = 1;
   217		ycover[Yi8*Ymax + Yi32] = 1;
   218		ycover[Ys32*Ymax + Yi32] = 1;
   219	
   220		ycover[Yi0*Ymax + Yi64] = 1;
   221		ycover[Yi1*Ymax + Yi64] = 1;
   222		ycover[Yi8*Ymax + Yi64] = 1;
   223		ycover[Ys32*Ymax + Yi64] = 1;
   224		ycover[Yi32*Ymax + Yi64] = 1;
   225	
   226		ycover[Yal*Ymax + Yrb] = 1;
   227		ycover[Ycl*Ymax + Yrb] = 1;
   228		ycover[Yax*Ymax + Yrb] = 1;
   229		ycover[Ycx*Ymax + Yrb] = 1;
   230		ycover[Yrx*Ymax + Yrb] = 1;
   231		ycover[Yrl*Ymax + Yrb] = 1;
   232	
   233		ycover[Ycl*Ymax + Ycx] = 1;
   234	
   235		ycover[Yax*Ymax + Yrx] = 1;
   236		ycover[Ycx*Ymax + Yrx] = 1;
   237	
   238		ycover[Yax*Ymax + Yrl] = 1;
   239		ycover[Ycx*Ymax + Yrl] = 1;
   240		ycover[Yrx*Ymax + Yrl] = 1;
   241	
   242		ycover[Yf0*Ymax + Yrf] = 1;
   243	
   244		ycover[Yal*Ymax + Ymb] = 1;
   245		ycover[Ycl*Ymax + Ymb] = 1;
   246		ycover[Yax*Ymax + Ymb] = 1;
   247		ycover[Ycx*Ymax + Ymb] = 1;
   248		ycover[Yrx*Ymax + Ymb] = 1;
   249		ycover[Yrb*Ymax + Ymb] = 1;
   250		ycover[Yrl*Ymax + Ymb] = 1;
   251		ycover[Ym*Ymax + Ymb] = 1;
   252	
   253		ycover[Yax*Ymax + Yml] = 1;
   254		ycover[Ycx*Ymax + Yml] = 1;
   255		ycover[Yrx*Ymax + Yml] = 1;
   256		ycover[Yrl*Ymax + Yml] = 1;
   257		ycover[Ym*Ymax + Yml] = 1;
   258	
   259		ycover[Yax*Ymax + Ymm] = 1;
   260		ycover[Ycx*Ymax + Ymm] = 1;
   261		ycover[Yrx*Ymax + Ymm] = 1;
   262		ycover[Yrl*Ymax + Ymm] = 1;
   263		ycover[Ym*Ymax + Ymm] = 1;
   264		ycover[Ymr*Ymax + Ymm] = 1;
   265	
   266		ycover[Yax*Ymax + Yxm] = 1;
   267		ycover[Ycx*Ymax + Yxm] = 1;
   268		ycover[Yrx*Ymax + Yxm] = 1;
   269		ycover[Yrl*Ymax + Yxm] = 1;
   270		ycover[Ym*Ymax + Yxm] = 1;
   271		ycover[Yxr*Ymax + Yxm] = 1;
   272	
   273		for(i=0; i<D_NONE; i++) {
   274			reg[i] = -1;
   275			if(i >= D_AL && i <= D_R15B) {
   276				reg[i] = (i-D_AL) & 7;
   277				if(i >= D_SPB && i <= D_DIB)
   278					regrex[i] = 0x40;
   279				if(i >= D_R8B && i <= D_R15B)
   280					regrex[i] = Rxr | Rxx | Rxb;
   281			}
   282			if(i >= D_AH && i<= D_BH)
   283				reg[i] = 4 + ((i-D_AH) & 7);
   284			if(i >= D_AX && i <= D_R15) {
   285				reg[i] = (i-D_AX) & 7;
   286				if(i >= D_R8)
   287					regrex[i] = Rxr | Rxx | Rxb;
   288			}
   289			if(i >= D_F0 && i <= D_F0+7)
   290				reg[i] = (i-D_F0) & 7;
   291			if(i >= D_M0 && i <= D_M0+7)
   292				reg[i] = (i-D_M0) & 7;
   293			if(i >= D_X0 && i <= D_X0+15) {
   294				reg[i] = (i-D_X0) & 7;
   295				if(i >= D_X0+8)
   296					regrex[i] = Rxr | Rxx | Rxb;
   297			}
   298			if(i >= D_CR+8 && i <= D_CR+15)
   299				regrex[i] = Rxr;
   300		}
   301	}
   302	
   303	int
   304	prefixof(Adr *a)
   305	{
   306		switch(a->type) {
   307		case D_INDIR+D_CS:
   308			return 0x2e;
   309		case D_INDIR+D_DS:
   310			return 0x3e;
   311		case D_INDIR+D_ES:
   312			return 0x26;
   313		case D_INDIR+D_FS:
   314			return 0x64;
   315		case D_INDIR+D_GS:
   316			return 0x65;
   317		}
   318		return 0;
   319	}
   320	
      	// oclass returns the operand class (one of the Y* constants) of address a.
      	// Yxxx is returned for operands that cannot be classified, including the
      	// 64-bit-only registers when asmode is not 64.
   321	int
   322	oclass(Adr *a)
   323	{
   324		vlong v;
   325		int32 l;
   326	
      		// Indirect operands and anything carrying an index.
   327		if(a->type >= D_INDIR || a->index != D_NONE) {
   328			if(a->index != D_NONE && a->scale == 0) {
      				// For D_ADDR the index field holds the name class of the
      				// thing whose address is taken.
   329				if(a->type == D_ADDR) {
   330					switch(a->index) {
   331					case D_EXTERN:
   332					case D_STATIC:
   333						return Yi32;	/* TO DO: Yi64 */
   334					case D_AUTO:
   335					case D_PARAM:
   336						return Yiauto;
   337					}
   338					return Yxxx;
   339				}
   340				return Ycol;
   341			}
   342			return Ym;
   343		}
   344		switch(a->type)
   345		{
   346		case D_AL:
   347			return Yal;
   348	
   349		case D_AX:
   350			return Yax;
   351	
   352	/*
   353		case D_SPB:
   354	*/
   355		case D_BPB:
   356		case D_SIB:
   357		case D_DIB:
   358		case D_R8B:
   359		case D_R9B:
   360		case D_R10B:
   361		case D_R11B:
   362		case D_R12B:
   363		case D_R13B:
   364		case D_R14B:
   365		case D_R15B:
      			// These byte registers are rejected unless asmode is 64.
   366			if(asmode != 64)
   367				return Yxxx;
      			/* fall through */
   368		case D_DL:
   369		case D_BL:
   370		case D_AH:
   371		case D_CH:
   372		case D_DH:
   373		case D_BH:
   374			return Yrb;
   375	
   376		case D_CL:
   377			return Ycl;
   378	
   379		case D_CX:
   380			return Ycx;
   381	
   382		case D_DX:
   383		case D_BX:
   384			return Yrx;
   385	
   386		case D_R8:	/* not really Yrl */
   387		case D_R9:
   388		case D_R10:
   389		case D_R11:
   390		case D_R12:
   391		case D_R13:
   392		case D_R14:
   393		case D_R15:
      			// R8-R15 are rejected unless asmode is 64.
   394			if(asmode != 64)
   395				return Yxxx;
      			/* fall through */
   396		case D_SP:
   397		case D_BP:
   398		case D_SI:
   399		case D_DI:
   400			return Yrl;
   401	
   402		case D_F0+0:
   403			return	Yf0;
   404	
   405		case D_F0+1:
   406		case D_F0+2:
   407		case D_F0+3:
   408		case D_F0+4:
   409		case D_F0+5:
   410		case D_F0+6:
   411		case D_F0+7:
   412			return	Yrf;
   413	
   414		case D_M0+0:
   415		case D_M0+1:
   416		case D_M0+2:
   417		case D_M0+3:
   418		case D_M0+4:
   419		case D_M0+5:
   420		case D_M0+6:
   421		case D_M0+7:
   422			return	Ymr;
   423	
   424		case D_X0+0:
   425		case D_X0+1:
   426		case D_X0+2:
   427		case D_X0+3:
   428		case D_X0+4:
   429		case D_X0+5:
   430		case D_X0+6:
   431		case D_X0+7:
   432		case D_X0+8:
   433		case D_X0+9:
   434		case D_X0+10:
   435		case D_X0+11:
   436		case D_X0+12:
   437		case D_X0+13:
   438		case D_X0+14:
   439		case D_X0+15:
   440			return	Yxr;
   441	
   442		case D_NONE:
   443			return Ynone;
   444	
   445		case D_CS:	return	Ycs;
   446		case D_SS:	return	Yss;
   447		case D_DS:	return	Yds;
   448		case D_ES:	return	Yes;
   449		case D_FS:	return	Yfs;
   450		case D_GS:	return	Ygs;
   451	
   452		case D_GDTR:	return	Ygdtr;
   453		case D_IDTR:	return	Yidtr;
   454		case D_LDTR:	return	Yldtr;
   455		case D_MSW:	return	Ymsw;
   456		case D_TASK:	return	Ytask;
   457	
   458		case D_CR+0:	return	Ycr0;
   459		case D_CR+1:	return	Ycr1;
   460		case D_CR+2:	return	Ycr2;
   461		case D_CR+3:	return	Ycr3;
   462		case D_CR+4:	return	Ycr4;
   463		case D_CR+5:	return	Ycr5;
   464		case D_CR+6:	return	Ycr6;
   465		case D_CR+7:	return	Ycr7;
   466		case D_CR+8:	return	Ycr8;
   467	
   468		case D_DR+0:	return	Ydr0;
   469		case D_DR+1:	return	Ydr1;
   470		case D_DR+2:	return	Ydr2;
   471		case D_DR+3:	return	Ydr3;
   472		case D_DR+4:	return	Ydr4;
   473		case D_DR+5:	return	Ydr5;
   474		case D_DR+6:	return	Ydr6;
   475		case D_DR+7:	return	Ydr7;
   476	
   477		case D_TR+0:	return	Ytr0;
   478		case D_TR+1:	return	Ytr1;
   479		case D_TR+2:	return	Ytr2;
   480		case D_TR+3:	return	Ytr3;
   481		case D_TR+4:	return	Ytr4;
   482		case D_TR+5:	return	Ytr5;
   483		case D_TR+6:	return	Ytr6;
   484		case D_TR+7:	return	Ytr7;
   485	
   486		case D_EXTERN:
   487		case D_STATIC:
   488		case D_AUTO:
   489		case D_PARAM:
   490			return Ym;
   491	
   492		case D_CONST:
   493		case D_ADDR:
      			// A constant with no symbol gets the narrowest class that holds
      			// it: 0, 1, signed byte, sign-extendable 32 bits, unsigned
      			// 32 bits, and finally a full 64 bits.
   494			if(a->sym == S) {
   495				v = a->offset;
   496				if(v == 0)
   497					return Yi0;
   498				if(v == 1)
   499					return Yi1;
   500				if(v >= -128 && v <= 127)
   501					return Yi8;
      				// Round-trip through int32 to test for sign-extendability.
   502				l = v;
   503				if((vlong)l == v)
   504					return Ys32;	/* can sign extend */
   505				if((v>>32) == 0)
   506					return Yi32;	/* unsigned */
   507				return Yi64;
   508			}
   509			return Yi32;	/* TO DO: D_ADDR as Yi64 */
   510	
   511		case D_BRANCH:
   512			return Ybr;
   513		}
   514		return Yxxx;
   515	}
   516	
   517	void
   518	asmidx(int scale, int index, int base)
   519	{
   520		int i;
   521	
   522		switch(index) {
   523		default:
   524			goto bad;
   525	
   526		case D_NONE:
   527			i = 4 << 3;
   528			goto bas;
   529	
   530		case D_R8:
   531		case D_R9:
   532		case D_R10:
   533		case D_R11:
   534		case D_R12:
   535		case D_R13:
   536		case D_R14:
   537		case D_R15:
   538			if(asmode != 64)
   539				goto bad;
   540		case D_AX:
   541		case D_CX:
   542		case D_DX:
   543		case D_BX:
   544		case D_BP:
   545		case D_SI:
   546		case D_DI:
   547			i = reg[index] << 3;
   548			break;
   549		}
   550		switch(scale) {
   551		default:
   552			goto bad;
   553		case 1:
   554			break;
   555		case 2:
   556			i |= (1<<6);
   557			break;
   558		case 4:
   559			i |= (2<<6);
   560			break;
   561		case 8:
   562			i |= (3<<6);
   563			break;
   564		}
   565	bas:
   566		switch(base) {
   567		default:
   568			goto bad;
   569		case D_NONE:	/* must be mod=00 */
   570			i |= 5;
   571			break;
   572		case D_R8:
   573		case D_R9:
   574		case D_R10:
   575		case D_R11:
   576		case D_R12:
   577		case D_R13:
   578		case D_R14:
   579		case D_R15:
   580			if(asmode != 64)
   581				goto bad;
   582		case D_AX:
   583		case D_CX:
   584		case D_DX:
   585		case D_BX:
   586		case D_SP:
   587		case D_BP:
   588		case D_SI:
   589		case D_DI:
   590			i |= reg[base];
   591			break;
   592		}
   593		*andptr++ = i;
   594		return;
   595	bad:
   596		diag("asmidx: bad address %d/%d/%d", scale, index, base);
   597		*andptr++ = 0;
   598		return;
   599	}
   600	
   601	static void
   602	put4(int32 v)
   603	{
   604		andptr[0] = v;
   605		andptr[1] = v>>8;
   606		andptr[2] = v>>16;
   607		andptr[3] = v>>24;
   608		andptr += 4;
   609	}
   610	
   611	static void
   612	relput4(Prog *p, Adr *a)
   613	{
   614		vlong v;
   615		Reloc rel, *r;
   616		
   617		v = vaddr(a, &rel);
   618		if(rel.siz != 0) {
   619			if(rel.siz != 4)
   620				diag("bad reloc");
   621			r = addrel(cursym);
   622			*r = rel;
   623			r->off = p->pc + andptr - and;
   624		}
   625		put4(v);
   626	}
   627	
   628	static void
   629	put8(vlong v)
   630	{
   631		andptr[0] = v;
   632		andptr[1] = v>>8;
   633		andptr[2] = v>>16;
   634		andptr[3] = v>>24;
   635		andptr[4] = v>>32;
   636		andptr[5] = v>>40;
   637		andptr[6] = v>>48;
   638		andptr[7] = v>>56;
   639		andptr += 8;
   640	}
   641	
   642	/*
   643	static void
   644	relput8(Prog *p, Adr *a)
   645	{
   646		vlong v;
   647		Reloc rel, *r;
   648		
   649		v = vaddr(a, &rel);
   650		if(rel.siz != 0) {
   651			r = addrel(cursym);
   652			*r = rel;
   653			r->siz = 8;
   654			r->off = p->pc + andptr - and;
   655		}
   656		put8(v);
   657	}
   658	*/
   659	
   660	vlong
   661	symaddr(Sym *s)
   662	{
   663		if(!s->reachable)
   664			diag("unreachable symbol in symaddr - %s", s->name);
   665		return s->value;
   666	}
   667	
   668	static vlong
   669	vaddr(Adr *a, Reloc *r)
   670	{
   671		int t;
   672		vlong v;
   673		Sym *s;
   674		
   675		if(r != nil)
   676			memset(r, 0, sizeof *r);
   677	
   678		t = a->type;
   679		v = a->offset;
   680		if(t == D_ADDR)
   681			t = a->index;
   682		switch(t) {
   683		case D_STATIC:
   684		case D_EXTERN:
   685			s = a->sym;
   686			if(!s->reachable)
   687				diag("unreachable symbol in vaddr - %s", s->name);
   688			if(r == nil) {
   689				diag("need reloc for %D", a);
   690				errorexit();
   691			}
   692			r->type = D_ADDR;
   693			r->siz = 4;	// TODO: 8 for external symbols
   694			r->off = -1;	// caller must fill in
   695			r->sym = s;
   696			r->add = v;
   697			v = 0;
   698		}
   699		return v;
   700	}
   701	
      	// asmandsz appends the addressing bytes for operand a to the instruction
      	// buffer: a mod/reg/rm byte with r in bits 5-3, then, as required, a
      	// scale/index/base byte (via asmidx) and a 1- or 4-byte displacement.
      	// Prefix bits needed by the registers involved are OR-ed into rexflag,
      	// together with the 0x40 and Rxr bits of rex.  m64 is accepted but unused.
      	// A relocation is added to cursym when the displacement refers to a symbol.
   702	static void
   703	asmandsz(Adr *a, int r, int rex, int m64)
   704	{
   705		int32 v;
   706		int t, scale;
   707		Reloc rel;
   708	
   709		USED(m64);
      		// Only these two bits of the caller's rex are propagated.
   710		rex &= (0x40 | Rxr);
   711		v = a->offset;
   712		t = a->type;
      		// rel.siz == 0 means "no relocation pending".
   713		rel.siz = 0;
      		// Case 1: operand has an index register, so a second byte is always needed.
   714		if(a->index != D_NONE) {
      			// Work out the base register t.
   715			if(t < D_INDIR) { 
   716				switch(t) {
   717				default:
   718					goto bad;
   719				case D_STATIC:
   720				case D_EXTERN:
      					// Symbol address: no base, displacement comes from vaddr.
   721					t = D_NONE;
   722					v = vaddr(a, &rel);
   723					break;
   724				case D_AUTO:
   725				case D_PARAM:
      					// Stack-relative names use SP as the base.
   726					t = D_SP;
   727					break;
   728				}
   729			} else
   730				t -= D_INDIR;
   731			rexflag |= (regrex[(int)a->index] & Rxx) | (regrex[t] & Rxb) | rex;
      			// No base: mod=0, rm=4, followed by a 4-byte displacement.
   732			if(t == D_NONE) {
   733				*andptr++ = (0 << 6) | (4 << 0) | (r << 3);
   734				asmidx(a->scale, a->index, t);
   735				goto putrelv;
   736			}
      			// Zero displacement: mod=0 with no displacement bytes.  BP and R13
      			// are excluded because asmidx uses their base code (5) with mod=0
      			// to mean "no base".
   737			if(v == 0 && rel.siz == 0 && t != D_BP && t != D_R13) {
   738				*andptr++ = (0 << 6) | (4 << 0) | (r << 3);
   739				asmidx(a->scale, a->index, t);
   740				return;
   741			}
      			// Displacement fits in a signed byte: mod=1.
   742			if(v >= -128 && v < 128 && rel.siz == 0) {
   743				*andptr++ = (1 << 6) | (4 << 0) | (r << 3);
   744				asmidx(a->scale, a->index, t);
   745				*andptr++ = v;
   746				return;
   747			}
      			// Otherwise mod=2 with a 4-byte displacement.
   748			*andptr++ = (2 << 6) | (4 << 0) | (r << 3);
   749			asmidx(a->scale, a->index, t);
   750			goto putrelv;
   751		}
      		// Case 2: register-direct operand, mod=3.  An offset is an error.
   752		if(t >= D_AL && t <= D_X0+15) {
   753			if(v)
   754				goto bad;
   755			*andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3);
   756			rexflag |= (regrex[t] & (0x40 | Rxb)) | rex;
   757			return;
   758		}
   759		
      		// Case 3: memory operand without an index register.
   760		scale = a->scale;
   761		if(t < D_INDIR) {
   762			switch(a->type) {
   763			default:
   764				goto bad;
   765			case D_STATIC:
   766			case D_EXTERN:
   767				t = D_NONE;
   768				v = vaddr(a, &rel);
   769				break;
   770			case D_AUTO:
   771			case D_PARAM:
   772				t = D_SP;
   773				break;
   774			}
   775			scale = 1;
   776		} else
   777			t -= D_INDIR;
   778	
   779		rexflag |= (regrex[t] & Rxb) | rex;
      		// Absolute address (no base, or a segment register as "base").
   780		if(t == D_NONE || (D_CS <= t && t <= D_GS)) {
   781			if(asmode != 64){
      				// Outside 64-bit mode: mod=0, rm=5, 4-byte displacement.
   782				*andptr++ = (0 << 6) | (5 << 0) | (r << 3);
   783				goto putrelv;
   784			}
   785			/* temporary */
      			// 64-bit mode: go through a second byte with no index and no base.
      			// NOTE(review): presumably because mod=0/rm=5 alone is
      			// RIP-relative in 64-bit mode -- confirm against the ISA manual.
   786			*andptr++ = (0 <<  6) | (4 << 0) | (r << 3);	/* sib present */
   787			*andptr++ = (0 << 6) | (4 << 3) | (5 << 0);	/* DS:d32 */
   788			goto putrelv;
   789		}
      		// SP and R12 as base always get a scale/index/base byte from asmidx.
   790		if(t == D_SP || t == D_R12) {
   791			if(v == 0) {
   792				*andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
   793				asmidx(scale, D_NONE, t);
   794				return;
   795			}
   796			if(v >= -128 && v < 128) {
   797				*andptr++ = (1 << 6) | (reg[t] << 0) | (r << 3);
   798				asmidx(scale, D_NONE, t);
   799				*andptr++ = v;
   800				return;
   801			}
   802			*andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
   803			asmidx(scale, D_NONE, t);
   804			goto putrelv;
   805		}
      		// Any other general register as base: a single mod/reg/rm byte plus
      		// no, 1-byte or 4-byte displacement.  BP and R13 cannot use the
      		// no-displacement form.
   806		if(t >= D_AX && t <= D_R15) {
   807			if(v == 0 && t != D_BP && t != D_R13) {
   808				*andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
   809				return;
   810			}
   811			if(v >= -128 && v < 128) {
   812				andptr[0] = (1 << 6) | (reg[t] << 0) | (r << 3);
   813				andptr[1] = v;
   814				andptr += 2;
   815				return;
   816			}
   817			*andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
   818			goto putrelv;
   819		}
   820		goto bad;
   821		
      	// Emit the 4-byte displacement, first recording any pending relocation at
      	// the offset where it will be written.
   822	putrelv:
   823		if(rel.siz != 0) {
      			// Note: this local r shadows the int parameter r for this block.
   824			Reloc *r;
   825	
   826			if(rel.siz != 4) {
   827				diag("bad rel");
   828				goto bad;
   829			}
   830			r = addrel(cursym);
   831			*r = rel;
   832			r->off = curp->pc + andptr - and;
   833		}
   834		put4(v);
   835		return;
   836	
   837	bad:
   838		diag("asmand: bad address %D", a);
   839		return;
   840	}
   841	
   842	void
   843	asmand(Adr *a, Adr *ra)
   844	{
   845		asmandsz(a, reg[ra->type], regrex[ra->type], 0);
   846	}
   847	
   848	void
   849	asmando(Adr *a, int o)
   850	{
   851		asmandsz(a, o, 0, 0);
   852	}
   853	
   854	static void
   855	bytereg(Adr *a, char *t)
   856	{
   857		if(a->index == D_NONE && (a->type >= D_AX && a->type <= D_R15)) {
   858			a->type = D_AL + (a->type-D_AX);
   859			*t = 0;
   860		}
   861	}
   862	
      	// ymovtab lists the special-case moves, pushes, pops and double shifts.
      	// doasm jumps to its `domov` label when no ytab template matches; that
      	// code is outside this excerpt.
      	// NOTE(review): the columns appear to be: opcode, from-operand class,
      	// to-operand class, an encoding-kind code, then up to four prefix/opcode
      	// bytes, with E (0xff) marking the end of the byte list -- confirm
      	// against the Movtab declaration in l.h and the domov code in doasm.
      	// The table ends with an all-zero entry.
   863	#define	E	0xff
   864	Movtab	ymovtab[] =
   865	{
   866	/* push */
   867		{APUSHL,	Ycs,	Ynone,	0,	0x0e,E,0,0},
   868		{APUSHL,	Yss,	Ynone,	0,	0x16,E,0,0},
   869		{APUSHL,	Yds,	Ynone,	0,	0x1e,E,0,0},
   870		{APUSHL,	Yes,	Ynone,	0,	0x06,E,0,0},
   871		{APUSHL,	Yfs,	Ynone,	0,	0x0f,0xa0,E,0},
   872		{APUSHL,	Ygs,	Ynone,	0,	0x0f,0xa8,E,0},
   873		{APUSHQ,	Yfs,	Ynone,	0,	0x0f,0xa0,E,0},
   874		{APUSHQ,	Ygs,	Ynone,	0,	0x0f,0xa8,E,0},
   875	
   876		{APUSHW,	Ycs,	Ynone,	0,	Pe,0x0e,E,0},
   877		{APUSHW,	Yss,	Ynone,	0,	Pe,0x16,E,0},
   878		{APUSHW,	Yds,	Ynone,	0,	Pe,0x1e,E,0},
   879		{APUSHW,	Yes,	Ynone,	0,	Pe,0x06,E,0},
   880		{APUSHW,	Yfs,	Ynone,	0,	Pe,0x0f,0xa0,E},
   881		{APUSHW,	Ygs,	Ynone,	0,	Pe,0x0f,0xa8,E},
   882	
   883	/* pop */
   884		{APOPL,	Ynone,	Yds,	0,	0x1f,E,0,0},
   885		{APOPL,	Ynone,	Yes,	0,	0x07,E,0,0},
   886		{APOPL,	Ynone,	Yss,	0,	0x17,E,0,0},
   887		{APOPL,	Ynone,	Yfs,	0,	0x0f,0xa1,E,0},
   888		{APOPL,	Ynone,	Ygs,	0,	0x0f,0xa9,E,0},
   889		{APOPQ,	Ynone,	Yfs,	0,	0x0f,0xa1,E,0},
   890		{APOPQ,	Ynone,	Ygs,	0,	0x0f,0xa9,E,0},
   891	
   892		{APOPW,	Ynone,	Yds,	0,	Pe,0x1f,E,0},
   893		{APOPW,	Ynone,	Yes,	0,	Pe,0x07,E,0},
   894		{APOPW,	Ynone,	Yss,	0,	Pe,0x17,E,0},
   895		{APOPW,	Ynone,	Yfs,	0,	Pe,0x0f,0xa1,E},
   896		{APOPW,	Ynone,	Ygs,	0,	Pe,0x0f,0xa9,E},
   897	
   898	/* mov seg */
   899		{AMOVW,	Yes,	Yml,	1,	0x8c,0,0,0},
   900		{AMOVW,	Ycs,	Yml,	1,	0x8c,1,0,0},
   901		{AMOVW,	Yss,	Yml,	1,	0x8c,2,0,0},
   902		{AMOVW,	Yds,	Yml,	1,	0x8c,3,0,0},
   903		{AMOVW,	Yfs,	Yml,	1,	0x8c,4,0,0},
   904		{AMOVW,	Ygs,	Yml,	1,	0x8c,5,0,0},
   905	
   906		{AMOVW,	Yml,	Yes,	2,	0x8e,0,0,0},
   907		{AMOVW,	Yml,	Ycs,	2,	0x8e,1,0,0},
   908		{AMOVW,	Yml,	Yss,	2,	0x8e,2,0,0},
   909		{AMOVW,	Yml,	Yds,	2,	0x8e,3,0,0},
   910		{AMOVW,	Yml,	Yfs,	2,	0x8e,4,0,0},
   911		{AMOVW,	Yml,	Ygs,	2,	0x8e,5,0,0},
   912	
   913	/* mov cr */
   914		{AMOVL,	Ycr0,	Yml,	3,	0x0f,0x20,0,0},
   915		{AMOVL,	Ycr2,	Yml,	3,	0x0f,0x20,2,0},
   916		{AMOVL,	Ycr3,	Yml,	3,	0x0f,0x20,3,0},
   917		{AMOVL,	Ycr4,	Yml,	3,	0x0f,0x20,4,0},
   918		{AMOVL,	Ycr8,	Yml,	3,	0x0f,0x20,8,0},
   919		{AMOVQ,	Ycr0,	Yml,	3,	0x0f,0x20,0,0},
   920		{AMOVQ,	Ycr2,	Yml,	3,	0x0f,0x20,2,0},
   921		{AMOVQ,	Ycr3,	Yml,	3,	0x0f,0x20,3,0},
   922		{AMOVQ,	Ycr4,	Yml,	3,	0x0f,0x20,4,0},
   923		{AMOVQ,	Ycr8,	Yml,	3,	0x0f,0x20,8,0},
   924	
   925		{AMOVL,	Yml,	Ycr0,	4,	0x0f,0x22,0,0},
   926		{AMOVL,	Yml,	Ycr2,	4,	0x0f,0x22,2,0},
   927		{AMOVL,	Yml,	Ycr3,	4,	0x0f,0x22,3,0},
   928		{AMOVL,	Yml,	Ycr4,	4,	0x0f,0x22,4,0},
   929		{AMOVL,	Yml,	Ycr8,	4,	0x0f,0x22,8,0},
   930		{AMOVQ,	Yml,	Ycr0,	4,	0x0f,0x22,0,0},
   931		{AMOVQ,	Yml,	Ycr2,	4,	0x0f,0x22,2,0},
   932		{AMOVQ,	Yml,	Ycr3,	4,	0x0f,0x22,3,0},
   933		{AMOVQ,	Yml,	Ycr4,	4,	0x0f,0x22,4,0},
   934		{AMOVQ,	Yml,	Ycr8,	4,	0x0f,0x22,8,0},
   935	
   936	/* mov dr */
   937		{AMOVL,	Ydr0,	Yml,	3,	0x0f,0x21,0,0},
   938		{AMOVL,	Ydr6,	Yml,	3,	0x0f,0x21,6,0},
   939		{AMOVL,	Ydr7,	Yml,	3,	0x0f,0x21,7,0},
   940		{AMOVQ,	Ydr0,	Yml,	3,	0x0f,0x21,0,0},
   941		{AMOVQ,	Ydr6,	Yml,	3,	0x0f,0x21,6,0},
   942		{AMOVQ,	Ydr7,	Yml,	3,	0x0f,0x21,7,0},
   943	
   944		{AMOVL,	Yml,	Ydr0,	4,	0x0f,0x23,0,0},
   945		{AMOVL,	Yml,	Ydr6,	4,	0x0f,0x23,6,0},
   946		{AMOVL,	Yml,	Ydr7,	4,	0x0f,0x23,7,0},
   947		{AMOVQ,	Yml,	Ydr0,	4,	0x0f,0x23,0,0},
   948		{AMOVQ,	Yml,	Ydr6,	4,	0x0f,0x23,6,0},
   949		{AMOVQ,	Yml,	Ydr7,	4,	0x0f,0x23,7,0},
   950	
   951	/* mov tr */
   952		{AMOVL,	Ytr6,	Yml,	3,	0x0f,0x24,6,0},
   953		{AMOVL,	Ytr7,	Yml,	3,	0x0f,0x24,7,0},
   954	
   955		{AMOVL,	Yml,	Ytr6,	4,	0x0f,0x26,6,E},
   956		{AMOVL,	Yml,	Ytr7,	4,	0x0f,0x26,7,E},
   957	
   958	/* lgdt, sgdt, lidt, sidt */
   959		{AMOVL,	Ym,	Ygdtr,	4,	0x0f,0x01,2,0},
   960		{AMOVL,	Ygdtr,	Ym,	3,	0x0f,0x01,0,0},
   961		{AMOVL,	Ym,	Yidtr,	4,	0x0f,0x01,3,0},
   962		{AMOVL,	Yidtr,	Ym,	3,	0x0f,0x01,1,0},
   963		{AMOVQ,	Ym,	Ygdtr,	4,	0x0f,0x01,2,0},
   964		{AMOVQ,	Ygdtr,	Ym,	3,	0x0f,0x01,0,0},
   965		{AMOVQ,	Ym,	Yidtr,	4,	0x0f,0x01,3,0},
   966		{AMOVQ,	Yidtr,	Ym,	3,	0x0f,0x01,1,0},
   967	
   968	/* lldt, sldt */
   969		{AMOVW,	Yml,	Yldtr,	4,	0x0f,0x00,2,0},
   970		{AMOVW,	Yldtr,	Yml,	3,	0x0f,0x00,0,0},
   971	
   972	/* lmsw, smsw */
   973		{AMOVW,	Yml,	Ymsw,	4,	0x0f,0x01,6,0},
   974		{AMOVW,	Ymsw,	Yml,	3,	0x0f,0x01,4,0},
   975	
   976	/* ltr, str */
   977		{AMOVW,	Yml,	Ytask,	4,	0x0f,0x00,3,0},
   978		{AMOVW,	Ytask,	Yml,	3,	0x0f,0x00,1,0},
   979	
   980	/* load full pointer */
   981		{AMOVL,	Yml,	Ycol,	5,	0,0,0,0},
   982		{AMOVW,	Yml,	Ycol,	5,	Pe,0,0,0},
   983	
   984	/* double shift */
   985		{ASHLL,	Ycol,	Yml,	6,	0xa4,0xa5,0,0},
   986		{ASHRL,	Ycol,	Yml,	6,	0xac,0xad,0,0},
   987		{ASHLQ,	Ycol,	Yml,	6,	Pw,0xa4,0xa5,0},
   988		{ASHRQ,	Ycol,	Yml,	6,	Pw,0xac,0xad,0},
   989		{ASHLW,	Ycol,	Yml,	6,	Pe,0xa4,0xa5,0},
   990		{ASHRW,	Ycol,	Yml,	6,	Pe,0xac,0xad,0},
   991		0
   992	};
   993	
   994	int
   995	isax(Adr *a)
   996	{
   997	
   998		switch(a->type) {
   999		case D_AX:
  1000		case D_AL:
  1001		case D_AH:
  1002		case D_INDIR+D_AX:
  1003			return 1;
  1004		}
  1005		if(a->index == D_AX)
  1006			return 1;
  1007		return 0;
  1008	}
  1009	
  1010	void
  1011	subreg(Prog *p, int from, int to)
  1012	{
  1013	
  1014		if(debug['Q'])
  1015			print("\n%P	s/%R/%R/\n", p, from, to);
  1016	
  1017		if(p->from.type == from)
  1018			p->from.type = to;
  1019		if(p->to.type == from)
  1020			p->to.type = to;
  1021	
  1022		if(p->from.index == from)
  1023			p->from.index = to;
  1024		if(p->to.index == from)
  1025			p->to.index = to;
  1026	
  1027		from += D_INDIR;
  1028		if(p->from.type == from)
  1029			p->from.type = to+D_INDIR;
  1030		if(p->to.type == from)
  1031			p->to.type = to+D_INDIR;
  1032	
  1033		if(debug['Q'])
  1034			print("%P\n", p);
  1035	}
  1036	
  1037	static int
  1038	mediaop(Optab *o, int op, int osize, int z)
  1039	{
  1040		switch(op){
  1041		case Pm:
  1042		case Pe:
  1043		case Pf2:
  1044		case Pf3:
  1045			if(osize != 1){
  1046				if(op != Pm)
  1047					*andptr++ = op;
  1048				*andptr++ = Pm;
  1049				op = o->op[++z];
  1050				break;
  1051			}
  1052		default:
  1053			if(andptr == and || andptr[-1] != Pm)
  1054				*andptr++ = Pm;
  1055			break;
  1056		}
  1057		*andptr++ = op;
  1058		return z;
  1059	}
  1060	
  1061	void
  1062	doasm(Prog *p)
  1063	{
  1064		Optab *o;
  1065		Prog *q, pp;
  1066		uchar *t;
  1067		Movtab *mo;
  1068		int z, op, ft, tt, xo, l, pre;
  1069		vlong v;
  1070		Reloc rel, *r;
  1071		Adr *a;
  1072		
  1073		curp = p;	// TODO
  1074	
  1075		o = opindex[p->as];
  1076		if(o == nil) {
  1077			diag("asmins: missing op %P", p);
  1078			return;
  1079		}
  1080		
  1081		pre = prefixof(&p->from);
  1082		if(pre)
  1083			*andptr++ = pre;
  1084		pre = prefixof(&p->to);
  1085		if(pre)
  1086			*andptr++ = pre;
  1087	
  1088		if(p->ft == 0)
  1089			p->ft = oclass(&p->from);
  1090		if(p->tt == 0)
  1091			p->tt = oclass(&p->to);
  1092	
  1093		ft = p->ft * Ymax;
  1094		tt = p->tt * Ymax;
  1095	
  1096		t = o->ytab;
  1097		if(t == 0) {
  1098			diag("asmins: noproto %P", p);
  1099			return;
  1100		}
  1101		xo = o->op[0] == 0x0f;
  1102		for(z=0; *t; z+=t[3]+xo,t+=4)
  1103			if(ycover[ft+t[0]])
  1104			if(ycover[tt+t[1]])
  1105				goto found;
  1106		goto domov;
  1107	
  1108	found:
  1109		switch(o->prefix) {
  1110		case Pq:	/* 16 bit escape and opcode escape */
  1111			*andptr++ = Pe;
  1112			*andptr++ = Pm;
  1113			break;
  1114	
  1115		case Pf2:	/* xmm opcode escape */
  1116		case Pf3:
  1117			*andptr++ = o->prefix;
  1118			*andptr++ = Pm;
  1119			break;
  1120	
  1121		case Pm:	/* opcode escape */
  1122			*andptr++ = Pm;
  1123			break;
  1124	
  1125		case Pe:	/* 16 bit escape */
  1126			*andptr++ = Pe;
  1127			break;
  1128	
  1129		case Pw:	/* 64-bit escape */
  1130			if(p->mode != 64)
  1131				diag("asmins: illegal 64: %P", p);
  1132			rexflag |= Pw;
  1133			break;
  1134	
  1135		case Pb:	/* botch */
  1136			bytereg(&p->from, &p->ft);
  1137			bytereg(&p->to, &p->tt);
  1138			break;
  1139	
  1140		case P32:	/* 32 bit but illegal if 64-bit mode */
  1141			if(p->mode == 64)
  1142				diag("asmins: illegal in 64-bit mode: %P", p);
  1143			break;
  1144	
  1145		case Py:	/* 64-bit only, no prefix */
  1146			if(p->mode != 64)
  1147				diag("asmins: illegal in %d-bit mode: %P", p->mode, p);
  1148			break;
  1149		}
  1150	
  1151		op = o->op[z];
  1152		if(op == 0x0f) {
  1153			*andptr++ = op;
  1154			op = o->op[++z];
  1155		}
  1156		switch(t[2]) {
  1157		default:
  1158			diag("asmins: unknown z %d %P", t[2], p);
  1159			return;
  1160	
  1161		case Zpseudo:
  1162			break;
  1163	
  1164		case Zlit:
  1165			for(; op = o->op[z]; z++)
  1166				*andptr++ = op;
  1167			break;
  1168	
  1169		case Zlitm_r:
  1170			for(; op = o->op[z]; z++)
  1171				*andptr++ = op;
  1172			asmand(&p->from, &p->to);
  1173			break;
  1174	
  1175		case Zmb_r:
  1176			bytereg(&p->from, &p->ft);
  1177			/* fall through */
  1178		case Zm_r:
  1179			*andptr++ = op;
  1180			asmand(&p->from, &p->to);
  1181			break;
  1182	
  1183		case Zm_r_xm:
  1184			mediaop(o, op, t[3], z);
  1185			asmand(&p->from, &p->to);
  1186			break;
  1187	
  1188		case Zm_r_xm_nr:
  1189			rexflag = 0;
  1190			mediaop(o, op, t[3], z);
  1191			asmand(&p->from, &p->to);
  1192			break;
  1193	
  1194		case Zm_r_i_xm:
  1195			mediaop(o, op, t[3], z);
  1196			asmand(&p->from, &p->to);
  1197			*andptr++ = p->to.offset;
  1198			break;
  1199	
  1200		case Zm_r_3d:
  1201			*andptr++ = 0x0f;
  1202			*andptr++ = 0x0f;
  1203			asmand(&p->from, &p->to);
  1204			*andptr++ = op;
  1205			break;
  1206	
  1207		case Zibm_r:
  1208			*andptr++ = op;
  1209			asmand(&p->from, &p->to);
  1210			*andptr++ = p->to.offset;
  1211			break;
  1212	
  1213		case Zaut_r:
  1214			*andptr++ = 0x8d;	/* leal */
  1215			if(p->from.type != D_ADDR)
  1216				diag("asmins: Zaut sb type ADDR");
  1217			p->from.type = p->from.index;
  1218			p->from.index = D_NONE;
  1219			asmand(&p->from, &p->to);
  1220			p->from.index = p->from.type;
  1221			p->from.type = D_ADDR;
  1222			break;
  1223	
  1224		case Zm_o:
  1225			*andptr++ = op;
  1226			asmando(&p->from, o->op[z+1]);
  1227			break;
  1228	
  1229		case Zr_m:
  1230			*andptr++ = op;
  1231			asmand(&p->to, &p->from);
  1232			break;
  1233	
  1234		case Zr_m_xm:
  1235			mediaop(o, op, t[3], z);
  1236			asmand(&p->to, &p->from);
  1237			break;
  1238	
  1239		case Zr_m_xm_nr:
  1240			rexflag = 0;
  1241			mediaop(o, op, t[3], z);
  1242			asmand(&p->to, &p->from);
  1243			break;
  1244	
  1245		case Zr_m_i_xm:
  1246			mediaop(o, op, t[3], z);
  1247			asmand(&p->to, &p->from);
  1248			*andptr++ = p->from.offset;
  1249			break;
  1250	
  1251		case Zo_m:
  1252			*andptr++ = op;
  1253			asmando(&p->to, o->op[z+1]);
  1254			break;
  1255	
  1256		case Zo_m64:
  1257			*andptr++ = op;
  1258			asmandsz(&p->to, o->op[z+1], 0, 1);
  1259			break;
  1260	
  1261		case Zm_ibo:
  1262			*andptr++ = op;
  1263			asmando(&p->from, o->op[z+1]);
  1264			*andptr++ = vaddr(&p->to, nil);
  1265			break;
  1266	
  1267		case Zibo_m:
  1268			*andptr++ = op;
  1269			asmando(&p->to, o->op[z+1]);
  1270			*andptr++ = vaddr(&p->from, nil);
  1271			break;
  1272	
  1273		case Zibo_m_xm:
  1274			z = mediaop(o, op, t[3], z);
  1275			asmando(&p->to, o->op[z+1]);
  1276			*andptr++ = vaddr(&p->from, nil);
  1277			break;
  1278	
  1279		case Z_ib:
  1280		case Zib_:
  1281			if(t[2] == Zib_)
  1282				a = &p->from;
  1283			else
  1284				a = &p->to;
  1285			*andptr++ = op;
  1286			*andptr++ = vaddr(a, nil);
  1287			break;
  1288	
  1289		case Zib_rp:
  1290			rexflag |= regrex[p->to.type] & (Rxb|0x40);
  1291			*andptr++ = op + reg[p->to.type];
  1292			*andptr++ = vaddr(&p->from, nil);
  1293			break;
  1294	
  1295		case Zil_rp:
  1296			rexflag |= regrex[p->to.type] & Rxb;
  1297			*andptr++ = op + reg[p->to.type];
  1298			if(o->prefix == Pe) {
  1299				v = vaddr(&p->from, nil);
  1300				*andptr++ = v;
  1301				*andptr++ = v>>8;
  1302			}
  1303			else
  1304				relput4(p, &p->from);
  1305			break;
  1306	
  1307		case Zo_iw:
  1308			*andptr++ = op;
  1309			if(p->from.type != D_NONE){
  1310				v = vaddr(&p->from, nil);
  1311				*andptr++ = v;
  1312				*andptr++ = v>>8;
  1313			}
  1314			break;
  1315	
  1316		case Ziq_rp:
  1317			v = vaddr(&p->from, &rel);
  1318			l = v>>32;
  1319			if(l == 0 && rel.siz != 8){
  1320				//p->mark |= 0100;
  1321				//print("zero: %llux %P\n", v, p);
  1322				rexflag &= ~(0x40|Rxw);
  1323				rexflag |= regrex[p->to.type] & Rxb;
  1324				*andptr++ = 0xb8 + reg[p->to.type];
  1325				if(rel.type != 0) {
  1326					r = addrel(cursym);
  1327					*r = rel;
  1328					r->off = p->pc + andptr - and;
  1329				}
  1330				put4(v);
  1331			}else if(l == -1 && (v&((uvlong)1<<31))!=0){	/* sign extend */
  1332				//p->mark |= 0100;
  1333				//print("sign: %llux %P\n", v, p);
  1334				*andptr ++ = 0xc7;
  1335				asmando(&p->to, 0);
  1336				put4(v);
  1337			}else{	/* need all 8 */
  1338				//print("all: %llux %P\n", v, p);
  1339				rexflag |= regrex[p->to.type] & Rxb;
  1340				*andptr++ = op + reg[p->to.type];
  1341				if(rel.type != 0) {
  1342					r = addrel(cursym);
  1343					*r = rel;
  1344					r->off = p->pc + andptr - and;
  1345				}
  1346				put8(v);
  1347			}
  1348			break;
  1349	
  1350		case Zib_rr:
  1351			*andptr++ = op;
  1352			asmand(&p->to, &p->to);
  1353			*andptr++ = vaddr(&p->from, nil);
  1354			break;
  1355	
  1356		case Z_il:
  1357		case Zil_:
  1358			if(t[2] == Zil_)
  1359				a = &p->from;
  1360			else
  1361				a = &p->to;
  1362			*andptr++ = op;
  1363			if(o->prefix == Pe) {
  1364				v = vaddr(a, nil);
  1365				*andptr++ = v;
  1366				*andptr++ = v>>8;
  1367			}
  1368			else
  1369				relput4(p, a);
  1370			break;
  1371	
  1372		case Zm_ilo:
  1373		case Zilo_m:
  1374			*andptr++ = op;
  1375			if(t[2] == Zilo_m) {
  1376				a = &p->from;
  1377				asmando(&p->to, o->op[z+1]);
  1378			} else {
  1379				a = &p->to;
  1380				asmando(&p->from, o->op[z+1]);
  1381			}
  1382			if(o->prefix == Pe) {
  1383				v = vaddr(a, nil);
  1384				*andptr++ = v;
  1385				*andptr++ = v>>8;
  1386			}
  1387			else
  1388				relput4(p, a);
  1389			break;
  1390	
  1391		case Zil_rr:
  1392			*andptr++ = op;
  1393			asmand(&p->to, &p->to);
  1394			if(o->prefix == Pe) {
  1395				v = vaddr(&p->from, nil);
  1396				*andptr++ = v;
  1397				*andptr++ = v>>8;
  1398			}
  1399			else
  1400				relput4(p, &p->from);
  1401			break;
  1402	
  1403		case Z_rp:
  1404			rexflag |= regrex[p->to.type] & (Rxb|0x40);
  1405			*andptr++ = op + reg[p->to.type];
  1406			break;
  1407	
  1408		case Zrp_:
  1409			rexflag |= regrex[p->from.type] & (Rxb|0x40);
  1410			*andptr++ = op + reg[p->from.type];
  1411			break;
  1412	
  1413		case Zclr:
  1414			*andptr++ = op;
  1415			asmand(&p->to, &p->to);
  1416			break;
  1417	
  1418		case Zcall:
  1419			q = p->pcond;
  1420			if(q == nil) {
  1421				diag("call without target");
  1422				errorexit();
  1423			}
  1424			if(q->as != ATEXT) {
  1425				// Could handle this case by making D_PCREL
  1426				// record the Prog* instead of the Sym*, but let's
  1427				// wait until the need arises.
  1428				diag("call of non-TEXT %P", q);
  1429				errorexit();
  1430			}
  1431			*andptr++ = op;
  1432			r = addrel(cursym);
  1433			r->off = p->pc + andptr - and;
  1434			r->sym = q->from.sym;
  1435			r->type = D_PCREL;
  1436			r->siz = 4;
  1437			put4(0);
  1438			break;
  1439	
  1440		case Zbr:
  1441		case Zjmp:
  1442			// TODO: jump across functions needs reloc
  1443			q = p->pcond;
  1444			if(q == nil) {
  1445				diag("jmp/branch without target");
  1446				errorexit();
  1447			}
  1448			if(q->as == ATEXT) {
  1449				if(t[2] == Zbr) {
  1450					diag("branch to ATEXT");
  1451					errorexit();
  1452				}
  1453				*andptr++ = o->op[z+1];
  1454				r = addrel(cursym);
  1455				r->off = p->pc + andptr - and;
  1456				r->sym = q->from.sym;
  1457				r->type = D_PCREL;
  1458				r->siz = 4;
  1459				put4(0);
  1460				break;
  1461			}
  1462			// Assumes q is in this function.
  1463			// TODO: Check in input, preserve in brchain.
  1464	
  1465			// Fill in backward jump now.
  1466			if(p->back & 1) {
  1467				v = q->pc - (p->pc + 2);
  1468				if(v >= -128) {
  1469					*andptr++ = op;
  1470					*andptr++ = v;
  1471				} else {
  1472					v -= 5-2;
  1473					if(t[2] == Zbr) {
  1474						*andptr++ = 0x0f;
  1475						v--;
  1476					}
  1477					*andptr++ = o->op[z+1];
  1478					*andptr++ = v;
  1479					*andptr++ = v>>8;
  1480					*andptr++ = v>>16;
  1481					*andptr++ = v>>24;
  1482				}
  1483				break;
  1484			}
  1485			
  1486			// Annotate target; will fill in later.
  1487			p->forwd = q->comefrom;
  1488			q->comefrom = p;
  1489			if(p->back & 2)	{ // short
  1490				*andptr++ = op;
  1491				*andptr++ = 0;
  1492			} else {
  1493				if(t[2] == Zbr)
  1494					*andptr++ = 0x0f;
  1495				*andptr++ = o->op[z+1];
  1496				*andptr++ = 0;
  1497				*andptr++ = 0;
  1498				*andptr++ = 0;
  1499				*andptr++ = 0;
  1500			}
  1501			break;
  1502					
  1503	/*
  1504			v = q->pc - p->pc - 2;
  1505			if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
  1506				*andptr++ = op;
  1507				*andptr++ = v;
  1508			} else {
  1509				v -= 5-2;
  1510				if(t[2] == Zbr) {
  1511					*andptr++ = 0x0f;
  1512					v--;
  1513				}
  1514				*andptr++ = o->op[z+1];
  1515				*andptr++ = v;
  1516				*andptr++ = v>>8;
  1517				*andptr++ = v>>16;
  1518				*andptr++ = v>>24;
  1519			}
  1520	*/
  1521			break;
  1522	
  1523		case Zloop:
  1524			q = p->pcond;
  1525			if(q == nil) {
  1526				diag("loop without target");
  1527				errorexit();
  1528			}
  1529			v = q->pc - p->pc - 2;
  1530			if(v < -128 && v > 127)
  1531				diag("loop too far: %P", p);
  1532			*andptr++ = op;
  1533			*andptr++ = v;
  1534			break;
  1535	
  1536		case Zbyte:
  1537			v = vaddr(&p->from, &rel);
  1538			if(rel.siz != 0) {
  1539				rel.siz = op;
  1540				r = addrel(cursym);
  1541				*r = rel;
  1542				r->off = p->pc + andptr - and;
  1543			}
  1544			*andptr++ = v;
  1545			if(op > 1) {
  1546				*andptr++ = v>>8;
  1547				if(op > 2) {
  1548					*andptr++ = v>>16;
  1549					*andptr++ = v>>24;
  1550					if(op > 4) {
  1551						*andptr++ = v>>32;
  1552						*andptr++ = v>>40;
  1553						*andptr++ = v>>48;
  1554						*andptr++ = v>>56;
  1555					}
  1556				}
  1557			}
  1558			break;
  1559		}
  1560		return;
  1561	
  1562	domov:
  1563		for(mo=ymovtab; mo->as; mo++)
  1564			if(p->as == mo->as)
  1565			if(ycover[ft+mo->ft])
  1566			if(ycover[tt+mo->tt]){
  1567				t = mo->op;
  1568				goto mfound;
  1569			}
  1570	bad:
  1571		if(p->mode != 64){
  1572			/*
  1573			 * here, the assembly has failed.
  1574			 * if its a byte instruction that has
  1575			 * unaddressable registers, try to
  1576			 * exchange registers and reissue the
  1577			 * instruction with the operands renamed.
  1578			 */
  1579			pp = *p;
  1580			z = p->from.type;
  1581			if(z >= D_BP && z <= D_DI) {
  1582				if(isax(&p->to)) {
  1583					*andptr++ = 0x87;			/* xchg lhs,bx */
  1584					asmando(&p->from, reg[D_BX]);
  1585					subreg(&pp, z, D_BX);
  1586					doasm(&pp);
  1587					*andptr++ = 0x87;			/* xchg lhs,bx */
  1588					asmando(&p->from, reg[D_BX]);
  1589				} else {
  1590					*andptr++ = 0x90 + reg[z];		/* xchg lsh,ax */
  1591					subreg(&pp, z, D_AX);
  1592					doasm(&pp);
  1593					*andptr++ = 0x90 + reg[z];		/* xchg lsh,ax */
  1594				}
  1595				return;
  1596			}
  1597			z = p->to.type;
  1598			if(z >= D_BP && z <= D_DI) {
  1599				if(isax(&p->from)) {
  1600					*andptr++ = 0x87;			/* xchg rhs,bx */
  1601					asmando(&p->to, reg[D_BX]);
  1602					subreg(&pp, z, D_BX);
  1603					doasm(&pp);
  1604					*andptr++ = 0x87;			/* xchg rhs,bx */
  1605					asmando(&p->to, reg[D_BX]);
  1606				} else {
  1607					*andptr++ = 0x90 + reg[z];		/* xchg rsh,ax */
  1608					subreg(&pp, z, D_AX);
  1609					doasm(&pp);
  1610					*andptr++ = 0x90 + reg[z];		/* xchg rsh,ax */
  1611				}
  1612				return;
  1613			}
  1614		}
  1615		diag("doasm: notfound from=%ux to=%ux %P", p->from.type, p->to.type, p);
  1616		return;
  1617	
  1618	mfound:
  1619		switch(mo->code) {
  1620		default:
  1621			diag("asmins: unknown mov %d %P", mo->code, p);
  1622			break;
  1623	
  1624		case 0:	/* lit */
  1625			for(z=0; t[z]!=E; z++)
  1626				*andptr++ = t[z];
  1627			break;
  1628	
  1629		case 1:	/* r,m */
  1630			*andptr++ = t[0];
  1631			asmando(&p->to, t[1]);
  1632			break;
  1633	
  1634		case 2:	/* m,r */
  1635			*andptr++ = t[0];
  1636			asmando(&p->from, t[1]);
  1637			break;
  1638	
  1639		case 3:	/* r,m - 2op */
  1640			*andptr++ = t[0];
  1641			*andptr++ = t[1];
  1642			asmando(&p->to, t[2]);
  1643			rexflag |= regrex[p->from.type] & (Rxr|0x40);
  1644			break;
  1645	
  1646		case 4:	/* m,r - 2op */
  1647			*andptr++ = t[0];
  1648			*andptr++ = t[1];
  1649			asmando(&p->from, t[2]);
  1650			rexflag |= regrex[p->to.type] & (Rxr|0x40);
  1651			break;
  1652	
  1653		case 5:	/* load full pointer, trash heap */
  1654			if(t[0])
  1655				*andptr++ = t[0];
  1656			switch(p->to.index) {
  1657			default:
  1658				goto bad;
  1659			case D_DS:
  1660				*andptr++ = 0xc5;
  1661				break;
  1662			case D_SS:
  1663				*andptr++ = 0x0f;
  1664				*andptr++ = 0xb2;
  1665				break;
  1666			case D_ES:
  1667				*andptr++ = 0xc4;
  1668				break;
  1669			case D_FS:
  1670				*andptr++ = 0x0f;
  1671				*andptr++ = 0xb4;
  1672				break;
  1673			case D_GS:
  1674				*andptr++ = 0x0f;
  1675				*andptr++ = 0xb5;
  1676				break;
  1677			}
  1678			asmand(&p->from, &p->to);
  1679			break;
  1680	
  1681		case 6:	/* double shift */
  1682			if(t[0] == Pw){
  1683				if(p->mode != 64)
  1684					diag("asmins: illegal 64: %P", p);
  1685				rexflag |= Pw;
  1686				t++;
  1687			}else if(t[0] == Pe){
  1688				*andptr++ = Pe;
  1689				t++;
  1690			}
  1691			z = p->from.type;
  1692			switch(z) {
  1693			default:
  1694				goto bad;
  1695			case D_CONST:
  1696				*andptr++ = 0x0f;
  1697				*andptr++ = t[0];
  1698				asmandsz(&p->to, reg[(int)p->from.index], regrex[(int)p->from.index], 0);
  1699				*andptr++ = p->from.offset;
  1700				break;
  1701			case D_CL:
  1702			case D_CX:
  1703				*andptr++ = 0x0f;
  1704				*andptr++ = t[1];
  1705				asmandsz(&p->to, reg[(int)p->from.index], regrex[(int)p->from.index], 0);
  1706				break;
  1707			}
  1708			break;
  1709		}
  1710	}
  1711	
  1712	void
  1713	asmins(Prog *p)
  1714	{
  1715		int n, np, c;
  1716		Reloc *r;
  1717	
  1718		rexflag = 0;
  1719		andptr = and;
  1720		asmode = p->mode;
  1721		doasm(p);
  1722		if(rexflag){
  1723			/*
  1724			 * as befits the whole approach of the architecture,
  1725			 * the rex prefix must appear before the first opcode byte
  1726			 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  1727			 * before the 0f opcode escape!), or it might be ignored.
  1728			 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  1729			 */
  1730			if(p->mode != 64)
  1731				diag("asmins: illegal in mode %d: %P", p->mode, p);
  1732			n = andptr - and;
  1733			for(np = 0; np < n; np++) {
  1734				c = and[np];
  1735				if(c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26)
  1736					break;
  1737			}
  1738			for(r=cursym->r+cursym->nr; r-- > cursym->r; ) {
  1739				if(r->off < p->pc)
  1740					break;
  1741				r->off++;
  1742			}
  1743			memmove(and+np+1, and+np, n-np);
  1744			and[np] = 0x40 | rexflag;
  1745			andptr++;
  1746		}
  1747	}

release.r60.3. Except as noted, this content is licensed under a Creative Commons Attribution 3.0 License.