1 // Inferno utils/6l/span.c
2 // http://code.google.com/p/inferno-os/source/browse/utils/6l/span.c
3 //
4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
6 // Portions Copyright © 1997-1999 Vita Nuova Limited
7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
8 // Portions Copyright © 2004,2006 Bruce Ellis
9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
10 // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
11 // Portions Copyright © 2009 The Go Authors. All rights reserved.
12 //
13 // Permission is hereby granted, free of charge, to any person obtaining a copy
14 // of this software and associated documentation files (the "Software"), to deal
15 // in the Software without restriction, including without limitation the rights
16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 // copies of the Software, and to permit persons to whom the Software is
18 // furnished to do so, subject to the following conditions:
19 //
20 // The above copyright notice and this permission notice shall be included in
21 // all copies or substantial portions of the Software.
22 //
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29 // THE SOFTWARE.
30
31 // Instruction layout.
32
33 #include "l.h"
34 #include "../ld/lib.h"
35
// Prefix bits OR-ed in by the operand encoders in this file
// (register-extension bits taken from regrex[], plus Pw for 64-bit ops).
static int rexflag;
// Current assembly mode; compared against 64 below to decide whether the
// R8-R15 family of registers is acceptable.
static int asmode;
// Defined further down; declared here because relput4 and asmandsz use it first.
static vlong vaddr(Adr*, Reloc*);
39
40 void
41 span1(Sym *s)
42 {
43 Prog *p, *q;
44 int32 c, v, loop;
45 uchar *bp;
46 int n, m, i;
47
48 cursym = s;
49
50 if(s->p != nil)
51 return;
52
53 for(p = s->text; p != P; p = p->link) {
54 p->back = 2; // use short branches first time through
55 if((q = p->pcond) != P && (q->back & 2))
56 p->back |= 1; // backward jump
57
58 if(p->as == AADJSP) {
59 p->to.type = D_SP;
60 v = -p->from.offset;
61 p->from.offset = v;
62 p->as = p->mode != 64? AADDL: AADDQ;
63 if(v < 0) {
64 p->as = p->mode != 64? ASUBL: ASUBQ;
65 v = -v;
66 p->from.offset = v;
67 }
68 if(v == 0)
69 p->as = ANOP;
70 }
71 }
72
73 n = 0;
74 do {
75 loop = 0;
76 memset(s->r, 0, s->nr*sizeof s->r[0]);
77 s->nr = 0;
78 s->np = 0;
79 c = 0;
80 for(p = s->text; p != P; p = p->link) {
81 p->pc = c;
82
83 // process forward jumps to p
84 for(q = p->comefrom; q != P; q = q->forwd) {
85 v = p->pc - (q->pc + q->mark);
86 if(q->back & 2) { // short
87 if(v > 127) {
88 loop++;
89 q->back ^= 2;
90 }
91 s->p[q->pc+1] = v;
92 } else {
93 bp = s->p + q->pc + q->mark - 4;
94 *bp++ = v;
95 *bp++ = v>>8;
96 *bp++ = v>>16;
97 *bp = v>>24;
98 }
99 }
100 p->comefrom = P;
101
102 asmins(p);
103 p->pc = c;
104 m = andptr-and;
105 symgrow(s, p->pc+m);
106 memmove(s->p+p->pc, and, m);
107 p->mark = m;
108 c += m;
109 }
110 if(++n > 20) {
111 diag("span must be looping");
112 errorexit();
113 }
114 } while(loop);
115 s->size = c;
116
117 if(debug['a'] > 1) {
118 print("span1 %s %lld (%d tries)\n %.6ux", s->name, s->size, n, 0);
119 for(i=0; i<s->np; i++) {
120 print(" %.2ux", s->p[i]);
121 if(i%16 == 15)
122 print("\n %.6ux", i+1);
123 }
124 if(i%16)
125 print("\n");
126
127 for(i=0; i<s->nr; i++) {
128 Reloc *r;
129
130 r = &s->r[i];
131 print(" rel %#.4ux/%d %s%+lld\n", r->off, r->siz, r->sym->name, r->add);
132 }
133 }
134 }
135
136 void
137 span(void)
138 {
139 Prog *p, *q;
140 int32 v;
141 int n;
142
143 if(debug['v'])
144 Bprint(&bso, "%5.2f span\n", cputime());
145
146 // NOTE(rsc): If we get rid of the globals we should
147 // be able to parallelize these iterations.
148 for(cursym = textp; cursym != nil; cursym = cursym->next) {
149 if(cursym->p != nil)
150 continue;
151 // TODO: move into span1
152 for(p = cursym->text; p != P; p = p->link) {
153 n = 0;
154 if(p->to.type == D_BRANCH)
155 if(p->pcond == P)
156 p->pcond = p;
157 if((q = p->pcond) != P)
158 if(q->back != 2)
159 n = 1;
160 p->back = n;
161 if(p->as == AADJSP) {
162 p->to.type = D_SP;
163 v = -p->from.offset;
164 p->from.offset = v;
165 p->as = p->mode != 64? AADDL: AADDQ;
166 if(v < 0) {
167 p->as = p->mode != 64? ASUBL: ASUBQ;
168 v = -v;
169 p->from.offset = v;
170 }
171 if(v == 0)
172 p->as = ANOP;
173 }
174 }
175 span1(cursym);
176 }
177 }
178
179 void
180 xdefine(char *p, int t, vlong v)
181 {
182 Sym *s;
183
184 s = lookup(p, 0);
185 s->type = t;
186 s->value = v;
187 s->reachable = 1;
188 s->special = 1;
189 }
190
191 void
192 instinit(void)
193 {
194 int c, i;
195
196 for(i=1; optab[i].as; i++) {
197 c = optab[i].as;
198 if(opindex[c] != nil) {
199 diag("phase error in optab: %d (%A)", i, c);
200 errorexit();
201 }
202 opindex[c] = &optab[i];
203 }
204
205 for(i=0; i<Ymax; i++)
206 ycover[i*Ymax + i] = 1;
207
208 ycover[Yi0*Ymax + Yi8] = 1;
209 ycover[Yi1*Ymax + Yi8] = 1;
210
211 ycover[Yi0*Ymax + Ys32] = 1;
212 ycover[Yi1*Ymax + Ys32] = 1;
213 ycover[Yi8*Ymax + Ys32] = 1;
214
215 ycover[Yi0*Ymax + Yi32] = 1;
216 ycover[Yi1*Ymax + Yi32] = 1;
217 ycover[Yi8*Ymax + Yi32] = 1;
218 ycover[Ys32*Ymax + Yi32] = 1;
219
220 ycover[Yi0*Ymax + Yi64] = 1;
221 ycover[Yi1*Ymax + Yi64] = 1;
222 ycover[Yi8*Ymax + Yi64] = 1;
223 ycover[Ys32*Ymax + Yi64] = 1;
224 ycover[Yi32*Ymax + Yi64] = 1;
225
226 ycover[Yal*Ymax + Yrb] = 1;
227 ycover[Ycl*Ymax + Yrb] = 1;
228 ycover[Yax*Ymax + Yrb] = 1;
229 ycover[Ycx*Ymax + Yrb] = 1;
230 ycover[Yrx*Ymax + Yrb] = 1;
231 ycover[Yrl*Ymax + Yrb] = 1;
232
233 ycover[Ycl*Ymax + Ycx] = 1;
234
235 ycover[Yax*Ymax + Yrx] = 1;
236 ycover[Ycx*Ymax + Yrx] = 1;
237
238 ycover[Yax*Ymax + Yrl] = 1;
239 ycover[Ycx*Ymax + Yrl] = 1;
240 ycover[Yrx*Ymax + Yrl] = 1;
241
242 ycover[Yf0*Ymax + Yrf] = 1;
243
244 ycover[Yal*Ymax + Ymb] = 1;
245 ycover[Ycl*Ymax + Ymb] = 1;
246 ycover[Yax*Ymax + Ymb] = 1;
247 ycover[Ycx*Ymax + Ymb] = 1;
248 ycover[Yrx*Ymax + Ymb] = 1;
249 ycover[Yrb*Ymax + Ymb] = 1;
250 ycover[Yrl*Ymax + Ymb] = 1;
251 ycover[Ym*Ymax + Ymb] = 1;
252
253 ycover[Yax*Ymax + Yml] = 1;
254 ycover[Ycx*Ymax + Yml] = 1;
255 ycover[Yrx*Ymax + Yml] = 1;
256 ycover[Yrl*Ymax + Yml] = 1;
257 ycover[Ym*Ymax + Yml] = 1;
258
259 ycover[Yax*Ymax + Ymm] = 1;
260 ycover[Ycx*Ymax + Ymm] = 1;
261 ycover[Yrx*Ymax + Ymm] = 1;
262 ycover[Yrl*Ymax + Ymm] = 1;
263 ycover[Ym*Ymax + Ymm] = 1;
264 ycover[Ymr*Ymax + Ymm] = 1;
265
266 ycover[Yax*Ymax + Yxm] = 1;
267 ycover[Ycx*Ymax + Yxm] = 1;
268 ycover[Yrx*Ymax + Yxm] = 1;
269 ycover[Yrl*Ymax + Yxm] = 1;
270 ycover[Ym*Ymax + Yxm] = 1;
271 ycover[Yxr*Ymax + Yxm] = 1;
272
273 for(i=0; i<D_NONE; i++) {
274 reg[i] = -1;
275 if(i >= D_AL && i <= D_R15B) {
276 reg[i] = (i-D_AL) & 7;
277 if(i >= D_SPB && i <= D_DIB)
278 regrex[i] = 0x40;
279 if(i >= D_R8B && i <= D_R15B)
280 regrex[i] = Rxr | Rxx | Rxb;
281 }
282 if(i >= D_AH && i<= D_BH)
283 reg[i] = 4 + ((i-D_AH) & 7);
284 if(i >= D_AX && i <= D_R15) {
285 reg[i] = (i-D_AX) & 7;
286 if(i >= D_R8)
287 regrex[i] = Rxr | Rxx | Rxb;
288 }
289 if(i >= D_F0 && i <= D_F0+7)
290 reg[i] = (i-D_F0) & 7;
291 if(i >= D_M0 && i <= D_M0+7)
292 reg[i] = (i-D_M0) & 7;
293 if(i >= D_X0 && i <= D_X0+15) {
294 reg[i] = (i-D_X0) & 7;
295 if(i >= D_X0+8)
296 regrex[i] = Rxr | Rxx | Rxb;
297 }
298 if(i >= D_CR+8 && i <= D_CR+15)
299 regrex[i] = Rxr;
300 }
301 }
302
303 int
304 prefixof(Adr *a)
305 {
306 switch(a->type) {
307 case D_INDIR+D_CS:
308 return 0x2e;
309 case D_INDIR+D_DS:
310 return 0x3e;
311 case D_INDIR+D_ES:
312 return 0x26;
313 case D_INDIR+D_FS:
314 return 0x64;
315 case D_INDIR+D_GS:
316 return 0x65;
317 }
318 return 0;
319 }
320
// oclass classifies the operand a into one of the Y* operand classes
// that the ycover table is indexed by.  Yxxx is returned for anything
// that cannot be classified (including the R8-R15 family when asmode is
// not 64).
int
oclass(Adr *a)
{
	vlong v;
	int32 l;

	// Indirect or indexed operands.
	if(a->type >= D_INDIR || a->index != D_NONE) {
		// An index with scale 0 is not a real index register: either the
		// address-of form (D_ADDR, with the name kind stored in index) or
		// a Ycol operand.
		if(a->index != D_NONE && a->scale == 0) {
			if(a->type == D_ADDR) {
				switch(a->index) {
				case D_EXTERN:
				case D_STATIC:
					return Yi32;	/* TO DO: Yi64 */
				case D_AUTO:
				case D_PARAM:
					return Yiauto;
				}
				return Yxxx;
			}
			return Ycol;
		}
		return Ym;
	}
	switch(a->type)
	{
	case D_AL:
		return Yal;

	case D_AX:
		return Yax;

/*
	case D_SPB:
*/
	// These byte registers are only accepted when asmode is 64;
	// otherwise they share the Yrb class below.
	case D_BPB:
	case D_SIB:
	case D_DIB:
	case D_R8B:
	case D_R9B:
	case D_R10B:
	case D_R11B:
	case D_R12B:
	case D_R13B:
	case D_R14B:
	case D_R15B:
		if(asmode != 64)
			return Yxxx;
		/* fall through */
	case D_DL:
	case D_BL:
	case D_AH:
	case D_CH:
	case D_DH:
	case D_BH:
		return Yrb;

	case D_CL:
		return Ycl;

	case D_CX:
		return Ycx;

	case D_DX:
	case D_BX:
		return Yrx;

	case D_R8:	/* not really Yrl */
	case D_R9:
	case D_R10:
	case D_R11:
	case D_R12:
	case D_R13:
	case D_R14:
	case D_R15:
		if(asmode != 64)
			return Yxxx;
		/* fall through */
	case D_SP:
	case D_BP:
	case D_SI:
	case D_DI:
		return Yrl;

	case D_F0+0:
		return	Yf0;

	case D_F0+1:
	case D_F0+2:
	case D_F0+3:
	case D_F0+4:
	case D_F0+5:
	case D_F0+6:
	case D_F0+7:
		return	Yrf;

	case D_M0+0:
	case D_M0+1:
	case D_M0+2:
	case D_M0+3:
	case D_M0+4:
	case D_M0+5:
	case D_M0+6:
	case D_M0+7:
		return	Ymr;

	case D_X0+0:
	case D_X0+1:
	case D_X0+2:
	case D_X0+3:
	case D_X0+4:
	case D_X0+5:
	case D_X0+6:
	case D_X0+7:
	case D_X0+8:
	case D_X0+9:
	case D_X0+10:
	case D_X0+11:
	case D_X0+12:
	case D_X0+13:
	case D_X0+14:
	case D_X0+15:
		return	Yxr;

	case D_NONE:
		return Ynone;

	// Segment registers.
	case D_CS:	return	Ycs;
	case D_SS:	return	Yss;
	case D_DS:	return	Yds;
	case D_ES:	return	Yes;
	case D_FS:	return	Yfs;
	case D_GS:	return	Ygs;

	case D_GDTR:	return	Ygdtr;
	case D_IDTR:	return	Yidtr;
	case D_LDTR:	return	Yldtr;
	case D_MSW:	return	Ymsw;
	case D_TASK:	return	Ytask;

	case D_CR+0:	return	Ycr0;
	case D_CR+1:	return	Ycr1;
	case D_CR+2:	return	Ycr2;
	case D_CR+3:	return	Ycr3;
	case D_CR+4:	return	Ycr4;
	case D_CR+5:	return	Ycr5;
	case D_CR+6:	return	Ycr6;
	case D_CR+7:	return	Ycr7;
	case D_CR+8:	return	Ycr8;

	case D_DR+0:	return	Ydr0;
	case D_DR+1:	return	Ydr1;
	case D_DR+2:	return	Ydr2;
	case D_DR+3:	return	Ydr3;
	case D_DR+4:	return	Ydr4;
	case D_DR+5:	return	Ydr5;
	case D_DR+6:	return	Ydr6;
	case D_DR+7:	return	Ydr7;

	case D_TR+0:	return	Ytr0;
	case D_TR+1:	return	Ytr1;
	case D_TR+2:	return	Ytr2;
	case D_TR+3:	return	Ytr3;
	case D_TR+4:	return	Ytr4;
	case D_TR+5:	return	Ytr5;
	case D_TR+6:	return	Ytr6;
	case D_TR+7:	return	Ytr7;

	case D_EXTERN:
	case D_STATIC:
	case D_AUTO:
	case D_PARAM:
		return Ym;

	case D_CONST:
	case D_ADDR:
		// A constant without a symbol gets the narrowest class that
		// holds its value; anything with a symbol is treated as Yi32.
		if(a->sym == S) {
			v = a->offset;
			if(v == 0)
				return Yi0;
			if(v == 1)
				return Yi1;
			if(v >= -128 && v <= 127)
				return Yi8;
			l = v;
			if((vlong)l == v)
				return Ys32;	/* can sign extend */
			if((v>>32) == 0)
				return Yi32;	/* unsigned */
			return Yi64;
		}
		return Yi32;	/* TO DO: D_ADDR as Yi64 */

	case D_BRANCH:
		return Ybr;
	}
	return Yxxx;
}
516
517 void
518 asmidx(int scale, int index, int base)
519 {
520 int i;
521
522 switch(index) {
523 default:
524 goto bad;
525
526 case D_NONE:
527 i = 4 << 3;
528 goto bas;
529
530 case D_R8:
531 case D_R9:
532 case D_R10:
533 case D_R11:
534 case D_R12:
535 case D_R13:
536 case D_R14:
537 case D_R15:
538 if(asmode != 64)
539 goto bad;
540 case D_AX:
541 case D_CX:
542 case D_DX:
543 case D_BX:
544 case D_BP:
545 case D_SI:
546 case D_DI:
547 i = reg[index] << 3;
548 break;
549 }
550 switch(scale) {
551 default:
552 goto bad;
553 case 1:
554 break;
555 case 2:
556 i |= (1<<6);
557 break;
558 case 4:
559 i |= (2<<6);
560 break;
561 case 8:
562 i |= (3<<6);
563 break;
564 }
565 bas:
566 switch(base) {
567 default:
568 goto bad;
569 case D_NONE: /* must be mod=00 */
570 i |= 5;
571 break;
572 case D_R8:
573 case D_R9:
574 case D_R10:
575 case D_R11:
576 case D_R12:
577 case D_R13:
578 case D_R14:
579 case D_R15:
580 if(asmode != 64)
581 goto bad;
582 case D_AX:
583 case D_CX:
584 case D_DX:
585 case D_BX:
586 case D_SP:
587 case D_BP:
588 case D_SI:
589 case D_DI:
590 i |= reg[base];
591 break;
592 }
593 *andptr++ = i;
594 return;
595 bad:
596 diag("asmidx: bad address %d/%d/%d", scale, index, base);
597 *andptr++ = 0;
598 return;
599 }
600
601 static void
602 put4(int32 v)
603 {
604 andptr[0] = v;
605 andptr[1] = v>>8;
606 andptr[2] = v>>16;
607 andptr[3] = v>>24;
608 andptr += 4;
609 }
610
611 static void
612 relput4(Prog *p, Adr *a)
613 {
614 vlong v;
615 Reloc rel, *r;
616
617 v = vaddr(a, &rel);
618 if(rel.siz != 0) {
619 if(rel.siz != 4)
620 diag("bad reloc");
621 r = addrel(cursym);
622 *r = rel;
623 r->off = p->pc + andptr - and;
624 }
625 put4(v);
626 }
627
628 static void
629 put8(vlong v)
630 {
631 andptr[0] = v;
632 andptr[1] = v>>8;
633 andptr[2] = v>>16;
634 andptr[3] = v>>24;
635 andptr[4] = v>>32;
636 andptr[5] = v>>40;
637 andptr[6] = v>>48;
638 andptr[7] = v>>56;
639 andptr += 8;
640 }
641
642 /*
643 static void
644 relput8(Prog *p, Adr *a)
645 {
646 vlong v;
647 Reloc rel, *r;
648
649 v = vaddr(a, &rel);
650 if(rel.siz != 0) {
651 r = addrel(cursym);
652 *r = rel;
653 r->siz = 8;
654 r->off = p->pc + andptr - and;
655 }
656 put8(v);
657 }
658 */
659
660 vlong
661 symaddr(Sym *s)
662 {
663 if(!s->reachable)
664 diag("unreachable symbol in symaddr - %s", s->name);
665 return s->value;
666 }
667
668 static vlong
669 vaddr(Adr *a, Reloc *r)
670 {
671 int t;
672 vlong v;
673 Sym *s;
674
675 if(r != nil)
676 memset(r, 0, sizeof *r);
677
678 t = a->type;
679 v = a->offset;
680 if(t == D_ADDR)
681 t = a->index;
682 switch(t) {
683 case D_STATIC:
684 case D_EXTERN:
685 s = a->sym;
686 if(!s->reachable)
687 diag("unreachable symbol in vaddr - %s", s->name);
688 if(r == nil) {
689 diag("need reloc for %D", a);
690 errorexit();
691 }
692 r->type = D_ADDR;
693 r->siz = 4; // TODO: 8 for external symbols
694 r->off = -1; // caller must fill in
695 r->sym = s;
696 r->add = v;
697 v = 0;
698 }
699 return v;
700 }
701
// asmandsz emits the addressing bytes for operand a: a byte laid out as
// (mod<<6 | r<<3 | rm), followed where needed by the byte produced by
// asmidx and by a 1- or 4-byte displacement.  r is placed in bits 3-5 of
// the first byte; rex carries extra prefix bits to fold into rexflag
// (only the 0x40 and Rxr bits are kept).  m64 is unused.
// Operands that name a static/extern symbol get a 4-byte relocation
// recorded against cursym.  Unencodable operands are diagnosed.
static void
asmandsz(Adr *a, int r, int rex, int m64)
{
	int32 v;
	int t, scale;
	Reloc rel;

	USED(m64);
	rex &= (0x40 | Rxr);
	v = a->offset;
	t = a->type;
	rel.siz = 0;	// non-zero only after vaddr asks for a relocation
	// Operand with an index register: always uses the asmidx byte (rm=4).
	if(a->index != D_NONE) {
		if(t < D_INDIR) {
			// Named base: symbols have no base register, autos/params use SP.
			switch(t) {
			default:
				goto bad;
			case D_STATIC:
			case D_EXTERN:
				t = D_NONE;
				v = vaddr(a, &rel);
				break;
			case D_AUTO:
			case D_PARAM:
				t = D_SP;
				break;
			}
		} else
			t -= D_INDIR;
		rexflag |= (regrex[(int)a->index] & Rxx) | (regrex[t] & Rxb) | rex;
		if(t == D_NONE) {
			// No base: mod=00 with a 4-byte displacement.
			*andptr++ = (0 << 6) | (4 << 0) | (r << 3);
			asmidx(a->scale, a->index, t);
			goto putrelv;
		}
		// BP and R13 are excluded from the no-displacement form.
		if(v == 0 && rel.siz == 0 && t != D_BP && t != D_R13) {
			*andptr++ = (0 << 6) | (4 << 0) | (r << 3);
			asmidx(a->scale, a->index, t);
			return;
		}
		// Displacement fits in one signed byte.
		if(v >= -128 && v < 128 && rel.siz == 0) {
			*andptr++ = (1 << 6) | (4 << 0) | (r << 3);
			asmidx(a->scale, a->index, t);
			*andptr++ = v;
			return;
		}
		*andptr++ = (2 << 6) | (4 << 0) | (r << 3);
		asmidx(a->scale, a->index, t);
		goto putrelv;
	}
	// Direct register operand: mod=3, no displacement allowed.
	if(t >= D_AL && t <= D_X0+15) {
		if(v)
			goto bad;
		*andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3);
		rexflag |= (regrex[t] & (0x40 | Rxb)) | rex;
		return;
	}

	// Memory operand without an index register.
	scale = a->scale;
	if(t < D_INDIR) {
		switch(a->type) {
		default:
			goto bad;
		case D_STATIC:
		case D_EXTERN:
			t = D_NONE;
			v = vaddr(a, &rel);
			break;
		case D_AUTO:
		case D_PARAM:
			t = D_SP;
			break;
		}
		scale = 1;
	} else
		t -= D_INDIR;

	rexflag |= (regrex[t] & Rxb) | rex;
	// Absolute address (no base register, or a segment register as base).
	if(t == D_NONE || (D_CS <= t && t <= D_GS)) {
		if(asmode != 64){
			*andptr++ = (0 << 6) | (5 << 0) | (r << 3);
			goto putrelv;
		}
		/* temporary */
		*andptr++ = (0 << 6) | (4 << 0) | (r << 3);	/* sib present */
		*andptr++ = (0 << 6) | (4 << 3) | (5 << 0);	/* DS:d32 */
		goto putrelv;
	}
	// SP and R12 as base always go through asmidx with no index.
	if(t == D_SP || t == D_R12) {
		if(v == 0) {
			*andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
			asmidx(scale, D_NONE, t);
			return;
		}
		if(v >= -128 && v < 128) {
			*andptr++ = (1 << 6) | (reg[t] << 0) | (r << 3);
			asmidx(scale, D_NONE, t);
			*andptr++ = v;
			return;
		}
		*andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
		asmidx(scale, D_NONE, t);
		goto putrelv;
	}
	// Any other general register as base.
	if(t >= D_AX && t <= D_R15) {
		// BP and R13 are excluded from the no-displacement form.
		if(v == 0 && t != D_BP && t != D_R13) {
			*andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
			return;
		}
		if(v >= -128 && v < 128) {
			andptr[0] = (1 << 6) | (reg[t] << 0) | (r << 3);
			andptr[1] = v;
			andptr += 2;
			return;
		}
		*andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
		goto putrelv;
	}
	goto bad;

putrelv:
	// Emit the 4-byte displacement, recording a relocation first if needed.
	if(rel.siz != 0) {
		Reloc *r;	// note: shadows the int parameter r inside this block

		if(rel.siz != 4) {
			diag("bad rel");
			goto bad;
		}
		r = addrel(cursym);
		*r = rel;
		r->off = curp->pc + andptr - and;
	}
	put4(v);
	return;

bad:
	diag("asmand: bad address %D", a);
	return;
}
841
842 void
843 asmand(Adr *a, Adr *ra)
844 {
845 asmandsz(a, reg[ra->type], regrex[ra->type], 0);
846 }
847
848 void
849 asmando(Adr *a, int o)
850 {
851 asmandsz(a, o, 0, 0);
852 }
853
854 static void
855 bytereg(Adr *a, char *t)
856 {
857 if(a->index == D_NONE && (a->type >= D_AX && a->type <= D_R15)) {
858 a->type = D_AL + (a->type-D_AX);
859 *t = 0;
860 }
861 }
862
// Marker byte used inside the opcode columns of ymovtab.
// NOTE(review): presumably marks the end of the opcode bytes; the code
// that consumes it is outside this view - confirm there.
#define	E	0xff
// ymovtab lists special-case move-like instructions that doasm falls back
// to when no ordinary optab form matches.  Each row is
//   { opcode, from-class, to-class, handler code, up to four opcode bytes }
// and is selected when the opcode matches and both operand classes are
// covered (see ycover).  The table ends with a zero entry.
// NOTE(review): the meaning of the numeric handler code (0-6) is defined
// by the consumer outside this view.
Movtab	ymovtab[] =
{
/* push */
	{APUSHL,	Ycs,	Ynone,	0,	0x0e,E,0,0},
	{APUSHL,	Yss,	Ynone,	0,	0x16,E,0,0},
	{APUSHL,	Yds,	Ynone,	0,	0x1e,E,0,0},
	{APUSHL,	Yes,	Ynone,	0,	0x06,E,0,0},
	{APUSHL,	Yfs,	Ynone,	0,	0x0f,0xa0,E,0},
	{APUSHL,	Ygs,	Ynone,	0,	0x0f,0xa8,E,0},
	{APUSHQ,	Yfs,	Ynone,	0,	0x0f,0xa0,E,0},
	{APUSHQ,	Ygs,	Ynone,	0,	0x0f,0xa8,E,0},

	{APUSHW,	Ycs,	Ynone,	0,	Pe,0x0e,E,0},
	{APUSHW,	Yss,	Ynone,	0,	Pe,0x16,E,0},
	{APUSHW,	Yds,	Ynone,	0,	Pe,0x1e,E,0},
	{APUSHW,	Yes,	Ynone,	0,	Pe,0x06,E,0},
	{APUSHW,	Yfs,	Ynone,	0,	Pe,0x0f,0xa0,E},
	{APUSHW,	Ygs,	Ynone,	0,	Pe,0x0f,0xa8,E},

/* pop */
	{APOPL,	Ynone,	Yds,	0,	0x1f,E,0,0},
	{APOPL,	Ynone,	Yes,	0,	0x07,E,0,0},
	{APOPL,	Ynone,	Yss,	0,	0x17,E,0,0},
	{APOPL,	Ynone,	Yfs,	0,	0x0f,0xa1,E,0},
	{APOPL,	Ynone,	Ygs,	0,	0x0f,0xa9,E,0},
	{APOPQ,	Ynone,	Yfs,	0,	0x0f,0xa1,E,0},
	{APOPQ,	Ynone,	Ygs,	0,	0x0f,0xa9,E,0},

	{APOPW,	Ynone,	Yds,	0,	Pe,0x1f,E,0},
	{APOPW,	Ynone,	Yes,	0,	Pe,0x07,E,0},
	{APOPW,	Ynone,	Yss,	0,	Pe,0x17,E,0},
	{APOPW,	Ynone,	Yfs,	0,	Pe,0x0f,0xa1,E},
	{APOPW,	Ynone,	Ygs,	0,	Pe,0x0f,0xa9,E},

/* mov seg */
	{AMOVW,	Yes,	Yml,	1,	0x8c,0,0,0},
	{AMOVW,	Ycs,	Yml,	1,	0x8c,1,0,0},
	{AMOVW,	Yss,	Yml,	1,	0x8c,2,0,0},
	{AMOVW,	Yds,	Yml,	1,	0x8c,3,0,0},
	{AMOVW,	Yfs,	Yml,	1,	0x8c,4,0,0},
	{AMOVW,	Ygs,	Yml,	1,	0x8c,5,0,0},

	{AMOVW,	Yml,	Yes,	2,	0x8e,0,0,0},
	{AMOVW,	Yml,	Ycs,	2,	0x8e,1,0,0},
	{AMOVW,	Yml,	Yss,	2,	0x8e,2,0,0},
	{AMOVW,	Yml,	Yds,	2,	0x8e,3,0,0},
	{AMOVW,	Yml,	Yfs,	2,	0x8e,4,0,0},
	{AMOVW,	Yml,	Ygs,	2,	0x8e,5,0,0},

/* mov cr */
	{AMOVL,	Ycr0,	Yml,	3,	0x0f,0x20,0,0},
	{AMOVL,	Ycr2,	Yml,	3,	0x0f,0x20,2,0},
	{AMOVL,	Ycr3,	Yml,	3,	0x0f,0x20,3,0},
	{AMOVL,	Ycr4,	Yml,	3,	0x0f,0x20,4,0},
	{AMOVL,	Ycr8,	Yml,	3,	0x0f,0x20,8,0},
	{AMOVQ,	Ycr0,	Yml,	3,	0x0f,0x20,0,0},
	{AMOVQ,	Ycr2,	Yml,	3,	0x0f,0x20,2,0},
	{AMOVQ,	Ycr3,	Yml,	3,	0x0f,0x20,3,0},
	{AMOVQ,	Ycr4,	Yml,	3,	0x0f,0x20,4,0},
	{AMOVQ,	Ycr8,	Yml,	3,	0x0f,0x20,8,0},

	{AMOVL,	Yml,	Ycr0,	4,	0x0f,0x22,0,0},
	{AMOVL,	Yml,	Ycr2,	4,	0x0f,0x22,2,0},
	{AMOVL,	Yml,	Ycr3,	4,	0x0f,0x22,3,0},
	{AMOVL,	Yml,	Ycr4,	4,	0x0f,0x22,4,0},
	{AMOVL,	Yml,	Ycr8,	4,	0x0f,0x22,8,0},
	{AMOVQ,	Yml,	Ycr0,	4,	0x0f,0x22,0,0},
	{AMOVQ,	Yml,	Ycr2,	4,	0x0f,0x22,2,0},
	{AMOVQ,	Yml,	Ycr3,	4,	0x0f,0x22,3,0},
	{AMOVQ,	Yml,	Ycr4,	4,	0x0f,0x22,4,0},
	{AMOVQ,	Yml,	Ycr8,	4,	0x0f,0x22,8,0},

/* mov dr */
	{AMOVL,	Ydr0,	Yml,	3,	0x0f,0x21,0,0},
	{AMOVL,	Ydr6,	Yml,	3,	0x0f,0x21,6,0},
	{AMOVL,	Ydr7,	Yml,	3,	0x0f,0x21,7,0},
	{AMOVQ,	Ydr0,	Yml,	3,	0x0f,0x21,0,0},
	{AMOVQ,	Ydr6,	Yml,	3,	0x0f,0x21,6,0},
	{AMOVQ,	Ydr7,	Yml,	3,	0x0f,0x21,7,0},

	{AMOVL,	Yml,	Ydr0,	4,	0x0f,0x23,0,0},
	{AMOVL,	Yml,	Ydr6,	4,	0x0f,0x23,6,0},
	{AMOVL,	Yml,	Ydr7,	4,	0x0f,0x23,7,0},
	{AMOVQ,	Yml,	Ydr0,	4,	0x0f,0x23,0,0},
	{AMOVQ,	Yml,	Ydr6,	4,	0x0f,0x23,6,0},
	{AMOVQ,	Yml,	Ydr7,	4,	0x0f,0x23,7,0},

/* mov tr */
	{AMOVL,	Ytr6,	Yml,	3,	0x0f,0x24,6,0},
	{AMOVL,	Ytr7,	Yml,	3,	0x0f,0x24,7,0},

	{AMOVL,	Yml,	Ytr6,	4,	0x0f,0x26,6,E},
	{AMOVL,	Yml,	Ytr7,	4,	0x0f,0x26,7,E},

/* lgdt, sgdt, lidt, sidt */
	{AMOVL,	Ym,	Ygdtr,	4,	0x0f,0x01,2,0},
	{AMOVL,	Ygdtr,	Ym,	3,	0x0f,0x01,0,0},
	{AMOVL,	Ym,	Yidtr,	4,	0x0f,0x01,3,0},
	{AMOVL,	Yidtr,	Ym,	3,	0x0f,0x01,1,0},
	{AMOVQ,	Ym,	Ygdtr,	4,	0x0f,0x01,2,0},
	{AMOVQ,	Ygdtr,	Ym,	3,	0x0f,0x01,0,0},
	{AMOVQ,	Ym,	Yidtr,	4,	0x0f,0x01,3,0},
	{AMOVQ,	Yidtr,	Ym,	3,	0x0f,0x01,1,0},

/* lldt, sldt */
	{AMOVW,	Yml,	Yldtr,	4,	0x0f,0x00,2,0},
	{AMOVW,	Yldtr,	Yml,	3,	0x0f,0x00,0,0},

/* lmsw, smsw */
	{AMOVW,	Yml,	Ymsw,	4,	0x0f,0x01,6,0},
	{AMOVW,	Ymsw,	Yml,	3,	0x0f,0x01,4,0},

/* ltr, str */
	{AMOVW,	Yml,	Ytask,	4,	0x0f,0x00,3,0},
	{AMOVW,	Ytask,	Yml,	3,	0x0f,0x00,1,0},

/* load full pointer */
	{AMOVL,	Yml,	Ycol,	5,	0,0,0,0},
	{AMOVW,	Yml,	Ycol,	5,	Pe,0,0,0},

/* double shift */
	{ASHLL,	Ycol,	Yml,	6,	0xa4,0xa5,0,0},
	{ASHRL,	Ycol,	Yml,	6,	0xac,0xad,0,0},
	{ASHLQ,	Ycol,	Yml,	6,	Pw,0xa4,0xa5,0},
	{ASHRQ,	Ycol,	Yml,	6,	Pw,0xac,0xad,0},
	{ASHLW,	Ycol,	Yml,	6,	Pe,0xa4,0xa5,0},
	{ASHRW,	Ycol,	Yml,	6,	Pe,0xac,0xad,0},
	/* end-of-table entry: doasm stops scanning at as == 0 */
	0
};
993
994 int
995 isax(Adr *a)
996 {
997
998 switch(a->type) {
999 case D_AX:
1000 case D_AL:
1001 case D_AH:
1002 case D_INDIR+D_AX:
1003 return 1;
1004 }
1005 if(a->index == D_AX)
1006 return 1;
1007 return 0;
1008 }
1009
1010 void
1011 subreg(Prog *p, int from, int to)
1012 {
1013
1014 if(debug['Q'])
1015 print("\n%P s/%R/%R/\n", p, from, to);
1016
1017 if(p->from.type == from)
1018 p->from.type = to;
1019 if(p->to.type == from)
1020 p->to.type = to;
1021
1022 if(p->from.index == from)
1023 p->from.index = to;
1024 if(p->to.index == from)
1025 p->to.index = to;
1026
1027 from += D_INDIR;
1028 if(p->from.type == from)
1029 p->from.type = to+D_INDIR;
1030 if(p->to.type == from)
1031 p->to.type = to+D_INDIR;
1032
1033 if(debug['Q'])
1034 print("%P\n", p);
1035 }
1036
1037 static int
1038 mediaop(Optab *o, int op, int osize, int z)
1039 {
1040 switch(op){
1041 case Pm:
1042 case Pe:
1043 case Pf2:
1044 case Pf3:
1045 if(osize != 1){
1046 if(op != Pm)
1047 *andptr++ = op;
1048 *andptr++ = Pm;
1049 op = o->op[++z];
1050 break;
1051 }
1052 default:
1053 if(andptr == and || andptr[-1] != Pm)
1054 *andptr++ = Pm;
1055 break;
1056 }
1057 *andptr++ = op;
1058 return z;
1059 }
1060
1061 void
1062 doasm(Prog *p)
1063 {
1064 Optab *o;
1065 Prog *q, pp;
1066 uchar *t;
1067 Movtab *mo;
1068 int z, op, ft, tt, xo, l, pre;
1069 vlong v;
1070 Reloc rel, *r;
1071 Adr *a;
1072
1073 curp = p; // TODO
1074
1075 o = opindex[p->as];
1076 if(o == nil) {
1077 diag("asmins: missing op %P", p);
1078 return;
1079 }
1080
1081 pre = prefixof(&p->from);
1082 if(pre)
1083 *andptr++ = pre;
1084 pre = prefixof(&p->to);
1085 if(pre)
1086 *andptr++ = pre;
1087
1088 if(p->ft == 0)
1089 p->ft = oclass(&p->from);
1090 if(p->tt == 0)
1091 p->tt = oclass(&p->to);
1092
1093 ft = p->ft * Ymax;
1094 tt = p->tt * Ymax;
1095
1096 t = o->ytab;
1097 if(t == 0) {
1098 diag("asmins: noproto %P", p);
1099 return;
1100 }
1101 xo = o->op[0] == 0x0f;
1102 for(z=0; *t; z+=t[3]+xo,t+=4)
1103 if(ycover[ft+t[0]])
1104 if(ycover[tt+t[1]])
1105 goto found;
1106 goto domov;
1107
1108 found:
1109 switch(o->prefix) {
1110 case Pq: /* 16 bit escape and opcode escape */
1111 *andptr++ = Pe;
1112 *andptr++ = Pm;
1113 break;
1114
1115 case Pf2: /* xmm opcode escape */
1116 case Pf3:
1117 *andptr++ = o->prefix;
1118 *andptr++ = Pm;
1119 break;
1120
1121 case Pm: /* opcode escape */
1122 *andptr++ = Pm;
1123 break;
1124
1125 case Pe: /* 16 bit escape */
1126 *andptr++ = Pe;
1127 break;
1128
1129 case Pw: /* 64-bit escape */
1130 if(p->mode != 64)
1131 diag("asmins: illegal 64: %P", p);
1132 rexflag |= Pw;
1133 break;
1134
1135 case Pb: /* botch */
1136 bytereg(&p->from, &p->ft);
1137 bytereg(&p->to, &p->tt);
1138 break;
1139
1140 case P32: /* 32 bit but illegal if 64-bit mode */
1141 if(p->mode == 64)
1142 diag("asmins: illegal in 64-bit mode: %P", p);
1143 break;
1144
1145 case Py: /* 64-bit only, no prefix */
1146 if(p->mode != 64)
1147 diag("asmins: illegal in %d-bit mode: %P", p->mode, p);
1148 break;
1149 }
1150
1151 op = o->op[z];
1152 if(op == 0x0f) {
1153 *andptr++ = op;
1154 op = o->op[++z];
1155 }
1156 switch(t[2]) {
1157 default:
1158 diag("asmins: unknown z %d %P", t[2], p);
1159 return;
1160
1161 case Zpseudo:
1162 break;
1163
1164 case Zlit:
1165 for(; op = o->op[z]; z++)
1166 *andptr++ = op;
1167 break;
1168
1169 case Zlitm_r:
1170 for(; op = o->op[z]; z++)
1171 *andptr++ = op;
1172 asmand(&p->from, &p->to);
1173 break;
1174
1175 case Zmb_r:
1176 bytereg(&p->from, &p->ft);
1177 /* fall through */
1178 case Zm_r:
1179 *andptr++ = op;
1180 asmand(&p->from, &p->to);
1181 break;
1182
1183 case Zm_r_xm:
1184 mediaop(o, op, t[3], z);
1185 asmand(&p->from, &p->to);
1186 break;
1187
1188 case Zm_r_xm_nr:
1189 rexflag = 0;
1190 mediaop(o, op, t[3], z);
1191 asmand(&p->from, &p->to);
1192 break;
1193
1194 case Zm_r_i_xm:
1195 mediaop(o, op, t[3], z);
1196 asmand(&p->from, &p->to);
1197 *andptr++ = p->to.offset;
1198 break;
1199
1200 case Zm_r_3d:
1201 *andptr++ = 0x0f;
1202 *andptr++ = 0x0f;
1203 asmand(&p->from, &p->to);
1204 *andptr++ = op;
1205 break;
1206
1207 case Zibm_r:
1208 *andptr++ = op;
1209 asmand(&p->from, &p->to);
1210 *andptr++ = p->to.offset;
1211 break;
1212
1213 case Zaut_r:
1214 *andptr++ = 0x8d; /* leal */
1215 if(p->from.type != D_ADDR)
1216 diag("asmins: Zaut sb type ADDR");
1217 p->from.type = p->from.index;
1218 p->from.index = D_NONE;
1219 asmand(&p->from, &p->to);
1220 p->from.index = p->from.type;
1221 p->from.type = D_ADDR;
1222 break;
1223
1224 case Zm_o:
1225 *andptr++ = op;
1226 asmando(&p->from, o->op[z+1]);
1227 break;
1228
1229 case Zr_m:
1230 *andptr++ = op;
1231 asmand(&p->to, &p->from);
1232 break;
1233
1234 case Zr_m_xm:
1235 mediaop(o, op, t[3], z);
1236 asmand(&p->to, &p->from);
1237 break;
1238
1239 case Zr_m_xm_nr:
1240 rexflag = 0;
1241 mediaop(o, op, t[3], z);
1242 asmand(&p->to, &p->from);
1243 break;
1244
1245 case Zr_m_i_xm:
1246 mediaop(o, op, t[3], z);
1247 asmand(&p->to, &p->from);
1248 *andptr++ = p->from.offset;
1249 break;
1250
1251 case Zo_m:
1252 *andptr++ = op;
1253 asmando(&p->to, o->op[z+1]);
1254 break;
1255
1256 case Zo_m64:
1257 *andptr++ = op;
1258 asmandsz(&p->to, o->op[z+1], 0, 1);
1259 break;
1260
1261 case Zm_ibo:
1262 *andptr++ = op;
1263 asmando(&p->from, o->op[z+1]);
1264 *andptr++ = vaddr(&p->to, nil);
1265 break;
1266
1267 case Zibo_m:
1268 *andptr++ = op;
1269 asmando(&p->to, o->op[z+1]);
1270 *andptr++ = vaddr(&p->from, nil);
1271 break;
1272
1273 case Zibo_m_xm:
1274 z = mediaop(o, op, t[3], z);
1275 asmando(&p->to, o->op[z+1]);
1276 *andptr++ = vaddr(&p->from, nil);
1277 break;
1278
1279 case Z_ib:
1280 case Zib_:
1281 if(t[2] == Zib_)
1282 a = &p->from;
1283 else
1284 a = &p->to;
1285 *andptr++ = op;
1286 *andptr++ = vaddr(a, nil);
1287 break;
1288
1289 case Zib_rp:
1290 rexflag |= regrex[p->to.type] & (Rxb|0x40);
1291 *andptr++ = op + reg[p->to.type];
1292 *andptr++ = vaddr(&p->from, nil);
1293 break;
1294
1295 case Zil_rp:
1296 rexflag |= regrex[p->to.type] & Rxb;
1297 *andptr++ = op + reg[p->to.type];
1298 if(o->prefix == Pe) {
1299 v = vaddr(&p->from, nil);
1300 *andptr++ = v;
1301 *andptr++ = v>>8;
1302 }
1303 else
1304 relput4(p, &p->from);
1305 break;
1306
1307 case Zo_iw:
1308 *andptr++ = op;
1309 if(p->from.type != D_NONE){
1310 v = vaddr(&p->from, nil);
1311 *andptr++ = v;
1312 *andptr++ = v>>8;
1313 }
1314 break;
1315
1316 case Ziq_rp:
1317 v = vaddr(&p->from, &rel);
1318 l = v>>32;
1319 if(l == 0 && rel.siz != 8){
1320 //p->mark |= 0100;
1321 //print("zero: %llux %P\n", v, p);
1322 rexflag &= ~(0x40|Rxw);
1323 rexflag |= regrex[p->to.type] & Rxb;
1324 *andptr++ = 0xb8 + reg[p->to.type];
1325 if(rel.type != 0) {
1326 r = addrel(cursym);
1327 *r = rel;
1328 r->off = p->pc + andptr - and;
1329 }
1330 put4(v);
1331 }else if(l == -1 && (v&((uvlong)1<<31))!=0){ /* sign extend */
1332 //p->mark |= 0100;
1333 //print("sign: %llux %P\n", v, p);
1334 *andptr ++ = 0xc7;
1335 asmando(&p->to, 0);
1336 put4(v);
1337 }else{ /* need all 8 */
1338 //print("all: %llux %P\n", v, p);
1339 rexflag |= regrex[p->to.type] & Rxb;
1340 *andptr++ = op + reg[p->to.type];
1341 if(rel.type != 0) {
1342 r = addrel(cursym);
1343 *r = rel;
1344 r->off = p->pc + andptr - and;
1345 }
1346 put8(v);
1347 }
1348 break;
1349
1350 case Zib_rr:
1351 *andptr++ = op;
1352 asmand(&p->to, &p->to);
1353 *andptr++ = vaddr(&p->from, nil);
1354 break;
1355
1356 case Z_il:
1357 case Zil_:
1358 if(t[2] == Zil_)
1359 a = &p->from;
1360 else
1361 a = &p->to;
1362 *andptr++ = op;
1363 if(o->prefix == Pe) {
1364 v = vaddr(a, nil);
1365 *andptr++ = v;
1366 *andptr++ = v>>8;
1367 }
1368 else
1369 relput4(p, a);
1370 break;
1371
1372 case Zm_ilo:
1373 case Zilo_m:
1374 *andptr++ = op;
1375 if(t[2] == Zilo_m) {
1376 a = &p->from;
1377 asmando(&p->to, o->op[z+1]);
1378 } else {
1379 a = &p->to;
1380 asmando(&p->from, o->op[z+1]);
1381 }
1382 if(o->prefix == Pe) {
1383 v = vaddr(a, nil);
1384 *andptr++ = v;
1385 *andptr++ = v>>8;
1386 }
1387 else
1388 relput4(p, a);
1389 break;
1390
1391 case Zil_rr:
1392 *andptr++ = op;
1393 asmand(&p->to, &p->to);
1394 if(o->prefix == Pe) {
1395 v = vaddr(&p->from, nil);
1396 *andptr++ = v;
1397 *andptr++ = v>>8;
1398 }
1399 else
1400 relput4(p, &p->from);
1401 break;
1402
1403 case Z_rp:
1404 rexflag |= regrex[p->to.type] & (Rxb|0x40);
1405 *andptr++ = op + reg[p->to.type];
1406 break;
1407
1408 case Zrp_:
1409 rexflag |= regrex[p->from.type] & (Rxb|0x40);
1410 *andptr++ = op + reg[p->from.type];
1411 break;
1412
1413 case Zclr:
1414 *andptr++ = op;
1415 asmand(&p->to, &p->to);
1416 break;
1417
1418 case Zcall:
1419 q = p->pcond;
1420 if(q == nil) {
1421 diag("call without target");
1422 errorexit();
1423 }
1424 if(q->as != ATEXT) {
1425 // Could handle this case by making D_PCREL
1426 // record the Prog* instead of the Sym*, but let's
1427 // wait until the need arises.
1428 diag("call of non-TEXT %P", q);
1429 errorexit();
1430 }
1431 *andptr++ = op;
1432 r = addrel(cursym);
1433 r->off = p->pc + andptr - and;
1434 r->sym = q->from.sym;
1435 r->type = D_PCREL;
1436 r->siz = 4;
1437 put4(0);
1438 break;
1439
1440 case Zbr:
1441 case Zjmp:
1442 // TODO: jump across functions needs reloc
1443 q = p->pcond;
1444 if(q == nil) {
1445 diag("jmp/branch without target");
1446 errorexit();
1447 }
1448 if(q->as == ATEXT) {
1449 if(t[2] == Zbr) {
1450 diag("branch to ATEXT");
1451 errorexit();
1452 }
1453 *andptr++ = o->op[z+1];
1454 r = addrel(cursym);
1455 r->off = p->pc + andptr - and;
1456 r->sym = q->from.sym;
1457 r->type = D_PCREL;
1458 r->siz = 4;
1459 put4(0);
1460 break;
1461 }
1462 // Assumes q is in this function.
1463 // TODO: Check in input, preserve in brchain.
1464
1465 // Fill in backward jump now.
1466 if(p->back & 1) {
1467 v = q->pc - (p->pc + 2);
1468 if(v >= -128) {
1469 *andptr++ = op;
1470 *andptr++ = v;
1471 } else {
1472 v -= 5-2;
1473 if(t[2] == Zbr) {
1474 *andptr++ = 0x0f;
1475 v--;
1476 }
1477 *andptr++ = o->op[z+1];
1478 *andptr++ = v;
1479 *andptr++ = v>>8;
1480 *andptr++ = v>>16;
1481 *andptr++ = v>>24;
1482 }
1483 break;
1484 }
1485
1486 // Annotate target; will fill in later.
1487 p->forwd = q->comefrom;
1488 q->comefrom = p;
1489 if(p->back & 2) { // short
1490 *andptr++ = op;
1491 *andptr++ = 0;
1492 } else {
1493 if(t[2] == Zbr)
1494 *andptr++ = 0x0f;
1495 *andptr++ = o->op[z+1];
1496 *andptr++ = 0;
1497 *andptr++ = 0;
1498 *andptr++ = 0;
1499 *andptr++ = 0;
1500 }
1501 break;
1502
1503 /*
1504 v = q->pc - p->pc - 2;
1505 if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
1506 *andptr++ = op;
1507 *andptr++ = v;
1508 } else {
1509 v -= 5-2;
1510 if(t[2] == Zbr) {
1511 *andptr++ = 0x0f;
1512 v--;
1513 }
1514 *andptr++ = o->op[z+1];
1515 *andptr++ = v;
1516 *andptr++ = v>>8;
1517 *andptr++ = v>>16;
1518 *andptr++ = v>>24;
1519 }
1520 */
1521 break;
1522
1523 case Zloop:
1524 q = p->pcond;
1525 if(q == nil) {
1526 diag("loop without target");
1527 errorexit();
1528 }
1529 v = q->pc - p->pc - 2;
1530 if(v < -128 && v > 127)
1531 diag("loop too far: %P", p);
1532 *andptr++ = op;
1533 *andptr++ = v;
1534 break;
1535
1536 case Zbyte:
1537 v = vaddr(&p->from, &rel);
1538 if(rel.siz != 0) {
1539 rel.siz = op;
1540 r = addrel(cursym);
1541 *r = rel;
1542 r->off = p->pc + andptr - and;
1543 }
1544 *andptr++ = v;
1545 if(op > 1) {
1546 *andptr++ = v>>8;
1547 if(op > 2) {
1548 *andptr++ = v>>16;
1549 *andptr++ = v>>24;
1550 if(op > 4) {
1551 *andptr++ = v>>32;
1552 *andptr++ = v>>40;
1553 *andptr++ = v>>48;
1554 *andptr++ = v>>56;
1555 }
1556 }
1557 }
1558 break;
1559 }
1560 return;
1561
1562 domov:
1563 for(mo=ymovtab; mo->as; mo++)
1564 if(p->as == mo->as)
1565 if(ycover[ft+mo->ft])
1566 if(ycover[tt+mo->tt]){
1567 t = mo->op;
1568 goto mfound;
1569 }
1570 bad:
1571 if(p->mode != 64){
1572 /*
1573 * here, the assembly has failed.
1574 * if its a byte instruction that has
1575 * unaddressable registers, try to
1576 * exchange registers and reissue the
1577 * instruction with the operands renamed.
1578 */
1579 pp = *p;
1580 z = p->from.type;
1581 if(z >= D_BP && z <= D_DI) {
1582 if(isax(&p->to)) {
1583 *andptr++ = 0x87; /* xchg lhs,bx */
1584 asmando(&p->from, reg[D_BX]);
1585 subreg(&pp, z, D_BX);
1586 doasm(&pp);
1587 *andptr++ = 0x87; /* xchg lhs,bx */
1588 asmando(&p->from, reg[D_BX]);
1589 } else {
1590 *andptr++ = 0x90 + reg[z]; /* xchg lsh,ax */
1591 subreg(&pp, z, D_AX);
1592 doasm(&pp);
1593 *andptr++ = 0x90 + reg[z]; /* xchg lsh,ax */
1594 }
1595 return;
1596 }
1597 z = p->to.type;
1598 if(z >= D_BP && z <= D_DI) {
1599 if(isax(&p->from)) {
1600 *andptr++ = 0x87; /* xchg rhs,bx */
1601 asmando(&p->to, reg[D_BX]);
1602 subreg(&pp, z, D_BX);
1603 doasm(&pp);
1604 *andptr++ = 0x87; /* xchg rhs,bx */
1605 asmando(&p->to, reg[D_BX]);
1606 } else {
1607 *andptr++ = 0x90 + reg[z]; /* xchg rsh,ax */
1608 subreg(&pp, z, D_AX);
1609 doasm(&pp);
1610 *andptr++ = 0x90 + reg[z]; /* xchg rsh,ax */
1611 }
1612 return;
1613 }
1614 }
1615 diag("doasm: notfound from=%ux to=%ux %P", p->from.type, p->to.type, p);
1616 return;
1617
1618 mfound:
1619 switch(mo->code) {
1620 default:
1621 diag("asmins: unknown mov %d %P", mo->code, p);
1622 break;
1623
1624 case 0: /* lit */
1625 for(z=0; t[z]!=E; z++)
1626 *andptr++ = t[z];
1627 break;
1628
1629 case 1: /* r,m */
1630 *andptr++ = t[0];
1631 asmando(&p->to, t[1]);
1632 break;
1633
1634 case 2: /* m,r */
1635 *andptr++ = t[0];
1636 asmando(&p->from, t[1]);
1637 break;
1638
1639 case 3: /* r,m - 2op */
1640 *andptr++ = t[0];
1641 *andptr++ = t[1];
1642 asmando(&p->to, t[2]);
1643 rexflag |= regrex[p->from.type] & (Rxr|0x40);
1644 break;
1645
1646 case 4: /* m,r - 2op */
1647 *andptr++ = t[0];
1648 *andptr++ = t[1];
1649 asmando(&p->from, t[2]);
1650 rexflag |= regrex[p->to.type] & (Rxr|0x40);
1651 break;
1652
1653 case 5: /* load full pointer, trash heap */
1654 if(t[0])
1655 *andptr++ = t[0];
1656 switch(p->to.index) {
1657 default:
1658 goto bad;
1659 case D_DS:
1660 *andptr++ = 0xc5;
1661 break;
1662 case D_SS:
1663 *andptr++ = 0x0f;
1664 *andptr++ = 0xb2;
1665 break;
1666 case D_ES:
1667 *andptr++ = 0xc4;
1668 break;
1669 case D_FS:
1670 *andptr++ = 0x0f;
1671 *andptr++ = 0xb4;
1672 break;
1673 case D_GS:
1674 *andptr++ = 0x0f;
1675 *andptr++ = 0xb5;
1676 break;
1677 }
1678 asmand(&p->from, &p->to);
1679 break;
1680
1681 case 6: /* double shift */
1682 if(t[0] == Pw){
1683 if(p->mode != 64)
1684 diag("asmins: illegal 64: %P", p);
1685 rexflag |= Pw;
1686 t++;
1687 }else if(t[0] == Pe){
1688 *andptr++ = Pe;
1689 t++;
1690 }
1691 z = p->from.type;
1692 switch(z) {
1693 default:
1694 goto bad;
1695 case D_CONST:
1696 *andptr++ = 0x0f;
1697 *andptr++ = t[0];
1698 asmandsz(&p->to, reg[(int)p->from.index], regrex[(int)p->from.index], 0);
1699 *andptr++ = p->from.offset;
1700 break;
1701 case D_CL:
1702 case D_CX:
1703 *andptr++ = 0x0f;
1704 *andptr++ = t[1];
1705 asmandsz(&p->to, reg[(int)p->from.index], regrex[(int)p->from.index], 0);
1706 break;
1707 }
1708 break;
1709 }
1710 }
1711
1712 void
1713 asmins(Prog *p)
1714 {
1715 int n, np, c;
1716 Reloc *r;
1717
1718 rexflag = 0;
1719 andptr = and;
1720 asmode = p->mode;
1721 doasm(p);
1722 if(rexflag){
1723 /*
1724 * as befits the whole approach of the architecture,
1725 * the rex prefix must appear before the first opcode byte
1726 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
1727 * before the 0f opcode escape!), or it might be ignored.
1728 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
1729 */
1730 if(p->mode != 64)
1731 diag("asmins: illegal in mode %d: %P", p->mode, p);
1732 n = andptr - and;
1733 for(np = 0; np < n; np++) {
1734 c = and[np];
1735 if(c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26)
1736 break;
1737 }
1738 for(r=cursym->r+cursym->nr; r-- > cursym->r; ) {
1739 if(r->off < p->pc)
1740 break;
1741 r->off++;
1742 }
1743 memmove(and+np+1, and+np, n-np);
1744 and[np] = 0x40 | rexflag;
1745 andptr++;
1746 }
1747 }