The Go Programming Language

Text file src/pkg/big/arith_386.s

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// This file provides fast assembly versions for the elementary
     6	// arithmetic operations on vectors implemented in arith.go.
     7	
     8	// func mulWW(x, y Word) (z1, z0 Word) -- two-word product: z1:z0 = x * y
     9	TEXT ·mulWW(SB),7,$0
    10		MOVL y+4(FP), AX	// AX = y (multiplication is commutative)
    11		MULL x+0(FP)		// DX:AX = AX * x
    12		MOVL AX, z0+12(FP)	// z0 = low word of the product
    13		MOVL DX, z1+8(FP)	// z1 = high word of the product
    14		RET
    15	
    16	
    17	// func divWW(x1, x0, y Word) (q, r Word) -- q, r = quotient, remainder of x1:x0 / y
    18	TEXT ·divWW(SB),7,$0
    19		MOVL x0+4(FP), AX	// AX = low word of the dividend
    20		MOVL x1+0(FP), DX	// DX = high word of the dividend
    21		DIVL y+8(FP)		// AX = DX:AX / y, DX = DX:AX % y
    22		MOVL DX, r+16(FP)	// r = remainder
    23		MOVL AX, q+12(FP)	// q = quotient
    24		RET
    25	
    26	
    27	// func addVV(z, x, y []Word) (c Word) -- z[i] = x[i] + y[i] + carry for 0 <= i < n; c = final carry
    28	TEXT ·addVV(SB),7,$0
    29		MOVL z+0(FP), DI	// DI = &z[0]
    30		MOVL x+12(FP), SI	// SI = &x[0]
    31		MOVL y+24(FP), CX	// CX = &y[0]
    32		MOVL n+4(FP), BP	// BP = n = len(z)
    33		MOVL $0, BX		// i = 0
    34		MOVL $0, DX		// c = 0
    35		JMP E1
    36	
    37	L1:	MOVL (SI)(BX*4), AX	// AX = x[i]
    38		RCRL $1, DX		// CF = c (bit 0 of DX moves into the carry flag)
    39		ADCL (CX)(BX*4), AX	// AX = x[i] + y[i] + CF
    40		RCLL $1, DX		// c = CF (carry moves back into bit 0 of DX)
    41		MOVL AX, (DI)(BX*4)	// z[i] = AX
    42		ADDL $1, BX		// i++ (clobbers CF, which is why c is parked in DX)
    43	
    44	E1:	CMPL BX, BP		// i < n
    45		JL L1
    46	
    47		MOVL DX, c+36(FP)	// return c
    48		RET
    49	
    50	
    51	// func subVV(z, x, y []Word) (c Word) -- z[i] = x[i] - y[i] - borrow for 0 <= i < n; c = final borrow
    52	// (same as addVV except for SBBL instead of ADCL and label names)
    53	TEXT ·subVV(SB),7,$0
    54		MOVL z+0(FP), DI	// DI = &z[0]
    55		MOVL x+12(FP), SI	// SI = &x[0]
    56		MOVL y+24(FP), CX	// CX = &y[0]
    57		MOVL n+4(FP), BP	// BP = n = len(z)
    58		MOVL $0, BX		// i = 0
    59		MOVL $0, DX		// c = 0
    60		JMP E2
    61	
    62	L2:	MOVL (SI)(BX*4), AX	// AX = x[i]
    63		RCRL $1, DX		// CF = c (bit 0 of DX moves into the carry flag)
    64		SBBL (CX)(BX*4), AX	// AX = x[i] - y[i] - CF
    65		RCLL $1, DX		// c = CF (borrow moves back into bit 0 of DX)
    66		MOVL AX, (DI)(BX*4)	// z[i] = AX
    67		ADDL $1, BX		// i++ (clobbers CF, which is why c is parked in DX)
    68	
    69	E2:	CMPL BX, BP		// i < n
    70		JL L2
    71	
    72		MOVL DX, c+36(FP)	// return c
    73		RET
    74	
    75	
    76	// func addVW(z, x []Word, y Word) (c Word) -- z = x + y (y added at word 0, carry rippling up); c = carry out
    77	TEXT ·addVW(SB),7,$0
    78		MOVL n+4(FP), BP	// BP = n = len(z)
    79		MOVL y+24(FP), AX	// the running carry starts out as y
    80		MOVL x+12(FP), SI	// SI = &x[0]
    81		MOVL z+0(FP), DI	// DI = &z[0]
    82		MOVL $0, BX		// start at index 0
    83		JMP addVWtest
    84	addVWloop:
    85		ADDL (SI)(BX*4), AX	// AX = x[i] + carry, CF = carry out
    86		MOVL AX, (DI)(BX*4)	// z[i] = AX (MOVL leaves CF alone)
    87		RCLL $1, AX		// pull CF into bit 0 of AX ...
    88		ANDL $1, AX		// ... and discard the other bits: carry = CF
    89		ADDL $1, BX		// advance to the next word
    90	addVWtest:
    91		CMPL BX, BP
    92		JL addVWloop		// keep going while index < n
    93	
    94		MOVL AX, c+28(FP)	// c = last carry (y itself when n == 0)
    95		RET
    96	
    97	
    98	// func subVW(z, x []Word, y Word) (c Word) -- z = x - y (y subtracted at word 0, borrow rippling up); c = borrow out
    99	TEXT ·subVW(SB),7,$0
   100		MOVL n+4(FP), BP	// BP = n = len(z)
   101		MOVL y+24(FP), AX	// the running borrow starts out as y
   102		MOVL x+12(FP), SI	// SI = &x[0]
   103		MOVL z+0(FP), DI	// DI = &z[0]
   104		MOVL $0, BX		// start at index 0
   105		JMP subVWtest
   106	subVWloop:
   107		MOVL (SI)(BX*4), DX	// DX = x[i]; staged in DX since SUBL subtracts into its destination (TODO(gri) is there a reverse SUBL?)
   108		SUBL AX, DX		// DX = x[i] - borrow, CF = borrow out
   109		MOVL DX, (DI)(BX*4)	// z[i] = DX (MOVL leaves CF alone)
   110		RCLL $1, AX		// pull CF into bit 0 of AX ...
   111		ANDL $1, AX		// ... and discard the other bits: borrow = CF
   112		ADDL $1, BX		// advance to the next word
   113	subVWtest:
   114		CMPL BX, BP
   115		JL subVWloop		// keep going while index < n
   116	
   117		MOVL AX, c+28(FP)	// c = last borrow (y itself when n == 0)
   118		RET
   119	
   120	
   121	// func shlVU(z, x []Word, s uint) (c Word) -- z = x<<s, c = bits shifted out of x[n-1]; ŝ below stands for 32-s
   122	TEXT ·shlVU(SB),7,$0
   123		MOVL n+4(FP), BX	// i = n
   124		SUBL $1, BX		// i--
   125		JL X8b			// i < 0	(n <= 0)
   126	
   127		// n > 0
   128		MOVL z+0(FP), DI	// DI = &z[0]
   129		MOVL x+12(FP), SI	// SI = &x[0]
   130		MOVL s+24(FP), CX	// CX = s (shift count)
   131		MOVL (SI)(BX*4), AX	// w1 = x[n-1]
   132		MOVL $0, DX		// DX = 0, so only bits leaving w1 end up in it
   133		SHLL CX, DX:AX		// DX = w1>>ŝ (double-register shift; AX itself is unchanged)
   134		MOVL DX, c+28(FP)	// c = w1>>ŝ
   135	
   136		CMPL BX, $0
   137		JLE X8a			// i <= 0 (single word: skip the loop)
   138	
   139		// i > 0
   140	L8:	MOVL AX, DX		// w = w1
   141		MOVL -4(SI)(BX*4), AX	// w1 = x[i-1]
   142		SHLL CX, DX:AX		// w<<s | w1>>ŝ
   143		MOVL DX, (DI)(BX*4)	// z[i] = w<<s | w1>>ŝ
   144		SUBL $1, BX		// i--
   145		JG L8			// i > 0
   146	
   147		// i <= 0
   148	X8a:	SHLL CX, AX		// w1<<s
   149		MOVL AX, (DI)		// z[0] = w1<<s
   150		RET
   151	
   152	X8b:	MOVL $0, c+28(FP)	// n <= 0: nothing to shift, c = 0
   153		RET
   154	
   155	
   156	// func shrVU(z, x []Word, s uint) (c Word) -- z = x>>s, c = bits shifted out of x[0]; ŝ below stands for 32-s
   157	TEXT ·shrVU(SB),7,$0
   158		MOVL n+4(FP), BP	// BP = n = len(z)
   159		SUBL $1, BP		// n-- (BP holds n-1 from here on)
   160		JL X9b			// n < 0	(n <= 0)
   161	
   162		// n > 0
   163		MOVL z+0(FP), DI	// DI = &z[0]
   164		MOVL x+12(FP), SI	// SI = &x[0]
   165		MOVL s+24(FP), CX	// CX = s (shift count)
   166		MOVL (SI), AX		// w1 = x[0]
   167		MOVL $0, DX		// DX = 0, so only bits leaving w1 end up in it
   168		SHRL CX, DX:AX		// DX = w1<<ŝ (double-register shift; AX itself is unchanged)
   169		MOVL DX, c+28(FP)	// c = w1<<ŝ
   170	
   171		MOVL $0, BX		// i = 0
   172		JMP E9
   173	
   174		// i < n-1
   175	L9:	MOVL AX, DX		// w = w1
   176		MOVL 4(SI)(BX*4), AX	// w1 = x[i+1]
   177		SHRL CX, DX:AX		// w>>s | w1<<ŝ
   178		MOVL DX, (DI)(BX*4)	// z[i] = w>>s | w1<<ŝ
   179		ADDL $1, BX		// i++
   180		
   181	E9:	CMPL BX, BP		// compare i with BP (= n-1)
   182		JL L9			// i < n-1
   183	
   184		// i >= n-1
   185	X9a:	SHRL CX, AX		// w1>>s
   186		MOVL AX, (DI)(BP*4)	// z[n-1] = w1>>s
   187		RET
   188	
   189	X9b:	MOVL $0, c+28(FP)	// n <= 0: nothing to shift, c = 0
   190		RET
   191	
   192	
   193	// func mulAddVWW(z, x []Word, y, r Word) (c Word) -- z = x*y + r, word by word; c = high word carried out
   194	TEXT ·mulAddVWW(SB),7,$0
   195		MOVL n+4(FP), BX	// BX = n = len(z)
   196		MOVL x+12(FP), SI
   197		LEAL (SI)(BX*4), SI	// SI = &x[n]
   198		MOVL z+0(FP), DI
   199		LEAL (DI)(BX*4), DI	// DI = &z[n]
   200		NEGL BX			// index k runs from -n up to 0, addressing word n+k
   201		MOVL r+28(FP), CX	// the carry word starts out as r
   202		MOVL y+24(FP), BP	// BP = multiplier y
   203		JMP mulAddVWWtest
   204	mulAddVWWloop:
   205		MOVL (SI)(BX*4), AX	// AX = x[n+k]
   206		MULL BP			// DX:AX = x[n+k] * y
   207		ADDL CX, AX		// low word += carry word
   208		ADCL $0, DX		// propagate the overflow into the high word
   209		MOVL DX, CX		// high word becomes the next carry word
   210		MOVL AX, (DI)(BX*4)	// z[n+k] = low word
   211		ADDL $1, BX		// k++
   212	mulAddVWWtest:
   213		CMPL BX, $0
   214		JL mulAddVWWloop	// keep going while k < 0
   215	
   216		MOVL CX, c+32(FP)	// c = final carry word (r itself when n == 0)
   217		RET
   218	
   219	
   220	// func addMulVVW(z, x []Word, y Word) (c Word) -- z += x*y, word by word; c = high word carried out
   221	TEXT ·addMulVVW(SB),7,$0
   222		MOVL n+4(FP), BX	// BX = n = len(z)
   223		MOVL x+12(FP), SI
   224		LEAL (SI)(BX*4), SI	// SI = &x[n]
   225		MOVL z+0(FP), DI
   226		LEAL (DI)(BX*4), DI	// DI = &z[n]
   227		NEGL BX			// index k runs from -n up to 0, addressing word n+k
   228		MOVL y+24(FP), BP	// BP = multiplier y
   229		MOVL $0, CX		// the carry word starts out as 0
   230		JMP addMulVVWtest
   231	addMulVVWloop:
   232		MOVL (SI)(BX*4), AX	// AX = x[n+k]
   233		MULL BP			// DX:AX = x[n+k] * y
   234		ADDL CX, AX		// low word += carry word
   235		ADCL $0, DX		// propagate the overflow into the high word
   236		ADDL AX, (DI)(BX*4)	// z[n+k] += low word
   237		ADCL $0, DX		// propagate that addition's overflow as well
   238		MOVL DX, CX		// high word becomes the next carry word
   239		ADDL $1, BX		// k++
   240	addMulVVWtest:
   241		CMPL BX, $0
   242		JL addMulVVWloop	// keep going while k < 0
   243	
   244		MOVL CX, c+28(FP)	// c = final carry word
   245		RET
   246	
   247	
   248	// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word) -- z = (xn:x) / y, r = remainder
   249	TEXT ·divWVW(SB),7,$0
   250		MOVL y+28(FP), CX	// CX = divisor y
   251		MOVL x+16(FP), SI	// SI = &x[0]
   252		MOVL z+0(FP), DI	// DI = &z[0]
   253		MOVL n+4(FP), BX	// BX = n = len(z); words are visited from n-1 down to 0
   254		MOVL xn+12(FP), DX	// the running remainder starts out as xn
   255		JMP divWVWnext
   256	divWVWloop:
   257		MOVL (SI)(BX*4), AX	// dividend DX:AX = remainder:x[i]
   258		DIVL CX			// AX = quotient word, DX = new remainder
   259		MOVL AX, (DI)(BX*4)	// z[i] = quotient word
   260	divWVWnext:
   261		SUBL $1, BX		// step down to the next lower index
   262		JGE divWVWloop		// continue while the index is still >= 0
   263	
   264		MOVL DX, r+32(FP)	// r = final remainder (xn itself when n == 0)
   265		RET

release.r60.3. Except as noted, this content is licensed under a Creative Commons Attribution 3.0 License.