The Go Programming Language

Text file src/pkg/big/arith_amd64.s

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// This file provides fast assembly versions for the elementary
     6	// arithmetic operations on vectors implemented in arith.go.
     7	
     8	// TODO(gri) - experiment with unrolled loops for faster execution
     9	
    10	// func mulWW(x, y Word) (z1, z0 Word) -- z1:z0 = x*y (double-word product)
    11	TEXT ·mulWW(SB),7,$0
    12		MOVQ y+8(FP), AX	// either factor may sit in AX; the product is the same
    13		MULQ x+0(FP)		// DX:AX = AX * x (unsigned)
    14		MOVQ AX, z0+24(FP)	// z0 = low word
    15		MOVQ DX, z1+16(FP)	// z1 = high word
    16		RET
    17	
    18	
    19	// func divWW(x1, x0, y Word) (q, r Word) -- q, r = quotient, remainder of x1:x0 / y
    20	TEXT ·divWW(SB),7,$0
    21		MOVQ x0+8(FP), AX	// low word of the dividend
    22		MOVQ x1+0(FP), DX	// high word of the dividend
    23		DIVQ y+16(FP)		// AX = DX:AX / y, DX = DX:AX % y (unsigned)
    24		MOVQ DX, r+32(FP)	// r = remainder
    25		MOVQ AX, q+24(FP)	// q = quotient
    26		RET
    27	
    28	
    29	// func addVV(z, x, y []Word) (c Word) -- z = x + y word by word, c = carry out
    30	TEXT ·addVV(SB),7,$0
    31		MOVL n+8(FP), CX	// n: 32-bit length field of z (zero-extended)
    32		MOVQ y+32(FP), R8
    33		MOVQ x+16(FP), SI
    34		MOVQ z+0(FP), DI
    35		MOVQ $0, DX		// carry = 0
    36		MOVQ $0, BX		// i = 0
    37		JMP addtest
    38	
    39	addloop:	MOVQ (SI)(BX*8), AX	// AX = x[i]
    40		RCRQ $1, DX		// CF = saved carry (bit 0 of DX)
    41		ADCQ (R8)(BX*8), AX	// AX = x[i] + y[i] + CF
    42		RCLQ $1, DX		// DX = new carry; the stale bit 63 is rotated out
    43		MOVQ AX, (DI)(BX*8)	// z[i] = AX
    44		ADDL $1, BX		// i++
    45	
    46	addtest:	CMPQ BX, CX		// keep going while i < n
    47		JL addloop
    48	
    49		MOVQ DX, c+48(FP)	// c = final carry
    50		RET
    51	
    52	
    53	// func subVV(z, x, y []Word) (c Word) -- z = x - y word by word, c = borrow out
    54	// (same shape as addVV, with SBBQ in place of ADCQ and its own label names)
    55	TEXT ·subVV(SB),7,$0
    56		MOVL n+8(FP), CX	// n: 32-bit length field of z (zero-extended)
    57		MOVQ y+32(FP), R8
    58		MOVQ x+16(FP), SI
    59		MOVQ z+0(FP), DI
    60		MOVQ $0, DX		// borrow = 0
    61		MOVQ $0, BX		// i = 0
    62		JMP subtest
    63	
    64	subloop:	MOVQ (SI)(BX*8), AX	// AX = x[i]
    65		RCRQ $1, DX		// CF = saved borrow (bit 0 of DX)
    66		SBBQ (R8)(BX*8), AX	// AX = x[i] - y[i] - CF
    67		RCLQ $1, DX		// DX = new borrow; the stale bit 63 is rotated out
    68		MOVQ AX, (DI)(BX*8)	// z[i] = AX
    69		ADDL $1, BX		// i++
    70	
    71	subtest:	CMPQ BX, CX		// keep going while i < n
    72		JL subloop
    73	
    74		MOVQ DX, c+48(FP)	// c = final borrow
    75		RET
    76	
    77	
    78	// func addVW(z, x []Word, y Word) (c Word) -- z = x + y (single word y), c = carry out
    79	TEXT ·addVW(SB),7,$0
    80		MOVL n+8(FP), CX	// n: 32-bit length field of z (zero-extended)
    81		MOVQ y+32(FP), AX	// c = y: y acts as the carry into word 0
    82		MOVQ x+16(FP), SI
    83		MOVQ z+0(FP), DI
    84		MOVQ $0, BX		// i = 0
    85		JMP addwtest
    86	
    87	addwloop:	ADDQ (SI)(BX*8), AX	// AX = x[i] + c
    88		MOVQ AX, (DI)(BX*8)	// z[i] = AX (MOVQ leaves CF untouched)
    89		RCLQ $1, AX		// bit 0 of AX = CF
    90		ANDQ $1, AX		// c = CF, every other bit cleared
    91		ADDL $1, BX		// i++
    92	
    93	addwtest:	CMPQ BX, CX		// keep going while i < n
    94		JL addwloop
    95	
    96		MOVQ AX, c+40(FP)	// c = final carry (still y when n is 0)
    97		RET
    98	
    99	
   100	// func subVW(z, x []Word, y Word) (c Word) -- z = x - y (single word y), c = borrow out
   101	TEXT ·subVW(SB),7,$0
   102		MOVL n+8(FP), CX	// n: 32-bit length field of z (zero-extended)
   103		MOVQ y+32(FP), AX	// c = y: y acts as the borrow into word 0
   104		MOVQ x+16(FP), SI
   105		MOVQ z+0(FP), DI
   106		MOVQ $0, BX		// i = 0
   107		JMP subwtest
   108	
   109	subwloop:	MOVQ (SI)(BX*8), R9	// R9 = x[i]; TODO(gri) is there a reverse SUBQ?
   110		SUBQ AX, R9		// R9 = x[i] - c
   111		MOVQ R9, (DI)(BX*8)	// z[i] = R9 (MOVQ leaves CF untouched)
   112		RCLQ $1, AX		// bit 0 of AX = CF
   113		ANDQ $1, AX		// c = CF, every other bit cleared
   114		ADDL $1, BX		// i++
   115	
   116	subwtest:	CMPQ BX, CX		// keep going while i < n
   117		JL subwloop
   118	
   119		MOVQ AX, c+40(FP)	// c = final borrow (still y when n is 0)
   120		RET
   121	
   122	
   123	// func shlVU(z, x []Word, s uint) (c Word) -- z = x << s, c = bits shifted out of x[n-1]
   124	TEXT ·shlVU(SB),7,$0
   125		MOVL n+8(FP), BX	// i = n
   126		SUBL $1, BX		// i = n-1
   127		JL shlzero		// n <= 0: nothing to shift
   128	
   129		// at least one word
   130		MOVL s+32(FP), CX	// shift count; variable shifts read it from CX
   131		MOVQ x+16(FP), SI
   132		MOVQ z+0(FP), DI
   133		MOVQ (SI)(BX*8), AX	// lo = x[n-1]
   134		MOVQ $0, DX
   135		SHLQ CX, DX:AX		// DX = lo>>(64-s); AX is left unchanged
   136		MOVQ DX, c+40(FP)	// c = bits shifted out of the top word
   137	
   138		CMPL BX, $0
   139		JLE shllast		// single word: skip the loop
   140	
   141		// i > 0: walk from the top word down to word 1
   142	shlloop:	MOVQ AX, DX		// hi = x[i]
   143		MOVQ -8(SI)(BX*8), AX	// lo = x[i-1]
   144		SHLQ CX, DX:AX		// DX = hi<<s | lo>>(64-s)
   145		MOVQ DX, (DI)(BX*8)	// z[i] = DX
   146		SUBL $1, BX		// i--
   147		JG shlloop		// keep going while i > 0
   148	
   149		// i == 0: AX still holds x[0]
   150	shllast:	SHLQ CX, AX		// x[0]<<s
   151		MOVQ AX, (DI)		// z[0] = x[0]<<s
   152		RET
   153	
   154	shlzero:	MOVQ $0, c+40(FP)	// empty vector: c = 0
   155		RET
   156	
   157	
   158	// func shrVU(z, x []Word, s uint) (c Word) -- z = x >> s, c = bits shifted out of x[0]
   159	TEXT ·shrVU(SB),7,$0
   160		MOVL n+8(FP), R9
   161		SUBL $1, R9		// R9 = n-1, index of the last word
   162		JL shrzero		// n <= 0: nothing to shift
   163	
   164		// at least one word
   165		MOVL s+32(FP), CX	// shift count; variable shifts read it from CX
   166		MOVQ x+16(FP), SI
   167		MOVQ z+0(FP), DI
   168		MOVQ (SI), AX		// hi = x[0]
   169		MOVQ $0, DX
   170		SHRQ CX, DX:AX		// DX = hi<<(64-s); AX is left unchanged
   171		MOVQ DX, c+40(FP)	// c = bits shifted out of the bottom word
   172	
   173		MOVQ $0, BX		// i = 0
   174		JMP shrtest
   175	
   176		// i < n-1: walk from word 0 up to word n-2
   177	shrloop:	MOVQ AX, DX		// lo = x[i]
   178		MOVQ 8(SI)(BX*8), AX	// hi = x[i+1]
   179		SHRQ CX, DX:AX		// DX = lo>>s | hi<<(64-s)
   180		MOVQ DX, (DI)(BX*8)	// z[i] = DX
   181		ADDL $1, BX		// i++
   182	
   183	shrtest:	CMPQ BX, R9
   184		JL shrloop		// keep going while i < n-1
   185	
   186		// i == n-1: AX still holds x[n-1]
   187	shrlast:	SHRQ CX, AX		// x[n-1]>>s
   188		MOVQ AX, (DI)(R9*8)	// z[n-1] = x[n-1]>>s
   189		RET
   190	
   191	shrzero:	MOVQ $0, c+40(FP)	// empty vector: c = 0
   192		RET
   193	
   194	
   195	// func mulAddVWW(z, x []Word, y, r Word) (c Word) -- z = x*y + r, c = carry out (high word)
   196	TEXT ·mulAddVWW(SB),7,$0
   197		MOVL n+8(FP), R9	// n: 32-bit length field of z (zero-extended)
   198		MOVQ r+40(FP), CX	// c = r: r is the carry into word 0
   199		MOVQ y+32(FP), R10
   200		MOVQ x+16(FP), SI
   201		MOVQ z+0(FP), DI
   202		MOVQ $0, BX		// i = 0
   203		JMP multest
   204	
   205	mulloop:	MOVQ (SI)(BX*8), AX	// AX = x[i]
   206		MULQ R10		// DX:AX = x[i] * y
   207		ADDQ CX, AX		// low word += c
   208		ADCQ $0, DX		// fold the carry into the high word
   209		MOVQ DX, CX		// c = high word
   210		MOVQ AX, (DI)(BX*8)	// z[i] = low word
   211		ADDL $1, BX		// i++
   212	
   213	multest:	CMPQ BX, R9		// keep going while i < n
   214		JL mulloop
   215	
   216		MOVQ CX, c+48(FP)	// c = final carry word
   217		RET
   218	
   219	
   220	// func addMulVVW(z, x []Word, y Word) (c Word) -- z += x*y, c = carry out (high word)
   221	TEXT ·addMulVVW(SB),7,$0
   222		MOVL n+8(FP), R9	// n: 32-bit length field of z (zero-extended)
   223		MOVQ y+32(FP), R10
   224		MOVQ x+16(FP), SI
   225		MOVQ z+0(FP), DI
   226		MOVQ $0, CX		// c = 0
   227		MOVQ $0, BX		// i = 0
   228		JMP amultest
   229	
   230	amulloop:	MOVQ (SI)(BX*8), AX	// AX = x[i]
   231		MULQ R10		// DX:AX = x[i] * y
   232		ADDQ CX, AX		// low word += c
   233		ADCQ $0, DX		// fold that carry into the high word
   234		ADDQ AX, (DI)(BX*8)	// z[i] += low word
   235		ADCQ $0, DX		// fold the carry of the accumulate as well
   236		MOVQ DX, CX		// c = high word
   237		ADDL $1, BX		// i++
   238	
   239	amultest:	CMPQ BX, R9		// keep going while i < n
   240		JL amulloop
   241	
   242		MOVQ CX, c+40(FP)	// c = final carry word
   243		RET
   244	
   245	
   246	// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word) -- z = x / y with xn as the initial high word, r = remainder
   247	TEXT ·divWVW(SB),7,$0
   248		MOVL n+8(FP), BX	// i = n (32-bit length field of z)
   249		MOVQ y+40(FP), R10
   250		MOVQ x+24(FP), SI
   251		MOVQ xn+16(FP), DX	// r = xn
   252		MOVQ z+0(FP), DI
   253		JMP divtest
   254	
   255	divloop:	MOVQ (SI)(BX*8), AX	// AX = x[i]; DX:AX = r:x[i]
   256		DIVQ R10		// AX = r:x[i] / y, DX = r:x[i] % y
   257		MOVQ AX, (DI)(BX*8)	// z[i] = quotient word; DX carries r into the next step
   258	
   259	divtest:	SUBL $1, BX		// i--
   260		JGE divloop		// keep going while i >= 0
   261	
   262		MOVQ DX, r+48(FP)	// r = final remainder (xn when n is 0)
   263		RET

release.r60.3. Except as noted, this content is licensed under a Creative Commons Attribution 3.0 License.