...
Run Format

Text file src/math/big/arith_ppc64x.s

Documentation: math/big

     1	// Copyright 2013 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// +build !math_big_pure_go,ppc64 !math_big_pure_go,ppc64le
     6	
     7	#include "textflag.h"
     8	
     9	// This file provides fast assembly versions for the elementary
    10	// arithmetic operations on vectors implemented in arith.go.
    11	
    12	// func mulWW(x, y Word) (z1, z0 Word); z1:z0 = x*y, z1 = high word, z0 = low word
    13	TEXT ·mulWW(SB), NOSPLIT, $0
    14		MOVD   x+0(FP), R4
    15		MOVD   y+8(FP), R5
    16		MULHDU R4, R5, R6	// R6 = high 64 bits of the unsigned product x*y
    17		MULLD  R4, R5, R7	// R7 = low 64 bits of x*y
    18		MOVD   R6, z1+16(FP)
    19		MOVD   R7, z0+24(FP)
    20		RET
    21	
    22	// func addVV(z, x, y []Word) (c Word)
    23	// z[i] = x[i] + y[i] for all i, carrying; c is the carry out of the last word (0 or 1)
    24	TEXT ·addVV(SB), NOSPLIT, $0
    25		MOVD  z_len+8(FP), R7	// R7 = len(z), used as the word count
    26		MOVD  x+24(FP), R8	// R8 = &x[0]
    27		MOVD  y+48(FP), R9	// R9 = &y[0]
    28		MOVD  z+0(FP), R10	// R10 = &z[0]
    29	
    30		MOVD  R0, R4	// R4 = 0 (R0 serves as the zero source in this file); R4 becomes c
    31		MOVD  R0, R6  // R6 will be the address index (byte offset, starts at 0)
    32		ADDC R4, R4   // clear CA: 0 + 0 produces no carry
    33		MOVD  R7, CTR	// CTR = number of loop iterations
    34	
    35		CMP   R0, R7
    36		BEQ   done	// len(z) == 0: skip the loop, CA is still 0 so c = 0
    37	
    38	loop:
    39		MOVD  (R8)(R6), R11   // x[i]
    40		MOVD  (R9)(R6), R12   // y[i]
    41		ADDE  R12, R11, R15   // x[i] + y[i] + CA; CA = carry out, consumed by the next iteration
    42		MOVD  R15, (R10)(R6)  // z[i]
    43	
    44		ADD $8, R6	// next word; plain ADD (unlike ADDC/ADDE) leaves CA untouched
    45		BC  16, 0, loop	// bdnz: decrement CTR, loop while it is non-zero
    46	
    47	done:
    48		ADDZE R4	// R4 = 0 + CA, i.e. the final carry
    49		MOVD  R4, c+72(FP)
    50		RET
    51	
    52	// func subVV(z, x, y []Word) (c Word)
    53	// z[i] = x[i] - y[i] for all i, borrowing; c is the borrow out of the last word (0 or 1)
    54	TEXT ·subVV(SB), NOSPLIT, $0
    55		MOVD z_len+8(FP), R7	// R7 = len(z), used as the word count
    56		MOVD x+24(FP), R8	// R8 = &x[0]
    57		MOVD y+48(FP), R9	// R9 = &y[0]
    58		MOVD z+0(FP), R10	// R10 = &z[0]
    59	
    60		MOVD  R0, R4  // c = 0
    61		MOVD  R0, R6  // R6 = byte offset of the current word, starts at 0
    62		SUBC R0, R0  // 0 - 0 does not borrow, which leaves CA = 1 (for subtraction CA = 1 means "no borrow"); R0 stays 0
    63		MOVD  R7, CTR	// CTR = number of loop iterations
    64	
    65		CMP R0, R7
    66		BEQ  sublend	// len(z) == 0: skip the loop; CA = 1 gives c = 0 below
    67	
    68	// amd64 saves and restores CF, but I believe they only have to do that because all of
    69	// their math operations clobber it - we should just be able to recover it at the end.
    70	subloop:
    71		MOVD  (R8)(R6), R11 // x[i]
    72		MOVD  (R9)(R6), R12 // y[i]
    73	
    74		SUBE R12, R11, R15	// R15 = x[i] - y[i] - (1 - CA); CA = 0 afterwards iff a borrow occurred
    75		MOVD R15, (R10)(R6)	// z[i]
    76	
    77		ADD $8, R6	// next word; plain ADD leaves CA untouched
    78		BC  16, 0, subloop  // bdnz: decrement CTR, loop while it is non-zero
    79	
    80	sublend:
    81	
    82		ADDZE R4	// R4 = 0 + CA (1 = no borrow, 0 = borrow)
    83		XOR   $1, R4	// invert so that c = 1 means a borrow out of the last word
    84		MOVD  R4, c+72(FP)
    85		RET
    86	
    87	TEXT ·addVW(SB), NOSPLIT, $0
    88		BR ·addVW_g(SB)	// no ppc64 assembly for this one: branch to addVW_g (presumably the generic Go version in arith.go)
    89	
    90	TEXT ·subVW(SB), NOSPLIT, $0
    91		BR ·subVW_g(SB)	// no ppc64 assembly for this one: branch to subVW_g (presumably the generic Go version in arith.go)
    92	
    93	TEXT ·shlVU(SB), NOSPLIT, $0
    94		BR ·shlVU_g(SB)	// no ppc64 assembly for this one: branch to shlVU_g (presumably the generic Go version in arith.go)
    95	
    96	TEXT ·shrVU(SB), NOSPLIT, $0
    97		BR ·shrVU_g(SB)	// no ppc64 assembly for this one: branch to shrVU_g (presumably the generic Go version in arith.go)
    98	
    99	// func mulAddVWW(z, x []Word, y, r Word) (c Word); c starts as r, then (c, z[i]) = x[i]*y + c for each i
   100	TEXT ·mulAddVWW(SB), NOSPLIT, $0
   101		MOVD z+0(FP), R10	// R10 = z[]
   102		MOVD x+24(FP), R8	// R8 = x[]
   103		MOVD y+48(FP), R9	// R9 = y
   104		MOVD r+56(FP), R4	// R4 = r = c
   105		MOVD z_len+8(FP), R11	// R11 = z_len
   106	
   107		MOVD R0, R3		// R3 will be the index register (byte offset, starts at 0)
   108		CMP  R0, R11
   109		MOVD R11, CTR		// Initialize loop counter
   110		BEQ  done		// z_len == 0: return c = r unchanged
   111	
   112	loop:
   113		MOVD   (R8)(R3), R20	// x[i]
   114		MULLD  R9, R20, R6	// R6 = z0 = Low-order(x[i]*y)
   115		MULHDU R9, R20, R7	// R7 = z1 = High-order(x[i]*y)
   116		ADDC   R4, R6		// z0 += c, carry out goes to CA
   117		ADDZE  R7		// z1 += CA
   118		MOVD   R6, (R10)(R3)	// z[i]
   119		MOVD   R7, R4		// c = z1, carried into the next word
   120		ADD    $8, R3		// advance to the next 8-byte word
   121		BC  16, 0, loop		// bdnz: decrement CTR, loop while it is non-zero
   122	
   123	done:
   124		MOVD R4, c+64(FP)
   125		RET
   126	
   127	// func addMulVVW(z, x []Word, y Word) (c Word); c starts at 0, then (c, z[i]) = z[i] + x[i]*y + c for each i
   128	TEXT ·addMulVVW(SB), NOSPLIT, $0
   129		MOVD z+0(FP), R10	// R10 = z[]
   130		MOVD x+24(FP), R8	// R8 = x[]
   131		MOVD y+48(FP), R9	// R9 = y
   132		MOVD z_len+8(FP), R22	// R22 = z_len
   133	
   134		MOVD R0, R3		// R3 will be the index register (byte offset, starts at 0)
   135		CMP  R0, R22
   136		MOVD R0, R4		// R4 = c = 0
   137		MOVD R22, CTR		// Initialize loop counter
   138		BEQ  done		// z_len == 0: return c = 0
   139	
   140	loop:
   141		MOVD  (R8)(R3), R20	// Load x[i]
   142		MOVD  (R10)(R3), R21	// Load z[i]
   143		MULLD  R9, R20, R6	// R6 = Low-order(x[i]*y)
   144		MULHDU R9, R20, R7	// R7 = High-order(x[i]*y)
   145		ADDC   R21, R6		// R6 = z0 = Low-order(x[i]*y) + z[i], carry out goes to CA
   146		ADDZE  R7		// R7 = z1 = High-order(x[i]*y) + CA
   147		ADDC   R4, R6		// R6 = z0 + c, carry out goes to CA
   148		ADDZE  R7, R4           // c = z1 + CA (overwrites the old c, which was already added into z0)
   149		MOVD   R6, (R10)(R3)	// Store z[i]
   150		ADD    $8, R3		// advance to the next 8-byte word
   151		BC  16, 0, loop		// bdnz: decrement CTR, loop while it is non-zero
   152	
   153	done:
   154		MOVD R4, c+56(FP)
   155		RET
   156	
   157	// func divWW(x1, x0, y Word) (q, r Word); divides the two-word value x1:x0 by y, giving quotient q and remainder r
   158	TEXT ·divWW(SB), NOSPLIT, $0
   159		MOVD x1+0(FP), R4	// R4 = x1 (high word of the dividend)
   160		MOVD x0+8(FP), R5	// R5 = x0 (low word of the dividend)
   161		MOVD y+16(FP), R6	// R6 = y (divisor)
   162	
   163		CMPU R4, R6
   164		BGE  divbigger	// x1 >= y (unsigned): the quotient would not fit in one word; this also catches y == 0
   165	
   166		// from the programmer's note in ch. 3 of the ISA manual, p.74
   167		DIVDEU R6, R4, R3	// R3 = q1 = (x1 << 64) / y
   168		DIVDU  R6, R5, R7	// R7 = q2 = x0 / y
   169		MULLD  R6, R3, R8	// R8 = low 64 bits of q1*y
   170		MULLD  R6, R7, R20	// R20 = q2*y
   171		SUB    R20, R5, R10	// R10 = r2 = x0 - q2*y
   172		ADD    R7, R3, R3	// R3 = q = q1 + q2
   173		SUB    R8, R10, R4	// R4 = r = r2 - q1*y (mod 2^64), i.e. r1 + r2 where r1 is the remainder of the first division
   174		CMPU   R4, R10
   175		BLT    adjust	// r < r2: the sum r1 + r2 wrapped around, so q is one too small
   176		CMPU   R4, R6
   177		BLT    end	// r < y: q and r are already final
   178	
   179	adjust:
   180		MOVD $1, R21
   181		ADD  R21, R3, R3	// q++
   182		SUB  R6, R4, R4	// r -= y
   183	
   184	end:
   185		MOVD R3, q+24(FP)
   186		MOVD R4, r+32(FP)
   187	
   188		RET
   189	
   190	divbigger:
   191		MOVD $-1, R7	// all bits set
   192		MOVD R7, q+24(FP)	// q = all ones
   193		MOVD R7, r+32(FP)	// r = all ones
   194		RET
   195	
   196	TEXT ·divWVW(SB), NOSPLIT, $0
   197		BR ·divWVW_g(SB)	// no ppc64 assembly for this one: branch to divWVW_g (presumably the generic Go version in arith.go)

View as plain text