1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // This file provides fast assembly versions for the elementary
6 // arithmetic operations on vectors implemented in arith.go.
7
// func mulWW(x, y Word) (z1, z0 Word)
//
// Unsigned word multiply with a double-word result:
// z1 is the high word of x*y and z0 is the low word.
TEXT ·mulWW(SB),7,$0
	MOVL y+4(FP), AX	// AX = y
	MULL x+0(FP)		// DX:AX = y * x
	MOVL AX, z0+12(FP)	// z0 = low word of the product
	MOVL DX, z1+8(FP)	// z1 = high word of the product
	RET
15
16
// func divWW(x1, x0, y Word) (q, r Word)
//
// Unsigned division of the double-word value x1:x0 by y:
// q is the quotient and r the remainder.
// DIVL faults if y is zero or if the quotient does not fit in one word.
TEXT ·divWW(SB),7,$0
	MOVL x0+4(FP), AX	// AX = low word of the dividend
	MOVL x1+0(FP), DX	// DX = high word of the dividend
	DIVL y+8(FP)		// AX = DX:AX / y, DX = DX:AX % y
	MOVL DX, r+16(FP)	// r = remainder
	MOVL AX, q+12(FP)	// q = quotient
	RET
25
26
// func addVV(z, x, y []Word) (c Word)
//
// Word-wise vector addition with carry propagation:
// z[i] = x[i] + y[i] + carry for 0 <= i < len(z).
// c is the carry out of the last word (0 when len(z) == 0).
// Only len(z) bounds the loop; x and y are indexed without a length check.
//
// Registers: DI = &z[0], SI = &x[0], CX = &y[0], BP = n, BX = i, DX = c.
TEXT ·addVV(SB),7,$0
	MOVL n+4(FP), BP	// n = len(z)
	MOVL y+24(FP), CX
	MOVL x+12(FP), SI
	MOVL z+0(FP), DI
	XORL DX, DX		// c = 0
	XORL BX, BX		// i = 0
	JMP addVV_test

addVV_loop:
	MOVL (SI)(BX*4), AX	// AX = x[i]
	RCRL $1, DX		// CF = c (bit 0 of DX rotates into the carry flag)
	ADCL (CX)(BX*4), AX	// AX = x[i] + y[i] + CF
	RCLL $1, DX		// c = CF (new carry rotates back into bit 0)
	MOVL AX, (DI)(BX*4)	// z[i] = AX
	INCL BX			// i++

addVV_test:
	CMPL BX, BP
	JL addVV_loop		// continue while i < n

	MOVL DX, c+36(FP)
	RET
49
50
// func subVV(z, x, y []Word) (c Word)
//
// Word-wise vector subtraction with borrow propagation:
// z[i] = x[i] - y[i] - borrow for 0 <= i < len(z).
// c is the borrow out of the last word (0 when len(z) == 0).
// Structurally the same as addVV, with SBBL in place of ADCL.
//
// Registers: DI = &z[0], SI = &x[0], CX = &y[0], BP = n, BX = i, DX = c.
TEXT ·subVV(SB),7,$0
	MOVL n+4(FP), BP	// n = len(z)
	MOVL y+24(FP), CX
	MOVL x+12(FP), SI
	MOVL z+0(FP), DI
	XORL DX, DX		// c = 0
	XORL BX, BX		// i = 0
	JMP subVV_test

subVV_loop:
	MOVL (SI)(BX*4), AX	// AX = x[i]
	RCRL $1, DX		// CF = c (bit 0 of DX rotates into the carry flag)
	SBBL (CX)(BX*4), AX	// AX = x[i] - y[i] - CF
	RCLL $1, DX		// c = CF (new borrow rotates back into bit 0)
	MOVL AX, (DI)(BX*4)	// z[i] = AX
	INCL BX			// i++

subVV_test:
	CMPL BX, BP
	JL subVV_loop		// continue while i < n

	MOVL DX, c+36(FP)
	RET
74
75
// func addVW(z, x []Word, y Word) (c Word)
//
// Adds the single word y to the vector x. The running carry starts out
// as y, and each step computes z[i] = x[i] + carry for 0 <= i < len(z).
// c is the final carry; when len(z) == 0 that is y itself.
//
// Registers: DI = &z[0], SI = &x[0], BP = n, BX = i, AX = c.
TEXT ·addVW(SB),7,$0
	MOVL n+4(FP), BP	// n = len(z)
	MOVL y+24(FP), AX	// c = y
	MOVL x+12(FP), SI
	MOVL z+0(FP), DI
	XORL BX, BX		// i = 0
	JMP addVW_test

addVW_loop:
	ADDL (SI)(BX*4), AX	// AX = x[i] + c, CF = carry out
	MOVL AX, (DI)(BX*4)	// z[i] = AX (a store leaves CF untouched)
	SBBL AX, AX		// AX = -CF
	NEGL AX			// c = CF, i.e. 0 or 1
	INCL BX			// i++

addVW_test:
	CMPL BX, BP
	JL addVW_loop		// continue while i < n

	MOVL AX, c+28(FP)
	RET
96
97
// func subVW(z, x []Word, y Word) (c Word)
//
// Subtracts the single word y from the vector x. The running borrow
// starts out as y, and each step computes z[i] = x[i] - borrow for
// 0 <= i < len(z). c is the final borrow; when len(z) == 0 that is y itself.
//
// Registers: DI = &z[0], SI = &x[0], BP = n, BX = i, AX = c, DX = scratch.
TEXT ·subVW(SB),7,$0
	MOVL n+4(FP), BP	// n = len(z)
	MOVL y+24(FP), AX	// c = y
	MOVL x+12(FP), SI
	MOVL z+0(FP), DI
	XORL BX, BX		// i = 0
	JMP subVW_test

subVW_loop:
	// x[i] goes through DX because SUBL subtracts into its destination.
	// TODO(gri) is there a reverse SUBL?
	MOVL (SI)(BX*4), DX	// DX = x[i]
	SUBL AX, DX		// DX = x[i] - c, CF = borrow out
	MOVL DX, (DI)(BX*4)	// z[i] = DX (a store leaves CF untouched)
	SBBL AX, AX		// AX = -CF
	NEGL AX			// c = CF, i.e. 0 or 1
	INCL BX			// i++

subVW_test:
	CMPL BX, BP
	JL subVW_loop		// continue while i < n

	MOVL AX, c+28(FP)
	RET
119
120
// func shlVU(z, x []Word, s uint) (c Word)
//
// Shifts the vector x left by s bits into z. The walk runs from the most
// significant word down, so each z[i] combines x[i]<<s with the top bits
// of x[i-1]. c receives the bits shifted out of x[len(z)-1]; c = 0 when
// len(z) == 0. In the comments, ŝ is the complementary count (word size - s).
//
// Registers: DI = &z[0], SI = &x[0], CX = s, BX = i, AX = w1, DX = w.
TEXT ·shlVU(SB),7,$0
	MOVL n+4(FP), BX
	DECL BX			// i = n-1
	JL shlVU_empty		// n <= 0: nothing to shift

	MOVL s+24(FP), CX
	MOVL x+12(FP), SI
	MOVL z+0(FP), DI
	MOVL (SI)(BX*4), AX	// w1 = x[n-1]
	XORL DX, DX
	SHLL CX, DX:AX		// DX = w1>>ŝ (double shift into a zeroed DX)
	MOVL DX, c+28(FP)	// c = bits shifted out of the top word

	TESTL BX, BX
	JLE shlVU_last		// i <= 0: only z[0] is left

shlVU_loop:
	MOVL AX, DX		// w = w1
	MOVL -4(SI)(BX*4), AX	// w1 = x[i-1]
	SHLL CX, DX:AX		// DX = w<<s | w1>>ŝ
	MOVL DX, (DI)(BX*4)	// z[i] = DX
	DECL BX			// i--
	JG shlVU_loop		// continue while i > 0

shlVU_last:
	SHLL CX, AX		// w1<<s
	MOVL AX, (DI)		// z[0] = w1<<s
	RET

shlVU_empty:
	MOVL $0, c+28(FP)
	RET
154
155
// func shrVU(z, x []Word, s uint) (c Word)
//
// Shifts the vector x right by s bits into z. The walk runs from the least
// significant word up, so each z[i] combines x[i]>>s with the low bits of
// x[i+1]. c receives the bits shifted out of x[0], left-aligned in the
// word; c = 0 when len(z) == 0. In the comments, ŝ is the complementary
// count (word size - s).
//
// Registers: DI = &z[0], SI = &x[0], CX = s, BP = n-1, BX = i, AX = w1, DX = w.
TEXT ·shrVU(SB),7,$0
	MOVL n+4(FP), BP
	DECL BP			// BP = n-1
	JL shrVU_empty		// n <= 0: nothing to shift

	MOVL s+24(FP), CX
	MOVL x+12(FP), SI
	MOVL z+0(FP), DI
	MOVL (SI), AX		// w1 = x[0]
	XORL DX, DX
	SHRL CX, DX:AX		// DX = w1<<ŝ (double shift into a zeroed DX)
	MOVL DX, c+28(FP)	// c = bits shifted out of the bottom word

	XORL BX, BX		// i = 0
	JMP shrVU_test

shrVU_loop:
	MOVL AX, DX		// w = w1
	MOVL 4(SI)(BX*4), AX	// w1 = x[i+1]
	SHRL CX, DX:AX		// DX = w>>s | w1<<ŝ
	MOVL DX, (DI)(BX*4)	// z[i] = DX
	INCL BX			// i++

shrVU_test:
	CMPL BX, BP
	JL shrVU_loop		// continue while i < n-1

	// i >= n-1: the top word has nothing above it to shift in.
	SHRL CX, AX		// w1>>s
	MOVL AX, (DI)(BP*4)	// z[n-1] = w1>>s
	RET

shrVU_empty:
	MOVL $0, c+28(FP)
	RET
191
192
// func mulAddVWW(z, x []Word, y, r Word) (c Word)
//
// Multiplies the vector x by the single word y and adds r:
// the carry starts out as r, and for 0 <= k < len(z) the low word of
// x[k]*y + carry is stored in z[k] while the high word becomes the next
// carry. c is the final carry; when len(z) == 0 that is r itself.
//
// Both base pointers are advanced to one past the last element so that a
// negative index i running from -n up to 0 addresses element n+i.
//
// Registers: DI = &z[n], SI = &x[n], BP = y, BX = i, CX = c, DX:AX = product.
TEXT ·mulAddVWW(SB),7,$0
	MOVL n+4(FP), BX	// n = len(z)
	MOVL r+28(FP), CX	// c = r
	MOVL y+24(FP), BP
	MOVL x+12(FP), SI
	MOVL z+0(FP), DI
	LEAL (SI)(BX*4), SI	// SI = &x[n]
	LEAL (DI)(BX*4), DI	// DI = &z[n]
	NEGL BX			// i = -n
	JMP mulAddVWW_test

mulAddVWW_loop:
	MOVL (SI)(BX*4), AX	// AX = x[n+i]
	MULL BP			// DX:AX = x[n+i] * y
	ADDL CX, AX		// low word += c
	ADCL $0, DX		// carry from the low word goes into the high word
	MOVL AX, (DI)(BX*4)	// z[n+i] = low word
	MOVL DX, CX		// c = high word
	INCL BX			// i++

mulAddVWW_test:
	TESTL BX, BX
	JL mulAddVWW_loop	// continue while i < 0

	MOVL CX, c+32(FP)
	RET
218
219
// func addMulVVW(z, x []Word, y Word) (c Word)
//
// Multiply-accumulate: adds x*y into z in place. For 0 <= k < len(z),
// z[k] receives the low word of z[k] + x[k]*y + carry, and the high word
// becomes the next carry. c is the final carry (0 when len(z) == 0).
//
// Both base pointers are advanced to one past the last element so that a
// negative index i running from -n up to 0 addresses element n+i.
//
// Registers: DI = &z[n], SI = &x[n], BP = y, BX = i, CX = c, DX:AX = product.
TEXT ·addMulVVW(SB),7,$0
	MOVL n+4(FP), BX	// n = len(z)
	MOVL y+24(FP), BP
	MOVL x+12(FP), SI
	MOVL z+0(FP), DI
	LEAL (SI)(BX*4), SI	// SI = &x[n]
	LEAL (DI)(BX*4), DI	// DI = &z[n]
	NEGL BX			// i = -n
	XORL CX, CX		// c = 0
	JMP addMulVVW_test

addMulVVW_loop:
	MOVL (SI)(BX*4), AX	// AX = x[n+i]
	MULL BP			// DX:AX = x[n+i] * y
	ADDL CX, AX		// low word += c
	ADCL $0, DX		// carry from that add goes into the high word
	ADDL AX, (DI)(BX*4)	// z[n+i] += low word
	ADCL $0, DX		// carry from the accumulate goes into the high word
	MOVL DX, CX		// c = high word
	INCL BX			// i++

addMulVVW_test:
	TESTL BX, BX
	JL addMulVVW_loop	// continue while i < 0

	MOVL CX, c+28(FP)
	RET
246
247
// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
//
// Divides the vector x by the single word y, from the most significant
// word down. The remainder starts out as xn, and for i = len(z)-1 .. 0
// the double word r:x[i] is divided by y, giving quotient z[i] and a new
// remainder r. The final remainder is returned; when len(z) == 0 that is
// xn itself. DIVL faults if y is zero or a quotient does not fit in a word.
//
// z is passed as a three-word slice header, which is why xn sits at
// offset 12 in the frame.
//
// Registers: DI = &z[0], SI = &x[0], CX = y, BX = i, DX = r, AX = x[i] / quotient.
TEXT ·divWVW(SB),7,$0
	MOVL n+4(FP), BX	// i = len(z)
	MOVL y+28(FP), CX
	MOVL x+16(FP), SI
	MOVL xn+12(FP), DX	// r = xn
	MOVL z+0(FP), DI
	JMP divWVW_next

divWVW_loop:
	MOVL (SI)(BX*4), AX	// AX = x[i]
	DIVL CX			// AX = r:x[i] / y, DX = r:x[i] % y
	MOVL AX, (DI)(BX*4)	// z[i] = quotient

divWVW_next:
	DECL BX			// i--
	JGE divWVW_loop		// continue while i >= 0

	MOVL DX, r+32(FP)
	RET