...
Run Format

Text file src/crypto/sha512/sha512block_ppc64le.s

Documentation: crypto/sha512

     1	// Copyright 2016 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// This is a derived work from OpenSSL of SHA-2 using assembly optimizations. The
     6	// original code was written by Andy Polyakov <appro@openssl.org> and it's dual
     7	// licensed under OpenSSL and CRYPTOGAMS licenses depending on where you obtain
     8	// it. For further details see http://www.openssl.org/~appro/cryptogams/.
     9	
    10	#include "textflag.h"
    11	
    12	// SHA512 block routine. See sha512block.go for Go equivalent.
    13	//
    14	// The algorithm is detailed in FIPS 180-4:
    15	//
    16	//  http://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
    17	//
    18	// Wt = Mt; for 0 <= t <= 15
    19	// Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 79
    20	//
    21	// a = H0
    22	// b = H1
    23	// c = H2
    24	// d = H3
    25	// e = H4
    26	// f = H5
    27	// g = H6
    28	// h = H7
    29	//
    30	// for t = 0 to 79 {
    31	//    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
    32	//    T2 = BIGSIGMA0(a) + Maj(a,b,c)
    33	//    h = g
    34	//    g = f
    35	//    f = e
    36	//    e = d + T1
    37	//    d = c
    38	//    c = b
    39	//    b = a
    40	//    a = T1 + T2
    41	// }
    42	//
    43	// H0 = a + H0
    44	// H1 = b + H1
    45	// H2 = c + H2
    46	// H3 = d + H3
    47	// H4 = e + H4
    48	// H5 = f + H5
    49	// H6 = g + H6
    50	// H7 = h + H7
    51	
    52	#define CTX	R3	// hash state pointer, loaded from dig+0(FP)
    53	#define INP	R4	// input cursor; starts at p_base and advances 16 bytes after each input load
    54	#define END	R5	// INP + LEN at entry; the block loop runs while INP < END
    55	#define TBL	R6	// address of the ·kcon constant table
    56	#define IDX	R7	// byte offset into TBL of the next table entry to load into KI
    57	#define CNT	R8	// copied from R0 in block; not read by any code visible here
    58	#define LEN	R9	// p_len after being rounded down by the SRD/SLD pair in block
    59	#define OFFLOAD	R11	// copy of R1; base of the 128-byte area where V0-V7 are saved for each block
    60	#define TEMP	R12	// scratch; carries the L16_xx iteration count into CTR
    61	
    62	#define HEX00	R0	// index 0x00; R0 is never written here - presumably always zero under the Go ppc64 ABI, TODO confirm
    63	#define HEX10	R10	// HEX10-HEX70 are set to 0x10-0x70 in block and used as index registers
    64	#define HEX20	R25
    65	#define HEX30	R26
    66	#define HEX40	R27
    67	#define HEX50	R28
    68	#define HEX60	R29
    69	#define HEX70	R31
    70	
    71	// V0-V7 are A-H
    72	// V8-V23 are used for the message schedule
    73	#define KI	V24	// table entry most recently loaded from (TBL)(IDX)
    74	#define FUNC	V25	// scratch for the Ch and Maj values inside the round macros
    75	#define S0	V26	// VSHASIGMAD of a in the round macros (BIGSIGMA0 in the header pseudocode)
    76	#define S1	V27	// VSHASIGMAD of e in the round macros (BIGSIGMA1 in the header pseudocode)
    77	#define s0	V28	// VSHASIGMAD of xj_1 in SHA512ROUND1 (message-schedule SIGMA0)
    78	#define s1	V29	// VSHASIGMAD of xj_14 in SHA512ROUND1 (message-schedule SIGMA1)
    79	#define LEMASK	V31	// Permutation control register for little endian
    80	
    81	// 2 copies of each Kt, to fill both doublewords of a vector register
      	//
      	// Layout: constant Kt (t = 0..79) occupies the 16 bytes at offset 16*t, so
      	// one LVX from (TBL)(IDX) puts Kt in both doublewords of KI. Two more
      	// 16-byte entries follow the 80 constants:
      	//   0x500  zero: loaded by round 78 and added to g by round 79, which
      	//          therefore leaves g unchanged.
      	//   0x510  byte indices: loaded by round 79, so KI holds this entry when
      	//          block reaches the VPERMs that pack the state for the final store.
      	// Total size: 82 entries * 16 bytes = 1312 bytes, matching the GLOBL below.
    82	DATA  ·kcon+0x000(SB)/8, $0x428a2f98d728ae22
    83	DATA  ·kcon+0x008(SB)/8, $0x428a2f98d728ae22
    84	DATA  ·kcon+0x010(SB)/8, $0x7137449123ef65cd
    85	DATA  ·kcon+0x018(SB)/8, $0x7137449123ef65cd
    86	DATA  ·kcon+0x020(SB)/8, $0xb5c0fbcfec4d3b2f
    87	DATA  ·kcon+0x028(SB)/8, $0xb5c0fbcfec4d3b2f
    88	DATA  ·kcon+0x030(SB)/8, $0xe9b5dba58189dbbc
    89	DATA  ·kcon+0x038(SB)/8, $0xe9b5dba58189dbbc
    90	DATA  ·kcon+0x040(SB)/8, $0x3956c25bf348b538
    91	DATA  ·kcon+0x048(SB)/8, $0x3956c25bf348b538
    92	DATA  ·kcon+0x050(SB)/8, $0x59f111f1b605d019
    93	DATA  ·kcon+0x058(SB)/8, $0x59f111f1b605d019
    94	DATA  ·kcon+0x060(SB)/8, $0x923f82a4af194f9b
    95	DATA  ·kcon+0x068(SB)/8, $0x923f82a4af194f9b
    96	DATA  ·kcon+0x070(SB)/8, $0xab1c5ed5da6d8118
    97	DATA  ·kcon+0x078(SB)/8, $0xab1c5ed5da6d8118
    98	DATA  ·kcon+0x080(SB)/8, $0xd807aa98a3030242
    99	DATA  ·kcon+0x088(SB)/8, $0xd807aa98a3030242
   100	DATA  ·kcon+0x090(SB)/8, $0x12835b0145706fbe
   101	DATA  ·kcon+0x098(SB)/8, $0x12835b0145706fbe
   102	DATA  ·kcon+0x0A0(SB)/8, $0x243185be4ee4b28c
   103	DATA  ·kcon+0x0A8(SB)/8, $0x243185be4ee4b28c
   104	DATA  ·kcon+0x0B0(SB)/8, $0x550c7dc3d5ffb4e2
   105	DATA  ·kcon+0x0B8(SB)/8, $0x550c7dc3d5ffb4e2
   106	DATA  ·kcon+0x0C0(SB)/8, $0x72be5d74f27b896f
   107	DATA  ·kcon+0x0C8(SB)/8, $0x72be5d74f27b896f
   108	DATA  ·kcon+0x0D0(SB)/8, $0x80deb1fe3b1696b1
   109	DATA  ·kcon+0x0D8(SB)/8, $0x80deb1fe3b1696b1
   110	DATA  ·kcon+0x0E0(SB)/8, $0x9bdc06a725c71235
   111	DATA  ·kcon+0x0E8(SB)/8, $0x9bdc06a725c71235
   112	DATA  ·kcon+0x0F0(SB)/8, $0xc19bf174cf692694
   113	DATA  ·kcon+0x0F8(SB)/8, $0xc19bf174cf692694
   114	DATA  ·kcon+0x100(SB)/8, $0xe49b69c19ef14ad2
   115	DATA  ·kcon+0x108(SB)/8, $0xe49b69c19ef14ad2
   116	DATA  ·kcon+0x110(SB)/8, $0xefbe4786384f25e3
   117	DATA  ·kcon+0x118(SB)/8, $0xefbe4786384f25e3
   118	DATA  ·kcon+0x120(SB)/8, $0x0fc19dc68b8cd5b5
   119	DATA  ·kcon+0x128(SB)/8, $0x0fc19dc68b8cd5b5
   120	DATA  ·kcon+0x130(SB)/8, $0x240ca1cc77ac9c65
   121	DATA  ·kcon+0x138(SB)/8, $0x240ca1cc77ac9c65
   122	DATA  ·kcon+0x140(SB)/8, $0x2de92c6f592b0275
   123	DATA  ·kcon+0x148(SB)/8, $0x2de92c6f592b0275
   124	DATA  ·kcon+0x150(SB)/8, $0x4a7484aa6ea6e483
   125	DATA  ·kcon+0x158(SB)/8, $0x4a7484aa6ea6e483
   126	DATA  ·kcon+0x160(SB)/8, $0x5cb0a9dcbd41fbd4
   127	DATA  ·kcon+0x168(SB)/8, $0x5cb0a9dcbd41fbd4
   128	DATA  ·kcon+0x170(SB)/8, $0x76f988da831153b5
   129	DATA  ·kcon+0x178(SB)/8, $0x76f988da831153b5
   130	DATA  ·kcon+0x180(SB)/8, $0x983e5152ee66dfab
   131	DATA  ·kcon+0x188(SB)/8, $0x983e5152ee66dfab
   132	DATA  ·kcon+0x190(SB)/8, $0xa831c66d2db43210
   133	DATA  ·kcon+0x198(SB)/8, $0xa831c66d2db43210
   134	DATA  ·kcon+0x1A0(SB)/8, $0xb00327c898fb213f
   135	DATA  ·kcon+0x1A8(SB)/8, $0xb00327c898fb213f
   136	DATA  ·kcon+0x1B0(SB)/8, $0xbf597fc7beef0ee4
   137	DATA  ·kcon+0x1B8(SB)/8, $0xbf597fc7beef0ee4
   138	DATA  ·kcon+0x1C0(SB)/8, $0xc6e00bf33da88fc2
   139	DATA  ·kcon+0x1C8(SB)/8, $0xc6e00bf33da88fc2
   140	DATA  ·kcon+0x1D0(SB)/8, $0xd5a79147930aa725
   141	DATA  ·kcon+0x1D8(SB)/8, $0xd5a79147930aa725
   142	DATA  ·kcon+0x1E0(SB)/8, $0x06ca6351e003826f
   143	DATA  ·kcon+0x1E8(SB)/8, $0x06ca6351e003826f
   144	DATA  ·kcon+0x1F0(SB)/8, $0x142929670a0e6e70
   145	DATA  ·kcon+0x1F8(SB)/8, $0x142929670a0e6e70
   146	DATA  ·kcon+0x200(SB)/8, $0x27b70a8546d22ffc
   147	DATA  ·kcon+0x208(SB)/8, $0x27b70a8546d22ffc
   148	DATA  ·kcon+0x210(SB)/8, $0x2e1b21385c26c926
   149	DATA  ·kcon+0x218(SB)/8, $0x2e1b21385c26c926
   150	DATA  ·kcon+0x220(SB)/8, $0x4d2c6dfc5ac42aed
   151	DATA  ·kcon+0x228(SB)/8, $0x4d2c6dfc5ac42aed
   152	DATA  ·kcon+0x230(SB)/8, $0x53380d139d95b3df
   153	DATA  ·kcon+0x238(SB)/8, $0x53380d139d95b3df
   154	DATA  ·kcon+0x240(SB)/8, $0x650a73548baf63de
   155	DATA  ·kcon+0x248(SB)/8, $0x650a73548baf63de
   156	DATA  ·kcon+0x250(SB)/8, $0x766a0abb3c77b2a8
   157	DATA  ·kcon+0x258(SB)/8, $0x766a0abb3c77b2a8
   158	DATA  ·kcon+0x260(SB)/8, $0x81c2c92e47edaee6
   159	DATA  ·kcon+0x268(SB)/8, $0x81c2c92e47edaee6
   160	DATA  ·kcon+0x270(SB)/8, $0x92722c851482353b
   161	DATA  ·kcon+0x278(SB)/8, $0x92722c851482353b
   162	DATA  ·kcon+0x280(SB)/8, $0xa2bfe8a14cf10364
   163	DATA  ·kcon+0x288(SB)/8, $0xa2bfe8a14cf10364
   164	DATA  ·kcon+0x290(SB)/8, $0xa81a664bbc423001
   165	DATA  ·kcon+0x298(SB)/8, $0xa81a664bbc423001
   166	DATA  ·kcon+0x2A0(SB)/8, $0xc24b8b70d0f89791
   167	DATA  ·kcon+0x2A8(SB)/8, $0xc24b8b70d0f89791
   168	DATA  ·kcon+0x2B0(SB)/8, $0xc76c51a30654be30
   169	DATA  ·kcon+0x2B8(SB)/8, $0xc76c51a30654be30
   170	DATA  ·kcon+0x2C0(SB)/8, $0xd192e819d6ef5218
   171	DATA  ·kcon+0x2C8(SB)/8, $0xd192e819d6ef5218
   172	DATA  ·kcon+0x2D0(SB)/8, $0xd69906245565a910
   173	DATA  ·kcon+0x2D8(SB)/8, $0xd69906245565a910
   174	DATA  ·kcon+0x2E0(SB)/8, $0xf40e35855771202a
   175	DATA  ·kcon+0x2E8(SB)/8, $0xf40e35855771202a
   176	DATA  ·kcon+0x2F0(SB)/8, $0x106aa07032bbd1b8
   177	DATA  ·kcon+0x2F8(SB)/8, $0x106aa07032bbd1b8
   178	DATA  ·kcon+0x300(SB)/8, $0x19a4c116b8d2d0c8
   179	DATA  ·kcon+0x308(SB)/8, $0x19a4c116b8d2d0c8
   180	DATA  ·kcon+0x310(SB)/8, $0x1e376c085141ab53
   181	DATA  ·kcon+0x318(SB)/8, $0x1e376c085141ab53
   182	DATA  ·kcon+0x320(SB)/8, $0x2748774cdf8eeb99
   183	DATA  ·kcon+0x328(SB)/8, $0x2748774cdf8eeb99
   184	DATA  ·kcon+0x330(SB)/8, $0x34b0bcb5e19b48a8
   185	DATA  ·kcon+0x338(SB)/8, $0x34b0bcb5e19b48a8
   186	DATA  ·kcon+0x340(SB)/8, $0x391c0cb3c5c95a63
   187	DATA  ·kcon+0x348(SB)/8, $0x391c0cb3c5c95a63
   188	DATA  ·kcon+0x350(SB)/8, $0x4ed8aa4ae3418acb
   189	DATA  ·kcon+0x358(SB)/8, $0x4ed8aa4ae3418acb
   190	DATA  ·kcon+0x360(SB)/8, $0x5b9cca4f7763e373
   191	DATA  ·kcon+0x368(SB)/8, $0x5b9cca4f7763e373
   192	DATA  ·kcon+0x370(SB)/8, $0x682e6ff3d6b2b8a3
   193	DATA  ·kcon+0x378(SB)/8, $0x682e6ff3d6b2b8a3
   194	DATA  ·kcon+0x380(SB)/8, $0x748f82ee5defb2fc
   195	DATA  ·kcon+0x388(SB)/8, $0x748f82ee5defb2fc
   196	DATA  ·kcon+0x390(SB)/8, $0x78a5636f43172f60
   197	DATA  ·kcon+0x398(SB)/8, $0x78a5636f43172f60
   198	DATA  ·kcon+0x3A0(SB)/8, $0x84c87814a1f0ab72
   199	DATA  ·kcon+0x3A8(SB)/8, $0x84c87814a1f0ab72
   200	DATA  ·kcon+0x3B0(SB)/8, $0x8cc702081a6439ec
   201	DATA  ·kcon+0x3B8(SB)/8, $0x8cc702081a6439ec
   202	DATA  ·kcon+0x3C0(SB)/8, $0x90befffa23631e28
   203	DATA  ·kcon+0x3C8(SB)/8, $0x90befffa23631e28
   204	DATA  ·kcon+0x3D0(SB)/8, $0xa4506cebde82bde9
   205	DATA  ·kcon+0x3D8(SB)/8, $0xa4506cebde82bde9
   206	DATA  ·kcon+0x3E0(SB)/8, $0xbef9a3f7b2c67915
   207	DATA  ·kcon+0x3E8(SB)/8, $0xbef9a3f7b2c67915
   208	DATA  ·kcon+0x3F0(SB)/8, $0xc67178f2e372532b
   209	DATA  ·kcon+0x3F8(SB)/8, $0xc67178f2e372532b
   210	DATA  ·kcon+0x400(SB)/8, $0xca273eceea26619c
   211	DATA  ·kcon+0x408(SB)/8, $0xca273eceea26619c
   212	DATA  ·kcon+0x410(SB)/8, $0xd186b8c721c0c207
   213	DATA  ·kcon+0x418(SB)/8, $0xd186b8c721c0c207
   214	DATA  ·kcon+0x420(SB)/8, $0xeada7dd6cde0eb1e
   215	DATA  ·kcon+0x428(SB)/8, $0xeada7dd6cde0eb1e
   216	DATA  ·kcon+0x430(SB)/8, $0xf57d4f7fee6ed178
   217	DATA  ·kcon+0x438(SB)/8, $0xf57d4f7fee6ed178
   218	DATA  ·kcon+0x440(SB)/8, $0x06f067aa72176fba
   219	DATA  ·kcon+0x448(SB)/8, $0x06f067aa72176fba
   220	DATA  ·kcon+0x450(SB)/8, $0x0a637dc5a2c898a6
   221	DATA  ·kcon+0x458(SB)/8, $0x0a637dc5a2c898a6
   222	DATA  ·kcon+0x460(SB)/8, $0x113f9804bef90dae
   223	DATA  ·kcon+0x468(SB)/8, $0x113f9804bef90dae
   224	DATA  ·kcon+0x470(SB)/8, $0x1b710b35131c471b
   225	DATA  ·kcon+0x478(SB)/8, $0x1b710b35131c471b
   226	DATA  ·kcon+0x480(SB)/8, $0x28db77f523047d84
   227	DATA  ·kcon+0x488(SB)/8, $0x28db77f523047d84
   228	DATA  ·kcon+0x490(SB)/8, $0x32caab7b40c72493
   229	DATA  ·kcon+0x498(SB)/8, $0x32caab7b40c72493
   230	DATA  ·kcon+0x4A0(SB)/8, $0x3c9ebe0a15c9bebc
   231	DATA  ·kcon+0x4A8(SB)/8, $0x3c9ebe0a15c9bebc
   232	DATA  ·kcon+0x4B0(SB)/8, $0x431d67c49c100d4c
   233	DATA  ·kcon+0x4B8(SB)/8, $0x431d67c49c100d4c
   234	DATA  ·kcon+0x4C0(SB)/8, $0x4cc5d4becb3e42b6
   235	DATA  ·kcon+0x4C8(SB)/8, $0x4cc5d4becb3e42b6
   236	DATA  ·kcon+0x4D0(SB)/8, $0x597f299cfc657e2a
   237	DATA  ·kcon+0x4D8(SB)/8, $0x597f299cfc657e2a
   238	DATA  ·kcon+0x4E0(SB)/8, $0x5fcb6fab3ad6faec
   239	DATA  ·kcon+0x4E8(SB)/8, $0x5fcb6fab3ad6faec
   240	DATA  ·kcon+0x4F0(SB)/8, $0x6c44198c4a475817
   241	DATA  ·kcon+0x4F8(SB)/8, $0x6c44198c4a475817
   242	DATA  ·kcon+0x500(SB)/8, $0x0000000000000000	// zero entry following K79
   243	DATA  ·kcon+0x508(SB)/8, $0x0000000000000000
   244	DATA  ·kcon+0x510(SB)/8, $0x1011121314151617	// VPERM control used by block before the final store
   245	DATA  ·kcon+0x518(SB)/8, $0x0001020304050607
   246	GLOBL ·kcon(SB), RODATA, $1312
   247	
      	// SHA512ROUND0 performs one compression round on the working variables a-h
      	// with message word xi and does not touch the message schedule.
      	//
      	// The round constant is not added here: the macro assumes it is already in
      	// h, and in turn adds KI to g, the register the next invocation receives
      	// as h. Registers are never moved; the call sites rotate the argument list
      	// by one position per round instead.
      	//
      	//   FUNC = Ch(e,f,g)            VSEL g, f, e
      	//   h   += xi + FUNC + S1       T1 (S1 = VSHASIGMAD $15, e, $1)
      	//   d   += h                    e' = d + T1
      	//   FUNC = Maj(a,b,c)           VXOR b, a then VSEL b, c
      	//   h   += S0 + FUNC            a' = T1 + T2 (S0 = VSHASIGMAD $0, a, $1)
      	//
      	// Side effects: loads the table entry at (TBL)(IDX) into KI, advances IDX
      	// by 16, and overwrites FUNC, S0 and S1.
      	// Keep comments out of the body; every line ends in a continuation backslash.
   248	#define SHA512ROUND0(a, b, c, d, e, f, g, h, xi) \
   249		VSEL		g, f, e, FUNC; \
   250		VSHASIGMAD	$15, e, $1, S1; \
   251		VADDUDM		xi, h, h; \
   252		VSHASIGMAD	$0, a, $1, S0; \
   253		VADDUDM		FUNC, h, h; \
   254		VXOR		b, a, FUNC; \
   255		VADDUDM		S1, h, h; \
   256		VSEL		b, c, FUNC, FUNC; \
   257		VADDUDM		KI, g, g; \
   258		VADDUDM		h, d, d; \
   259		VADDUDM		FUNC, S0, S0; \
   260		LVX		(TBL)(IDX), KI; \
   261		ADD		$16, IDX; \
   262		VADDUDM		S0, h, h
   263	
      	// SHA512ROUND1 performs the same compression round as SHA512ROUND0 on a-h
      	// with message word xi, interleaved with one message-schedule update:
      	//
      	//   xj += xj_9 + s0 + s1
      	//     s0 = VSHASIGMAD $0,  xj_1,  $0
      	//     s1 = VSHASIGMAD $15, xj_14, $0
      	//
      	// With xj holding W[j] and xj_1, xj_9, xj_14 holding W[j+1], W[j+9] and
      	// W[j+14], xj is overwritten in place with W[j+16]; the call sites reuse
      	// V8-V23 this way as a 16-entry ring.
      	//
      	// As in SHA512ROUND0, the round constant is assumed to be in h already and
      	// KI is added to g for the following round.
      	//
      	// Side effects: loads the table entry at (TBL)(IDX) into KI, advances IDX
      	// by 16, and overwrites FUNC, S0, S1, s0 and s1.
      	// Keep comments out of the body; every line ends in a continuation backslash.
   264	#define SHA512ROUND1(a, b, c, d, e, f, g, h, xi, xj, xj_1, xj_9, xj_14) \
   265		VSHASIGMAD	$0, xj_1, $0, s0; \
   266		VSEL		g, f, e, FUNC; \
   267		VSHASIGMAD	$15, e, $1, S1; \
   268		VADDUDM		xi, h, h; \
   269		VSHASIGMAD	$0, a, $1, S0; \
   270		VSHASIGMAD	$15, xj_14, $0, s1; \
   271		VADDUDM		FUNC, h, h; \
   272		VXOR		b, a, FUNC; \
   273		VADDUDM		xj_9, xj, xj; \
   274		VADDUDM		S1, h, h; \
   275		VSEL		b, c, FUNC, FUNC; \
   276		VADDUDM		KI, g, g; \
   277		VADDUDM		h, d, d; \
   278		VADDUDM		FUNC, S0, S0; \
   279		VADDUDM		s0, xj, xj; \
   280		LVX		(TBL)(IDX), KI; \
   281		ADD		$16, IDX; \
   282		VADDUDM		S0, h, h; \
   283		VADDUDM		s1, xj, xj
   284	
   285	// func block(dig *digest, p []byte)
      	//
      	// block folds every complete 128-byte chunk of p, in order, into the eight
      	// 64-bit state words stored in the first 64 bytes at dig. Trailing bytes
      	// that do not fill a whole chunk are ignored, and if p holds no complete
      	// chunk the state is left untouched.
      	//
      	// Arguments (32 bytes): dig+0(FP), p_base+8(FP), p_len+16(FP); the slice
      	// capacity is not read. Locals (128 bytes): a copy of a-h taken at the
      	// start of each chunk, addressed through OFFLOAD.
      	//
      	// NOTE(review): the save area starts at 0(R1). Confirm that this does not
      	// overlap the slot the ppc64 toolchain reserves at the bottom of the frame
      	// for the saved LR.
   286	TEXT ·block(SB),0,$128-32
   287		MOVD	dig+0(FP), CTX
   288		MOVD	p_base+8(FP), INP
   289		MOVD	p_len+16(FP), LEN
   290	
      		// Round the length down to a whole number of 128-byte chunks. Each
      		// pass of the loop below consumes 8 x 16 = 128 bytes, so rounding to
      		// anything smaller would let the last pass read beyond the end of p.
   291		SRD	$7, LEN
   292		SLD	$7, LEN
   293	
   294		ADD	INP, LEN, END
   295	
      		// Nothing to do when there is no complete chunk.
   296		CMP	INP, END
   297		BEQ	end
   298	
   299		MOVD	$·kcon(SB), TBL
   300		MOVD	R1, OFFLOAD
   301	
      		// CNT is initialised here but not read again in this function.
   302		MOVD	R0, CNT
   303		MOVWZ	$0x10, HEX10
   304		MOVWZ	$0x20, HEX20
   305		MOVWZ	$0x30, HEX30
   306		MOVWZ	$0x40, HEX40
   307		MOVWZ	$0x50, HEX50
   308		MOVWZ	$0x60, HEX60
   309		MOVWZ	$0x70, HEX70
   310	
      		// Build LEMASK: LVSL with an effective address of 8 yields the bytes
      		// 0x08..0x17, and XORing each byte with 0x0F turns that sequence into
      		// the VPERM control applied to every input vector below.
   311		MOVWZ	$8, IDX
   312		LVSL	(IDX)(R0), LEMASK
   313		VSPLTISB	$0x0F, KI
   314		VXOR	KI, LEMASK, LEMASK
   315	
      		// Load the state two words at a time into V0, V2, V4, V6; the odd
      		// registers receive the same vector rotated by 8 bytes.
   316		LXVD2X	(CTX)(HEX00), VS32	// v0 = vs32
   317		LXVD2X	(CTX)(HEX10), VS34	// v2 = vs34
   318		LXVD2X	(CTX)(HEX20), VS36	// v4 = vs36
   319		// unpack the input values into vector registers
   320		VSLDOI	$8, V0, V0, V1
   321		LXVD2X	(CTX)(HEX30), VS38	// v6 = vs38
   322		VSLDOI	$8, V2, V2, V3
   323		VSLDOI	$8, V4, V4, V5
   324		VSLDOI	$8, V6, V6, V7
   325	
      	// One pass per 128-byte chunk.
   326	loop:
   327		LVX	(TBL)(HEX00), KI
   328		MOVWZ	$16, IDX
   329	
   330		LXVD2X	(INP)(R0), VS40	// load v8 (=vs40) in advance
   331		ADD	$16, INP
   332	
      		// Save a-h so they can be added back after the 80 rounds.
   333		STVX	V0, (OFFLOAD+HEX00)
   334		STVX	V1, (OFFLOAD+HEX10)
   335		STVX	V2, (OFFLOAD+HEX20)
   336		STVX	V3, (OFFLOAD+HEX30)
   337		STVX	V4, (OFFLOAD+HEX40)
   338		STVX	V5, (OFFLOAD+HEX50)
   339		STVX	V6, (OFFLOAD+HEX60)
   340		STVX	V7, (OFFLOAD+HEX70)
   341	
      		// The round macros expect the constant to be in h already, so the
      		// first one is added here; every later one is added by the macros.
   342		VADDUDM	KI, V7, V7	// h+K[i]
   343		LVX	(TBL)(IDX), KI
   344		ADD	$16, IDX
   345	
      		// Rounds 0-14: each 16-byte load supplies two message words. VPERM
      		// with LEMASK reorders the bytes of the loaded vector, and VSLDOI $8
      		// rotates it by 8 bytes to expose the second word for the next round.
   346		VPERM	V8, V8, LEMASK, V8
   347		SHA512ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V8)
   348		LXVD2X	(INP)(R0), VS42	// load v10 (=vs42) in advance
   349		ADD	$16, INP, INP
   350		VSLDOI	$8, V8, V8, V9
   351		SHA512ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V9)
   352		VPERM	V10, V10, LEMASK, V10
   353		SHA512ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V10)
   354		LXVD2X	(INP)(R0), VS44	// load v12 (=vs44) in advance
   355		ADD	$16, INP, INP
   356		VSLDOI	$8, V10, V10, V11
   357		SHA512ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V11)
   358		VPERM	V12, V12, LEMASK, V12
   359		SHA512ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V12)
   360		LXVD2X	(INP)(R0), VS46	// load v14 (=vs46) in advance
   361		ADD	$16, INP, INP
   362		VSLDOI	$8, V12, V12, V13
   363		SHA512ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V13)
   364		VPERM	V14, V14, LEMASK, V14
   365		SHA512ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V14)
   366		LXVD2X	(INP)(R0), VS48	// load v16 (=vs48) in advance
   367		ADD	$16, INP, INP
   368		VSLDOI	$8, V14, V14, V15
   369		SHA512ROUND0(V1, V2, V3, V4, V5, V6, V7, V0, V15)
   370		VPERM	V16, V16, LEMASK, V16
   371		SHA512ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V16)
   372		LXVD2X	(INP)(R0), VS50	// load v18 (=vs50) in advance
   373		ADD	$16, INP, INP
   374		VSLDOI	$8, V16, V16, V17
   375		SHA512ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V17)
   376		VPERM	V18, V18, LEMASK, V18
   377		SHA512ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V18)
   378		LXVD2X	(INP)(R0), VS52	// load v20 (=vs52) in advance
   379		ADD	$16, INP, INP
   380		VSLDOI	$8, V18, V18, V19
   381		SHA512ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V19)
   382		VPERM	V20, V20, LEMASK, V20
   383		SHA512ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V20)
   384		LXVD2X	(INP)(R0), VS54	// load v22 (=vs54) in advance
   385		ADD	$16, INP, INP
   386		VSLDOI	$8, V20, V20, V21
   387		SHA512ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V21)
   388		VPERM	V22, V22, LEMASK, V22
   389		SHA512ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V22)
   390		VSLDOI	$8, V22, V22, V23
      		// Round 15 consumes the last input word and starts the schedule
      		// update by replacing the word in V8.
   391		SHA512ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22)
   392	
      		// Rounds 16-79: four passes over the 16-round body below.
   393		MOVWZ	$4, TEMP
   394		MOVWZ	TEMP, CTR
   395	
   396	L16_xx:
   397		SHA512ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V18, V23)
   398		SHA512ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V9, V10, V11, V19, V8)
   399		SHA512ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V10, V11, V12, V20, V9)
   400		SHA512ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V11, V12, V13, V21, V10)
   401		SHA512ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V12, V13, V14, V22, V11)
   402		SHA512ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V13, V14, V15, V23, V12)
   403		SHA512ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V14, V15, V16, V8, V13)
   404		SHA512ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V15, V16, V17, V9, V14)
   405		SHA512ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V16, V17, V18, V10, V15)
   406		SHA512ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V17, V18, V19, V11, V16)
   407		SHA512ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V18, V19, V20, V12, V17)
   408		SHA512ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V19, V20, V21, V13, V18)
   409		SHA512ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V20, V21, V22, V14, V19)
   410		SHA512ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V21, V22, V23, V15, V20)
   411		SHA512ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V22, V23, V8, V16, V21)
   412		SHA512ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22)
   413	
   414		BC	0x10, 0, L16_xx		// bdnz
   415	
      		// Add the values saved at the top of the loop back into a-h. V10-V17
      		// are free to use as temporaries here: the next pass reloads them.
   416		LVX	(OFFLOAD)(HEX00), V10
   417	
   418		LVX	(OFFLOAD)(HEX10), V11
   419		VADDUDM	V10, V0, V0
   420		LVX	(OFFLOAD)(HEX20), V12
   421		VADDUDM	V11, V1, V1
   422		LVX	(OFFLOAD)(HEX30), V13
   423		VADDUDM	V12, V2, V2
   424		LVX	(OFFLOAD)(HEX40), V14
   425		VADDUDM	V13, V3, V3
   426		LVX	(OFFLOAD)(HEX50), V15
   427		VADDUDM	V14, V4, V4
   428		LVX	(OFFLOAD)(HEX60), V16
   429		VADDUDM	V15, V5, V5
   430		LVX	(OFFLOAD)(HEX70), V17
   431		VADDUDM	V16, V6, V6
   432		VADDUDM	V17, V7, V7
   433	
   434		CMPU	INP, END
   435		BLT	loop
   436	
      		// KI now holds the last table entry (offset 0x510), loaded by the
      		// final round. Used as a VPERM control it merges each even/odd
      		// register pair back into one vector for the 16-byte stores.
   437		VPERM	V0, V1, KI, V0
   438		VPERM	V2, V3, KI, V2
   439		VPERM	V4, V5, KI, V4
   440		VPERM	V6, V7, KI, V6
   441		STXVD2X	VS32, (CTX+HEX00)	// v0 = vs32
   442		STXVD2X	VS34, (CTX+HEX10)	// v2 = vs34
   443		STXVD2X	VS36, (CTX+HEX20)	// v4 = vs36
   444		STXVD2X	VS38, (CTX+HEX30)	// v6 = vs38
   445	
   446	end:
   447		RET
   448	

View as plain text