...
Run Format

Text file src/crypto/sha256/sha256block_ppc64le.s

Documentation: crypto/sha256

     1	// Copyright 2016 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// This is a derived work from OpenSSL of SHA-2 using assembly optimizations. The
     6	// original code was written by Andy Polyakov <appro@openssl.org> and it's dual
     7	// licensed under OpenSSL and CRYPTOGAMS licenses depending on where you obtain
     8	// it. For further details see http://www.openssl.org/~appro/cryptogams/.
     9	
    10	#include "textflag.h"
    11	
    12	// SHA256 block routine. See sha256block.go for Go equivalent.
    13	//
    14	// The algorithm is detailed in FIPS 180-4:
    15	//
    16	//  http://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
    17	//
    18	// Wt = Mt; for 0 <= t <= 15
    19	// Wt = SIGMA1(Wt-2) + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 63
    20	//
    21	// a = H0
    22	// b = H1
    23	// c = H2
    24	// d = H3
    25	// e = H4
    26	// f = H5
    27	// g = H6
    28	// h = H7
    29	//
    30	// for t = 0 to 63 {
    31	//    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
    32	//    T2 = BIGSIGMA0(a) + Maj(a,b,c)
    33	//    h = g
    34	//    g = f
    35	//    f = e
    36	//    e = d + T1
    37	//    d = c
    38	//    c = b
    39	//    b = a
    40	//    a = T1 + T2
    41	// }
    42	//
    43	// H0 = a + H0
    44	// H1 = b + H1
    45	// H2 = c + H2
    46	// H3 = d + H3
    47	// H4 = e + H4
    48	// H5 = f + H5
    49	// H6 = g + H6
    50	// H7 = h + H7
    51	
    52	#define CTX	R3	// pointer to the digest state (loaded from dig)
    53	#define INP	R4	// current input pointer (starts at p_base, advances 16 bytes per load)
    54	#define END	R5	// INP + LEN; the block loop runs while INP < END
    55	#define TBL	R6	// address of the ·kcon table
    56	#define IDX	R7	// byte offset into TBL used by the next LVX
    57	#define CNT	R8	// set to zero in block; not otherwise used in the code shown
    58	#define LEN	R9	// p_len rounded down to a multiple of 64
    59	#define OFFLOAD	R11	// base address of the area where a-h are saved for each block
    60	#define TEMP	R12	// scratch; carries the loop count into CTR
    61	
    62	#define HEX00	R0	// offset 0x00; R0 is never written here, the code relies on it holding zero
    63	#define HEX10	R10	// HEX10-HEX70 are loaded with 0x10-0x70 and used as index registers
    64	#define HEX20	R25
    65	#define HEX30	R26
    66	#define HEX40	R27
    67	#define HEX50	R28
    68	#define HEX60	R29
    69	#define HEX70	R31
    70	
    71	// V0-V7 are A-H
    72	// V8-V23 are used for the message schedule
    73	#define KI	V24	// round constant vector, loaded one round ahead of its use
    74	#define FUNC	V25	// scratch for Ch(e,f,g) and then Maj(a,b,c)
    75	#define S0	V26	// BIGSIGMA0(a), later BIGSIGMA0(a) + Maj(a,b,c)
    76	#define S1	V27	// BIGSIGMA1(e)
    77	#define s0	V28	// SIGMA0 term of the message schedule
    78	#define s1	V29	// SIGMA1 term of the message schedule
    79	#define LEMASK	V31	// Permutation control register for little endian
    80	
    81	// 4 copies of each Kt, to fill all 4 words of a vector register
      	// Each Kt therefore occupies 16 bytes (two 8-byte DATA items), so Kt lives at
      	// byte offset 16*t. The 64 constants are followed by one all-zero entry and
      	// three permutation control vectors used when packing the final state.
    82	DATA  ·kcon+0x000(SB)/8, $0x428a2f98428a2f98
    83	DATA  ·kcon+0x008(SB)/8, $0x428a2f98428a2f98
    84	DATA  ·kcon+0x010(SB)/8, $0x7137449171374491
    85	DATA  ·kcon+0x018(SB)/8, $0x7137449171374491
    86	DATA  ·kcon+0x020(SB)/8, $0xb5c0fbcfb5c0fbcf
    87	DATA  ·kcon+0x028(SB)/8, $0xb5c0fbcfb5c0fbcf
    88	DATA  ·kcon+0x030(SB)/8, $0xe9b5dba5e9b5dba5
    89	DATA  ·kcon+0x038(SB)/8, $0xe9b5dba5e9b5dba5
    90	DATA  ·kcon+0x040(SB)/8, $0x3956c25b3956c25b
    91	DATA  ·kcon+0x048(SB)/8, $0x3956c25b3956c25b
    92	DATA  ·kcon+0x050(SB)/8, $0x59f111f159f111f1
    93	DATA  ·kcon+0x058(SB)/8, $0x59f111f159f111f1
    94	DATA  ·kcon+0x060(SB)/8, $0x923f82a4923f82a4
    95	DATA  ·kcon+0x068(SB)/8, $0x923f82a4923f82a4
    96	DATA  ·kcon+0x070(SB)/8, $0xab1c5ed5ab1c5ed5
    97	DATA  ·kcon+0x078(SB)/8, $0xab1c5ed5ab1c5ed5
    98	DATA  ·kcon+0x080(SB)/8, $0xd807aa98d807aa98
    99	DATA  ·kcon+0x088(SB)/8, $0xd807aa98d807aa98
   100	DATA  ·kcon+0x090(SB)/8, $0x12835b0112835b01
   101	DATA  ·kcon+0x098(SB)/8, $0x12835b0112835b01
   102	DATA  ·kcon+0x0A0(SB)/8, $0x243185be243185be
   103	DATA  ·kcon+0x0A8(SB)/8, $0x243185be243185be
   104	DATA  ·kcon+0x0B0(SB)/8, $0x550c7dc3550c7dc3
   105	DATA  ·kcon+0x0B8(SB)/8, $0x550c7dc3550c7dc3
   106	DATA  ·kcon+0x0C0(SB)/8, $0x72be5d7472be5d74
   107	DATA  ·kcon+0x0C8(SB)/8, $0x72be5d7472be5d74
   108	DATA  ·kcon+0x0D0(SB)/8, $0x80deb1fe80deb1fe
   109	DATA  ·kcon+0x0D8(SB)/8, $0x80deb1fe80deb1fe
   110	DATA  ·kcon+0x0E0(SB)/8, $0x9bdc06a79bdc06a7
   111	DATA  ·kcon+0x0E8(SB)/8, $0x9bdc06a79bdc06a7
   112	DATA  ·kcon+0x0F0(SB)/8, $0xc19bf174c19bf174
   113	DATA  ·kcon+0x0F8(SB)/8, $0xc19bf174c19bf174
   114	DATA  ·kcon+0x100(SB)/8, $0xe49b69c1e49b69c1
   115	DATA  ·kcon+0x108(SB)/8, $0xe49b69c1e49b69c1
   116	DATA  ·kcon+0x110(SB)/8, $0xefbe4786efbe4786
   117	DATA  ·kcon+0x118(SB)/8, $0xefbe4786efbe4786
   118	DATA  ·kcon+0x120(SB)/8, $0x0fc19dc60fc19dc6
   119	DATA  ·kcon+0x128(SB)/8, $0x0fc19dc60fc19dc6
   120	DATA  ·kcon+0x130(SB)/8, $0x240ca1cc240ca1cc
   121	DATA  ·kcon+0x138(SB)/8, $0x240ca1cc240ca1cc
   122	DATA  ·kcon+0x140(SB)/8, $0x2de92c6f2de92c6f
   123	DATA  ·kcon+0x148(SB)/8, $0x2de92c6f2de92c6f
   124	DATA  ·kcon+0x150(SB)/8, $0x4a7484aa4a7484aa
   125	DATA  ·kcon+0x158(SB)/8, $0x4a7484aa4a7484aa
   126	DATA  ·kcon+0x160(SB)/8, $0x5cb0a9dc5cb0a9dc
   127	DATA  ·kcon+0x168(SB)/8, $0x5cb0a9dc5cb0a9dc
   128	DATA  ·kcon+0x170(SB)/8, $0x76f988da76f988da
   129	DATA  ·kcon+0x178(SB)/8, $0x76f988da76f988da
   130	DATA  ·kcon+0x180(SB)/8, $0x983e5152983e5152
   131	DATA  ·kcon+0x188(SB)/8, $0x983e5152983e5152
   132	DATA  ·kcon+0x190(SB)/8, $0xa831c66da831c66d
   133	DATA  ·kcon+0x198(SB)/8, $0xa831c66da831c66d
   134	DATA  ·kcon+0x1A0(SB)/8, $0xb00327c8b00327c8
   135	DATA  ·kcon+0x1A8(SB)/8, $0xb00327c8b00327c8
   136	DATA  ·kcon+0x1B0(SB)/8, $0xbf597fc7bf597fc7
   137	DATA  ·kcon+0x1B8(SB)/8, $0xbf597fc7bf597fc7
   138	DATA  ·kcon+0x1C0(SB)/8, $0xc6e00bf3c6e00bf3
   139	DATA  ·kcon+0x1C8(SB)/8, $0xc6e00bf3c6e00bf3
   140	DATA  ·kcon+0x1D0(SB)/8, $0xd5a79147d5a79147
   141	DATA  ·kcon+0x1D8(SB)/8, $0xd5a79147d5a79147
   142	DATA  ·kcon+0x1E0(SB)/8, $0x06ca635106ca6351
   143	DATA  ·kcon+0x1E8(SB)/8, $0x06ca635106ca6351
   144	DATA  ·kcon+0x1F0(SB)/8, $0x1429296714292967
   145	DATA  ·kcon+0x1F8(SB)/8, $0x1429296714292967
   146	DATA  ·kcon+0x200(SB)/8, $0x27b70a8527b70a85
   147	DATA  ·kcon+0x208(SB)/8, $0x27b70a8527b70a85
   148	DATA  ·kcon+0x210(SB)/8, $0x2e1b21382e1b2138
   149	DATA  ·kcon+0x218(SB)/8, $0x2e1b21382e1b2138
   150	DATA  ·kcon+0x220(SB)/8, $0x4d2c6dfc4d2c6dfc
   151	DATA  ·kcon+0x228(SB)/8, $0x4d2c6dfc4d2c6dfc
   152	DATA  ·kcon+0x230(SB)/8, $0x53380d1353380d13
   153	DATA  ·kcon+0x238(SB)/8, $0x53380d1353380d13
   154	DATA  ·kcon+0x240(SB)/8, $0x650a7354650a7354
   155	DATA  ·kcon+0x248(SB)/8, $0x650a7354650a7354
   156	DATA  ·kcon+0x250(SB)/8, $0x766a0abb766a0abb
   157	DATA  ·kcon+0x258(SB)/8, $0x766a0abb766a0abb
   158	DATA  ·kcon+0x260(SB)/8, $0x81c2c92e81c2c92e
   159	DATA  ·kcon+0x268(SB)/8, $0x81c2c92e81c2c92e
   160	DATA  ·kcon+0x270(SB)/8, $0x92722c8592722c85
   161	DATA  ·kcon+0x278(SB)/8, $0x92722c8592722c85
   162	DATA  ·kcon+0x280(SB)/8, $0xa2bfe8a1a2bfe8a1
   163	DATA  ·kcon+0x288(SB)/8, $0xa2bfe8a1a2bfe8a1
   164	DATA  ·kcon+0x290(SB)/8, $0xa81a664ba81a664b
   165	DATA  ·kcon+0x298(SB)/8, $0xa81a664ba81a664b
   166	DATA  ·kcon+0x2A0(SB)/8, $0xc24b8b70c24b8b70
   167	DATA  ·kcon+0x2A8(SB)/8, $0xc24b8b70c24b8b70
   168	DATA  ·kcon+0x2B0(SB)/8, $0xc76c51a3c76c51a3
   169	DATA  ·kcon+0x2B8(SB)/8, $0xc76c51a3c76c51a3
   170	DATA  ·kcon+0x2C0(SB)/8, $0xd192e819d192e819
   171	DATA  ·kcon+0x2C8(SB)/8, $0xd192e819d192e819
   172	DATA  ·kcon+0x2D0(SB)/8, $0xd6990624d6990624
   173	DATA  ·kcon+0x2D8(SB)/8, $0xd6990624d6990624
   174	DATA  ·kcon+0x2E0(SB)/8, $0xf40e3585f40e3585
   175	DATA  ·kcon+0x2E8(SB)/8, $0xf40e3585f40e3585
   176	DATA  ·kcon+0x2F0(SB)/8, $0x106aa070106aa070
   177	DATA  ·kcon+0x2F8(SB)/8, $0x106aa070106aa070
   178	DATA  ·kcon+0x300(SB)/8, $0x19a4c11619a4c116
   179	DATA  ·kcon+0x308(SB)/8, $0x19a4c11619a4c116
   180	DATA  ·kcon+0x310(SB)/8, $0x1e376c081e376c08
   181	DATA  ·kcon+0x318(SB)/8, $0x1e376c081e376c08
   182	DATA  ·kcon+0x320(SB)/8, $0x2748774c2748774c
   183	DATA  ·kcon+0x328(SB)/8, $0x2748774c2748774c
   184	DATA  ·kcon+0x330(SB)/8, $0x34b0bcb534b0bcb5
   185	DATA  ·kcon+0x338(SB)/8, $0x34b0bcb534b0bcb5
   186	DATA  ·kcon+0x340(SB)/8, $0x391c0cb3391c0cb3
   187	DATA  ·kcon+0x348(SB)/8, $0x391c0cb3391c0cb3
   188	DATA  ·kcon+0x350(SB)/8, $0x4ed8aa4a4ed8aa4a
   189	DATA  ·kcon+0x358(SB)/8, $0x4ed8aa4a4ed8aa4a
   190	DATA  ·kcon+0x360(SB)/8, $0x5b9cca4f5b9cca4f
   191	DATA  ·kcon+0x368(SB)/8, $0x5b9cca4f5b9cca4f
   192	DATA  ·kcon+0x370(SB)/8, $0x682e6ff3682e6ff3
   193	DATA  ·kcon+0x378(SB)/8, $0x682e6ff3682e6ff3
   194	DATA  ·kcon+0x380(SB)/8, $0x748f82ee748f82ee
   195	DATA  ·kcon+0x388(SB)/8, $0x748f82ee748f82ee
   196	DATA  ·kcon+0x390(SB)/8, $0x78a5636f78a5636f
   197	DATA  ·kcon+0x398(SB)/8, $0x78a5636f78a5636f
   198	DATA  ·kcon+0x3A0(SB)/8, $0x84c8781484c87814
   199	DATA  ·kcon+0x3A8(SB)/8, $0x84c8781484c87814
   200	DATA  ·kcon+0x3B0(SB)/8, $0x8cc702088cc70208
   201	DATA  ·kcon+0x3B8(SB)/8, $0x8cc702088cc70208
   202	DATA  ·kcon+0x3C0(SB)/8, $0x90befffa90befffa
   203	DATA  ·kcon+0x3C8(SB)/8, $0x90befffa90befffa
   204	DATA  ·kcon+0x3D0(SB)/8, $0xa4506ceba4506ceb
   205	DATA  ·kcon+0x3D8(SB)/8, $0xa4506ceba4506ceb
   206	DATA  ·kcon+0x3E0(SB)/8, $0xbef9a3f7bef9a3f7
   207	DATA  ·kcon+0x3E8(SB)/8, $0xbef9a3f7bef9a3f7
   208	DATA  ·kcon+0x3F0(SB)/8, $0xc67178f2c67178f2
   209	DATA  ·kcon+0x3F8(SB)/8, $0xc67178f2c67178f2
   210	DATA  ·kcon+0x400(SB)/8, $0x0000000000000000	// entry 64: all zeros, the value the last round's look-ahead add picks up
   211	DATA  ·kcon+0x408(SB)/8, $0x0000000000000000
   212	DATA  ·kcon+0x410(SB)/8, $0x1011121310111213	// permutation control vectors
   213	DATA  ·kcon+0x418(SB)/8, $0x1011121300010203
   214	DATA  ·kcon+0x420(SB)/8, $0x1011121310111213
   215	DATA  ·kcon+0x428(SB)/8, $0x0405060700010203
   216	DATA  ·kcon+0x430(SB)/8, $0x1011121308090a0b
   217	DATA  ·kcon+0x438(SB)/8, $0x0405060700010203
   218	GLOBL ·kcon(SB), RODATA, $1088	// 0x440 bytes: 65 16-byte entries + 3 permute vectors
   219	
      	// SHA256ROUND0(a, b, c, d, e, f, g, h, xi) performs one compression round
      	// with message word xi and does not touch the message schedule.
      	//
      	//   FUNC = Ch(e,f,g)                    VSEL g, f, e: picks f where e is 1, g where e is 0
      	//   S1   = BIGSIGMA1(e)                 VSHASIGMAW with ST=1, SIX=15
      	//   S0   = BIGSIGMA0(a)                 VSHASIGMAW with ST=1, SIX=0
      	//   h   += xi + FUNC + S1               h already includes Kt, so h is now T1
      	//   FUNC = Maj(a,b,c)                   VXOR b, a then VSEL b, c on that mask
      	//   g   += KI                           pre-adds the next round's K; the next
      	//                                       invocation receives this register as h
      	//   d   += h                            new e = d + T1
      	//   h   += S0 + FUNC                    new a = T1 + T2
      	//
      	// Values are never moved between registers; the call sites rotate the
      	// argument list by one position per round instead. As a side effect the
      	// macro loads the following table entry into KI from (TBL)(IDX) and
      	// advances IDX by 16. Clobbers FUNC, S0 and S1.
   220	#define SHA256ROUND0(a, b, c, d, e, f, g, h, xi) \
   221		VSEL		g, f, e, FUNC; \
   222		VSHASIGMAW	$15, e, $1, S1; \
   223		VADDUWM		xi, h, h; \
   224		VSHASIGMAW	$0, a, $1, S0; \
   225		VADDUWM		FUNC, h, h; \
   226		VXOR		b, a, FUNC; \
   227		VADDUWM		S1, h, h; \
   228		VSEL		b, c, FUNC, FUNC; \
   229		VADDUWM		KI, g, g; \
   230		VADDUWM		h, d, d; \
   231		VADDUWM		FUNC, S0, S0; \
   232		LVX		(TBL)(IDX), KI; \
   233		ADD		$16, IDX; \
   234		VADDUWM		S0, h, h
   235	
      	// SHA256ROUND1(a, ..., h, xi, xj, xj_1, xj_9, xj_14) performs the same
      	// compression round as SHA256ROUND0 with message word xi, interleaved with
      	// one step of the message schedule that overwrites xj in place:
      	//
      	//   s0 = SIGMA0(xj_1)                   VSHASIGMAW with ST=0, SIX=0
      	//   s1 = SIGMA1(xj_14)                  VSHASIGMAW with ST=0, SIX=15
      	//   xj = xj + xj_9 + s0 + s1
      	//
      	// With V8-V23 used as a 16-entry ring, xj holds Wt-16 on entry and the
      	// call sites pass Wt-15, Wt-7 and Wt-2 as xj_1, xj_9 and xj_14, so xj
      	// leaves holding Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16.
      	//
      	// Like SHA256ROUND0 it reloads KI from (TBL)(IDX) and advances IDX by 16.
      	// Clobbers FUNC, S0, S1, s0 and s1.
   236	#define SHA256ROUND1(a, b, c, d, e, f, g, h, xi, xj, xj_1, xj_9, xj_14) \
   237		VSHASIGMAW	$0, xj_1, $0, s0; \
   238		VSEL		g, f, e, FUNC; \
   239		VSHASIGMAW	$15, e, $1, S1; \
   240		VADDUWM		xi, h, h; \
   241		VSHASIGMAW	$0, a, $1, S0; \
   242		VSHASIGMAW	$15, xj_14, $0, s1; \
   243		VADDUWM		FUNC, h, h; \
   244		VXOR		b, a, FUNC; \
   245		VADDUWM		xj_9, xj, xj; \
   246		VADDUWM		S1, h, h; \
   247		VSEL		b, c, FUNC, FUNC; \
   248		VADDUWM		KI, g, g; \
   249		VADDUWM		h, d, d; \
   250		VADDUWM		FUNC, S0, S0; \
   251		VADDUWM		s0, xj, xj; \
   252		LVX		(TBL)(IDX), KI; \
   253		ADD		$16, IDX; \
   254		VADDUWM		S0, h, h; \
   255		VADDUWM		s1, xj, xj
   256	
   257	// func block(dig *digest, p []byte)
      	//
      	// block runs the SHA-256 compression function over every complete 64-byte
      	// chunk of p and updates the 32 bytes of state at the start of *dig in
      	// place. The length is rounded down to a multiple of 64, so trailing bytes
      	// that do not fill a chunk are ignored; if no complete chunk is present the
      	// function returns without touching *dig.
      	//
      	// Frame: 128 bytes of locals, 32 bytes of arguments. Per chunk the code
      	// runs 15 SHA256ROUND0 rounds, one SHA256ROUND1 round, and then three
      	// passes of 16 SHA256ROUND1 rounds driven by CTR (64 rounds in total).
   258	TEXT ·block(SB),0,$128-32
   259		MOVD	dig+0(FP), CTX
   260		MOVD	p_base+8(FP), INP
   261		MOVD	p_len+16(FP), LEN
   262	
   263		SRD	$6, LEN	// LEN = (LEN >> 6) << 6: keep whole 64-byte chunks only
   264		SLD	$6, LEN
   265	
   266		ADD	INP, LEN, END	// END = INP + LEN
   267	
   268		CMP	INP, END
   269		BEQ	end	// no complete chunk: nothing to do
   270	
   271		MOVD	$·kcon(SB), TBL
   272		MOVD	R1, OFFLOAD	// a-h are saved at OFFLOAD+0x00..0x7f, i.e. starting at 0(R1)
      		// NOTE(review): confirm that a save area beginning at 0(R1) does not
      		// overlap slots the Go ppc64 frame layout reserves at the bottom of
      		// the frame.
   273	
   274		MOVD	R0, CNT	// CNT = 0; CNT is not read again in this function
   275		MOVWZ	$0x10, HEX10	// HEXnn = 0xnn, used as index registers for loads/stores
   276		MOVWZ	$0x20, HEX20
   277		MOVWZ	$0x30, HEX30
   278		MOVWZ	$0x40, HEX40
   279		MOVWZ	$0x50, HEX50
   280		MOVWZ	$0x60, HEX60
   281		MOVWZ	$0x70, HEX70
   282	
   283		MOVWZ	$8, IDX
   284		LVSL	(IDX)(R0), LEMASK	// LVSL pattern for effective address 8 ...
   285		VSPLTISB	$0x0F, KI	// ... XORed with 0x0F in every byte
   286		VXOR	KI, LEMASK, LEMASK	// gives the VPERM control applied to each loaded message vector
   287	
   288		LXVW4X	(CTX)(HEX00), VS32	// v0 = vs32
   289		LXVW4X	(CTX)(HEX10), VS36	// v4 = vs36
   290	
   291		// unpack the input values into vector registers
      		// V0 and V4 hold four state words each; V1-V3 and V5-V7 are copies
      		// rotated by 4, 8 and 12 bytes so that V0-V7 act as a-h.
   292		VSLDOI	$4, V0, V0, V1
   293		VSLDOI	$8, V0, V0, V2
   294		VSLDOI	$12, V0, V0, V3
   295		VSLDOI	$4, V4, V4, V5
   296		VSLDOI	$8, V4, V4, V6
   297		VSLDOI	$12, V4, V4, V7
   298	
   299	loop:
   300		LVX	(TBL)(HEX00), KI	// KI = first table entry (K0)
   301		MOVWZ	$16, IDX	// IDX -> second table entry
   302	
   303		LXVD2X	(INP)(R0), VS40	// load v8 (=vs40) in advance
   304		ADD	$16, INP
   305	
   306		STVX	V0, (OFFLOAD+HEX00)	// save a-h; they are added back after the 64 rounds
   307		STVX	V1, (OFFLOAD+HEX10)
   308		STVX	V2, (OFFLOAD+HEX20)
   309		STVX	V3, (OFFLOAD+HEX30)
   310		STVX	V4, (OFFLOAD+HEX40)
   311		STVX	V5, (OFFLOAD+HEX50)
   312		STVX	V6, (OFFLOAD+HEX60)
   313		STVX	V7, (OFFLOAD+HEX70)
   314	
   315		VADDUWM	KI, V7, V7	// h+K[i]
   316		LVX	(TBL)(IDX), KI	// KI = K1; from here on each round adds K one round ahead
   317		ADD	$16, IDX
   318	
      		// Rounds 0-14: each 16-byte load supplies four message words; VPERM
      		// applies LEMASK to the freshly loaded vector and VSLDOI $4 rotates
      		// it by one word to produce the operand for each following round.
   319		VPERM	V8, V8, LEMASK, V8
   320		SHA256ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V8)
   321		VSLDOI	$4, V8, V8, V9
   322		SHA256ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V9)
   323		VSLDOI	$4, V9, V9, V10
   324		SHA256ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V10)
   325		LXVD2X	(INP)(R0), VS44	// load v12 (=vs44) in advance
   326		ADD	$16, INP, INP
   327		VSLDOI	$4, V10, V10, V11
   328		SHA256ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V11)
   329		VPERM	V12, V12, LEMASK, V12
   330		SHA256ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V12)
   331		VSLDOI	$4, V12, V12, V13
   332		SHA256ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V13)
   333		VSLDOI	$4, V13, V13, V14
   334		SHA256ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V14)
   335		LXVD2X	(INP)(R0), VS48	// load v16 (=vs48) in advance
   336		ADD	$16, INP, INP
   337		VSLDOI	$4, V14, V14, V15
   338		SHA256ROUND0(V1, V2, V3, V4, V5, V6, V7, V0, V15)
   339		VPERM	V16, V16, LEMASK, V16
   340		SHA256ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V16)
   341		VSLDOI	$4, V16, V16, V17
   342		SHA256ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V17)
   343		VSLDOI	$4, V17, V17, V18
   344		SHA256ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V18)
   345		VSLDOI	$4, V18, V18, V19
   346		LXVD2X	(INP)(R0), VS52	// load v20 (=vs52) in advance
   347		ADD	$16, INP, INP
   348		SHA256ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V19)
   349		VPERM	V20, V20, LEMASK, V20
   350		SHA256ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V20)
   351		VSLDOI	$4, V20, V20, V21
   352		SHA256ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V21)
   353		VSLDOI	$4, V21, V21, V22
   354		SHA256ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V22)
   355		VSLDOI	$4, V22, V22, V23
   356		SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22)	// round 15; also turns V8 into W16
   357	
   358		MOVWZ	$3, TEMP	// CTR = 3: three passes of 16 rounds cover rounds 16-63
   359		MOVWZ	TEMP, CTR
   360	
   361	L16_xx:
   362		SHA256ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V18, V23)
   363		SHA256ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V9, V10, V11, V19, V8)
   364		SHA256ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V10, V11, V12, V20, V9)
   365		SHA256ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V11, V12, V13, V21, V10)
   366		SHA256ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V12, V13, V14, V22, V11)
   367		SHA256ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V13, V14, V15, V23, V12)
   368		SHA256ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V14, V15, V16, V8, V13)
   369		SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V15, V16, V17, V9, V14)
   370		SHA256ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V16, V17, V18, V10, V15)
   371		SHA256ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V17, V18, V19, V11, V16)
   372		SHA256ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V18, V19, V20, V12, V17)
   373		SHA256ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V19, V20, V21, V13, V18)
   374		SHA256ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V20, V21, V22, V14, V19)
   375		SHA256ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V21, V22, V23, V15, V20)
   376		SHA256ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V22, V23, V8, V16, V21)
   377		SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22)
   378	
   379		BC	0x10, 0, L16_xx		// bdnz
   380	
      		// Reload the a-h values saved at the top of the loop (into V10-V17,
      		// which are free now) and add them to the new working values.
   381		LVX	(OFFLOAD)(HEX00), V10
   382	
   383		LVX	(OFFLOAD)(HEX10), V11
   384		VADDUWM	V10, V0, V0
   385		LVX	(OFFLOAD)(HEX20), V12
   386		VADDUWM	V11, V1, V1
   387		LVX	(OFFLOAD)(HEX30), V13
   388		VADDUWM	V12, V2, V2
   389		LVX	(OFFLOAD)(HEX40), V14
   390		VADDUWM	V13, V3, V3
   391		LVX	(OFFLOAD)(HEX50), V15
   392		VADDUWM	V14, V4, V4
   393		LVX	(OFFLOAD)(HEX60), V16
   394		VADDUWM	V15, V5, V5
   395		LVX	(OFFLOAD)(HEX70), V17
   396		VADDUWM	V16, V6, V6
   397		VADDUWM	V17, V7, V7
   398	
   399		CMPU	INP, END
   400		BLT	loop	// more chunks remain
   401	
      		// Pack a-d into V0 and e-h into V4. After the 64th round KI holds
      		// the vector at kcon+0x410 and IDX is 0x420, so KI, V8 and V9 are
      		// the three permutation control vectors at the end of the table.
   402		LVX	(TBL)(IDX), V8
   403		ADD	$16, IDX
   404		VPERM	V0, V1, KI, V0
   405		LVX	(TBL)(IDX), V9
   406		VPERM	V4, V5, KI, V4
   407		VPERM	V0, V2, V8, V0
   408		VPERM	V4, V6, V8, V4
   409		VPERM	V0, V3, V9, V0
   410		VPERM	V4, V7, V9, V4
   411		STXVD2X	VS32, (CTX+HEX00)	// v0 = vs32
   412		STXVD2X	VS36, (CTX+HEX10)	// v4 = vs36
   413	
   414	end:
   415		RET
   416	

View as plain text