Text file src/crypto/md5/md5block_ppc64le.s

Documentation: crypto/md5

// Original source:
//	http://www.zorinaq.com/papers/md5-amd64.html
//	http://www.zorinaq.com/papers/md5-amd64.tar.bz2
//
// MD5 optimized for ppc64le using Go's assembler for
// ppc64le, based on md5block_amd64.s implementation by
// the Go authors.
//
// Author: Marc Bevand <bevand_m (at) epita.fr>
// Licence: I hereby disclaim the copyright on this code and place it
// in the public domain.

#include "textflag.h"

// TODO: This could be updated for big-endian ppc64 by using the
// correct byte-reverse instruction. Changes are required in the
// Go assembler to make that instruction work.

#define MOVE_LITTLE_ENDIAN MOVWZ

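// block processes every complete 64-byte block of p and folds it into
// the four 32-bit state words at dig (dig+0(FP), p+8(FP), p_len+16(FP)).
// Register use in the body below:
//	R10             - pointer to the state (clobbered; reloaded at "end")
//	R6              - current position in p
//	R7              - end of the last complete 64-byte block
//	                  (p plus p_len rounded down to a multiple of 64)
//	R22, R3, R4, R5 - the working variables a, b, c, d
//	R14-R17         - copy of a, b, c, d saved at the top of each block
//	R8              - current message word, loaded little-endian
//	R9, R10         - scratch used by the round macros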
TEXT ·block(SB),NOSPLIT,$0-32
	MOVD	dig+0(FP), R10
	MOVD	p+8(FP), R6
	MOVD	p_len+16(FP), R5
	SLD	$6, R5
	SRD	$6, R5
	ADD	R6, R5, R7

	MOVWZ	0(R10), R22
	MOVWZ	4(R10), R3
	MOVWZ	8(R10), R4
	MOVWZ	12(R10), R5
	CMP	R6, R7
	BEQ	end

loop:
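	// Save the incoming state (a, b, c, d) into R14-R17 so it can be
	// added back in after the 64 steps of this block.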
	MOVWZ	R22, R14
	MOVWZ	R3, R15
	MOVWZ	R4, R16
	MOVWZ	R5, R17

	MOVE_LITTLE_ENDIAN	0(R6), R8
	MOVWZ	R5, R9

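// ROUND1 performs one step of the first MD5 round. On entry R8 holds the
// message word for this step and R9 holds d. The macro computes
// F(b,c,d) = d ^ (b & (c ^ d)), adds const, the message word and F to a,
// rotates a left by shift (RLWMI with a full mask is a 32-bit rotate),
// and adds b. It also loads the message word for the next step into R8
// and leaves c in R9, which is the next step's d.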
#define ROUND1(a, b, c, d, index, const, shift) \
	XOR	c, R9; \
	ADD	$const, a; \
	ADD	R8, a; \
	AND	b, R9; \
	XOR	d, R9; \
	MOVE_LITTLE_ENDIAN	(index*4)(R6), R8; \
	ADD	R9, a; \
	RLWMI	$shift, a, $0xffffffff, a; \
	MOVWZ	c, R9; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND1(R22,R3,R4,R5, 1,0xd76aa478, 7);
	ROUND1(R5,R22,R3,R4, 2,0xe8c7b756,12);
	ROUND1(R4,R5,R22,R3, 3,0x242070db,17);
	ROUND1(R3,R4,R5,R22, 4,0xc1bdceee,22);
	ROUND1(R22,R3,R4,R5, 5,0xf57c0faf, 7);
	ROUND1(R5,R22,R3,R4, 6,0x4787c62a,12);
	ROUND1(R4,R5,R22,R3, 7,0xa8304613,17);
	ROUND1(R3,R4,R5,R22, 8,0xfd469501,22);
	ROUND1(R22,R3,R4,R5, 9,0x698098d8, 7);
	ROUND1(R5,R22,R3,R4,10,0x8b44f7af,12);
	ROUND1(R4,R5,R22,R3,11,0xffff5bb1,17);
	ROUND1(R3,R4,R5,R22,12,0x895cd7be,22);
	ROUND1(R22,R3,R4,R5,13,0x6b901122, 7);
	ROUND1(R5,R22,R3,R4,14,0xfd987193,12);
	ROUND1(R4,R5,R22,R3,15,0xa679438e,17);
	ROUND1(R3,R4,R5,R22, 0,0x49b40821,22);

	MOVE_LITTLE_ENDIAN	(1*4)(R6), R8
	MOVWZ	R5, R9
	MOVWZ	R5, R10

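// ROUND2 performs one step of the second MD5 round. On entry R9 and R10
// both hold d; the XOR with $0xffffffff complements R9 (there is no NOT
// instruction, hence the "NOTW R9" note below), so the macro computes
// G(b,c,d) = (b & d) | (c & ~d), adds const, the message word and G to a,
// rotates left by shift and adds b, leaving c in R9 and R10 for the next
// step.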
#define ROUND2(a, b, c, d, index, const, shift) \
	XOR	$0xffffffff, R9; \ // NOTW R9
	ADD	$const, a; \
	ADD	R8, a; \
	AND	b, R10; \
	AND	c, R9; \
	MOVE_LITTLE_ENDIAN	(index*4)(R6), R8; \
	OR	R9, R10; \
	MOVWZ	c, R9; \
	ADD	R10, a; \
	MOVWZ	c, R10; \
	RLWMI	$shift, a, $0xffffffff, a; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND2(R22,R3,R4,R5, 6,0xf61e2562, 5);
	ROUND2(R5,R22,R3,R4,11,0xc040b340, 9);
	ROUND2(R4,R5,R22,R3, 0,0x265e5a51,14);
	ROUND2(R3,R4,R5,R22, 5,0xe9b6c7aa,20);
	ROUND2(R22,R3,R4,R5,10,0xd62f105d, 5);
	ROUND2(R5,R22,R3,R4,15, 0x2441453, 9);
	ROUND2(R4,R5,R22,R3, 4,0xd8a1e681,14);
	ROUND2(R3,R4,R5,R22, 9,0xe7d3fbc8,20);
	ROUND2(R22,R3,R4,R5,14,0x21e1cde6, 5);
	ROUND2(R5,R22,R3,R4, 3,0xc33707d6, 9);
	ROUND2(R4,R5,R22,R3, 8,0xf4d50d87,14);
	ROUND2(R3,R4,R5,R22,13,0x455a14ed,20);
	ROUND2(R22,R3,R4,R5, 2,0xa9e3e905, 5);
	ROUND2(R5,R22,R3,R4, 7,0xfcefa3f8, 9);
	ROUND2(R4,R5,R22,R3,12,0x676f02d9,14);
	ROUND2(R3,R4,R5,R22, 0,0x8d2a4c8a,20);

	MOVE_LITTLE_ENDIAN	(5*4)(R6), R8
	MOVWZ	R4, R9

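// ROUND3 performs one step of the third MD5 round. On entry R8 holds the
// message word and R9 holds c; the macro computes H(b,c,d) = b ^ c ^ d,
// adds const, the message word and H to a, rotates left by shift, adds b,
// and leaves b in R9, which is the next step's c.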
#define ROUND3(a, b, c, d, index, const, shift) \
	ADD	$const, a; \
	ADD	R8, a; \
	MOVE_LITTLE_ENDIAN	(index*4)(R6), R8; \
	XOR	d, R9; \
	XOR	b, R9; \
	ADD	R9, a; \
	RLWMI	$shift, a, $0xffffffff, a; \
	MOVWZ	b, R9; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND3(R22,R3,R4,R5, 8,0xfffa3942, 4);
	ROUND3(R5,R22,R3,R4,11,0x8771f681,11);
	ROUND3(R4,R5,R22,R3,14,0x6d9d6122,16);
	ROUND3(R3,R4,R5,R22, 1,0xfde5380c,23);
	ROUND3(R22,R3,R4,R5, 4,0xa4beea44, 4);
	ROUND3(R5,R22,R3,R4, 7,0x4bdecfa9,11);
	ROUND3(R4,R5,R22,R3,10,0xf6bb4b60,16);
	ROUND3(R3,R4,R5,R22,13,0xbebfbc70,23);
	ROUND3(R22,R3,R4,R5, 0,0x289b7ec6, 4);
	ROUND3(R5,R22,R3,R4, 3,0xeaa127fa,11);
	ROUND3(R4,R5,R22,R3, 6,0xd4ef3085,16);
	ROUND3(R3,R4,R5,R22, 9, 0x4881d05,23);
	ROUND3(R22,R3,R4,R5,12,0xd9d4d039, 4);
	ROUND3(R5,R22,R3,R4,15,0xe6db99e5,11);
	ROUND3(R4,R5,R22,R3, 2,0x1fa27cf8,16);
	ROUND3(R3,R4,R5,R22, 0,0xc4ac5665,23);

	MOVE_LITTLE_ENDIAN	(0*4)(R6), R8
	MOVWZ	$0xffffffff, R9
	XOR	R5, R9

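// ROUND4 performs one step of the fourth MD5 round. On entry R8 holds the
// message word and R9 holds ~d; the macro computes I(b,c,d) = c ^ (b | ~d),
// adds const, the message word and I to a, rotates left by shift, adds b,
// and leaves ~c in R9, the complement of the next step's d.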
#define ROUND4(a, b, c, d, index, const, shift) \
	ADD	$const, a; \
	ADD	R8, a; \
	OR	b, R9; \
	XOR	c, R9; \
	ADD	R9, a; \
	MOVE_LITTLE_ENDIAN	(index*4)(R6), R8; \
	MOVWZ	$0xffffffff, R9; \
	RLWMI	$shift, a, $0xffffffff, a; \
	XOR	c, R9; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND4(R22,R3,R4,R5, 7,0xf4292244, 6);
	ROUND4(R5,R22,R3,R4,14,0x432aff97,10);
	ROUND4(R4,R5,R22,R3, 5,0xab9423a7,15);
	ROUND4(R3,R4,R5,R22,12,0xfc93a039,21);
	ROUND4(R22,R3,R4,R5, 3,0x655b59c3, 6);
	ROUND4(R5,R22,R3,R4,10,0x8f0ccc92,10);
	ROUND4(R4,R5,R22,R3, 1,0xffeff47d,15);
	ROUND4(R3,R4,R5,R22, 8,0x85845dd1,21);
	ROUND4(R22,R3,R4,R5,15,0x6fa87e4f, 6);
	ROUND4(R5,R22,R3,R4, 6,0xfe2ce6e0,10);
	ROUND4(R4,R5,R22,R3,13,0xa3014314,15);
	ROUND4(R3,R4,R5,R22, 4,0x4e0811a1,21);
	ROUND4(R22,R3,R4,R5,11,0xf7537e82, 6);
	ROUND4(R5,R22,R3,R4, 2,0xbd3af235,10);
	ROUND4(R4,R5,R22,R3, 9,0x2ad7d2bb,15);
	ROUND4(R3,R4,R5,R22, 0,0xeb86d391,21);

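	// Feed the saved input state back into the result and advance to
	// the next 64-byte block.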
	ADD	R14, R22
	ADD	R15, R3
	ADD	R16, R4
	ADD	R17, R5
	ADD	$64, R6
	CMP	R6, R7
	BLT	loop

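	// Store the (possibly updated) state words back into dig.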
end:
	MOVD	dig+0(FP), R10
	MOVWZ	R22, 0(R10)
	MOVWZ	R3, 4(R10)
	MOVWZ	R4, 8(R10)
	MOVWZ	R5, 12(R10)
	RET
