Text file src/crypto/aes/asm_amd64.s

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "textflag.h"
     6  
     7  // func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
        //
        // Encrypts the 16 bytes at src into the 16 bytes at dst using the
        // consecutive 16-byte round keys at xk. nr selects the entry point:
        // nr == 12 runs 12 rounds, nr below 12 (unsigned compare) runs 10, and
        // anything else runs 14; nr is not otherwise validated. In each case
        // (rounds + 1) round keys are read from xk.
        // Registers: CX = nr, AX = round-key cursor, DX = dst, BX = src,
        // X0 = block state, X1 = current round key.
     8  TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
     9  	MOVQ nr+0(FP), CX
    10  	MOVQ xk+8(FP), AX
    11  	MOVQ dst+16(FP), DX
    12  	MOVQ src+24(FP), BX
    13  	MOVUPS 0(AX), X1  // X1 = first round key
    14  	MOVUPS 0(BX), X0  // X0 = input block
    15  	ADDQ $16, AX  // step past the first round key
    16  	PXOR X1, X0  // state ^= first round key
    17  	SUBQ $12, CX  // flags of nr-12 choose the entry point below
    18  	JE Lenc192  // nr == 12
    19  	JB Lenc128  // nr < 12 (unsigned); otherwise fall through
    20  Lenc256:
        	// Two extra rounds, then fall through into Lenc192.
    21  	MOVUPS 0(AX), X1
    22  	AESENC X1, X0
    23  	MOVUPS 16(AX), X1
    24  	AESENC X1, X0
    25  	ADDQ $32, AX
    26  Lenc192:
        	// Two extra rounds, then fall through into Lenc128.
    27  	MOVUPS 0(AX), X1
    28  	AESENC X1, X0
    29  	MOVUPS 16(AX), X1
    30  	AESENC X1, X0
    31  	ADDQ $32, AX
    32  Lenc128:
        	// Common tail: nine AESENC rounds followed by one AESENCLAST,
        	// using the ten round keys at AX+0 .. AX+144.
    33  	MOVUPS 0(AX), X1
    34  	AESENC X1, X0
    35  	MOVUPS 16(AX), X1
    36  	AESENC X1, X0
    37  	MOVUPS 32(AX), X1
    38  	AESENC X1, X0
    39  	MOVUPS 48(AX), X1
    40  	AESENC X1, X0
    41  	MOVUPS 64(AX), X1
    42  	AESENC X1, X0
    43  	MOVUPS 80(AX), X1
    44  	AESENC X1, X0
    45  	MOVUPS 96(AX), X1
    46  	AESENC X1, X0
    47  	MOVUPS 112(AX), X1
    48  	AESENC X1, X0
    49  	MOVUPS 128(AX), X1
    50  	AESENC X1, X0
    51  	MOVUPS 144(AX), X1
    52  	AESENCLAST X1, X0  // final round
    53  	MOVUPS X0, 0(DX)  // write the 16-byte result to dst
    54  	RET
    55  
    56  // func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
        //
        // Decrypts the 16 bytes at src into the 16 bytes at dst using the
        // consecutive 16-byte round keys at xk, applied in the order stored.
        // NOTE(review): xk is presumably the "dec" schedule written by
        // expandKeyAsm (already reversed and AESIMC-transformed) - confirm
        // against the caller.
        // nr selects the entry point exactly as in encryptBlockAsm: nr == 12
        // runs 12 rounds, nr below 12 (unsigned compare) runs 10, anything else
        // runs 14; (rounds + 1) round keys are read.
        // Registers: CX = nr, AX = round-key cursor, DX = dst, BX = src,
        // X0 = block state, X1 = current round key.
    57  TEXT ·decryptBlockAsm(SB),NOSPLIT,$0
    58  	MOVQ nr+0(FP), CX
    59  	MOVQ xk+8(FP), AX
    60  	MOVQ dst+16(FP), DX
    61  	MOVQ src+24(FP), BX
    62  	MOVUPS 0(AX), X1  // X1 = first stored round key
    63  	MOVUPS 0(BX), X0  // X0 = input block
    64  	ADDQ $16, AX  // step past the first round key
    65  	PXOR X1, X0  // state ^= first round key
    66  	SUBQ $12, CX  // flags of nr-12 choose the entry point below
    67  	JE Ldec192  // nr == 12
    68  	JB Ldec128  // nr < 12 (unsigned); otherwise fall through
    69  Ldec256:
        	// Two extra rounds, then fall through into Ldec192.
    70  	MOVUPS 0(AX), X1
    71  	AESDEC X1, X0
    72  	MOVUPS 16(AX), X1
    73  	AESDEC X1, X0
    74  	ADDQ $32, AX
    75  Ldec192:
        	// Two extra rounds, then fall through into Ldec128.
    76  	MOVUPS 0(AX), X1
    77  	AESDEC X1, X0
    78  	MOVUPS 16(AX), X1
    79  	AESDEC X1, X0
    80  	ADDQ $32, AX
    81  Ldec128:
        	// Common tail: nine AESDEC rounds followed by one AESDECLAST,
        	// using the ten round keys at AX+0 .. AX+144.
    82  	MOVUPS 0(AX), X1
    83  	AESDEC X1, X0
    84  	MOVUPS 16(AX), X1
    85  	AESDEC X1, X0
    86  	MOVUPS 32(AX), X1
    87  	AESDEC X1, X0
    88  	MOVUPS 48(AX), X1
    89  	AESDEC X1, X0
    90  	MOVUPS 64(AX), X1
    91  	AESDEC X1, X0
    92  	MOVUPS 80(AX), X1
    93  	AESDEC X1, X0
    94  	MOVUPS 96(AX), X1
    95  	AESDEC X1, X0
    96  	MOVUPS 112(AX), X1
    97  	AESDEC X1, X0
    98  	MOVUPS 128(AX), X1
    99  	AESDEC X1, X0
   100  	MOVUPS 144(AX), X1
   101  	AESDECLAST X1, X0  // final round
   102  	MOVUPS X0, 0(DX)  // write the 16-byte result to dst
   103  	RET
   104  
   105  // func expandKeyAsm(nr int, key *byte, enc, dec *uint32)
   106  // Note that round keys are stored in uint128 format, not uint32
        //
        // Expands the raw key at key into nr+1 16-byte round keys at enc, then
        // derives the schedule at dec from them. nr selects the path: nr == 12
        // takes the 192-bit path (reads 24 key bytes), nr below 12 (unsigned
        // compare) takes the 128-bit path (reads 16), anything else takes the
        // 256-bit path (reads 32). The paths write 13, 11 and 15 round keys
        // respectively, so enc and dec must each hold (nr+1)*16 bytes; nr is not
        // otherwise validated.
        // Registers: CX = nr, AX = key, BX = enc write cursor, DX = dec cursor.
        // X0/X2 carry key material between helper calls, X1 receives the
        // AESKEYGENASSIST result, X4 is helper scratch.
   107  TEXT ·expandKeyAsm(SB),NOSPLIT,$0
   108  	MOVQ nr+0(FP), CX
   109  	MOVQ key+8(FP), AX
   110  	MOVQ enc+16(FP), BX
   111  	MOVQ dec+24(FP), DX
   112  	MOVUPS (AX), X0  // X0 = first 16 key bytes
   113  	// enc
   114  	MOVUPS X0, (BX)  // first round key is the raw key prefix
   115  	ADDQ $16, BX
   116  	PXOR X4, X4 // _expand_key_* expect X4 to be zero
        	// nr is a full-width int, so compare all 64 bits (matches the
        	// SUBQ used by encryptBlockAsm/decryptBlockAsm).
   117  	CMPQ CX, $12
   118  	JE Lexp_enc192  // nr == 12
   119  	JB Lexp_enc128  // nr < 12 (unsigned); otherwise fall through
   120  Lexp_enc256:
        	// X2 = second 16 key bytes, stored as the second round key. Each
        	// a/b pair then appends two round keys: 256a updates X0 from
        	// assist(X2), 256b updates X2 from assist(X0). 7 a-calls and
        	// 6 b-calls give 2 + 13 = 15 round keys.
   121  	MOVUPS 16(AX), X2
   122  	MOVUPS X2, (BX)
   123  	ADDQ $16, BX
   124  	AESKEYGENASSIST $0x01, X2, X1
   125  	CALL _expand_key_256a<>(SB)
   126  	AESKEYGENASSIST $0x01, X0, X1
   127  	CALL _expand_key_256b<>(SB)
   128  	AESKEYGENASSIST $0x02, X2, X1
   129  	CALL _expand_key_256a<>(SB)
   130  	AESKEYGENASSIST $0x02, X0, X1
   131  	CALL _expand_key_256b<>(SB)
   132  	AESKEYGENASSIST $0x04, X2, X1
   133  	CALL _expand_key_256a<>(SB)
   134  	AESKEYGENASSIST $0x04, X0, X1
   135  	CALL _expand_key_256b<>(SB)
   136  	AESKEYGENASSIST $0x08, X2, X1
   137  	CALL _expand_key_256a<>(SB)
   138  	AESKEYGENASSIST $0x08, X0, X1
   139  	CALL _expand_key_256b<>(SB)
   140  	AESKEYGENASSIST $0x10, X2, X1
   141  	CALL _expand_key_256a<>(SB)
   142  	AESKEYGENASSIST $0x10, X0, X1
   143  	CALL _expand_key_256b<>(SB)
   144  	AESKEYGENASSIST $0x20, X2, X1
   145  	CALL _expand_key_256a<>(SB)
   146  	AESKEYGENASSIST $0x20, X0, X1
   147  	CALL _expand_key_256b<>(SB)
   148  	AESKEYGENASSIST $0x40, X2, X1
   149  	CALL _expand_key_256a<>(SB)
   150  	JMP Lexp_dec
   151  Lexp_enc192:
        	// X2 = key bytes 16..23 (8-byte load). 192a appends 32 bytes and
        	// 192b appends 16, so the four a/b pairs add 192 bytes to the
        	// 16 already written: 13 round keys in total.
   152  	MOVQ 16(AX), X2
   153  	AESKEYGENASSIST $0x01, X2, X1
   154  	CALL _expand_key_192a<>(SB)
   155  	AESKEYGENASSIST $0x02, X2, X1
   156  	CALL _expand_key_192b<>(SB)
   157  	AESKEYGENASSIST $0x04, X2, X1
   158  	CALL _expand_key_192a<>(SB)
   159  	AESKEYGENASSIST $0x08, X2, X1
   160  	CALL _expand_key_192b<>(SB)
   161  	AESKEYGENASSIST $0x10, X2, X1
   162  	CALL _expand_key_192a<>(SB)
   163  	AESKEYGENASSIST $0x20, X2, X1
   164  	CALL _expand_key_192b<>(SB)
   165  	AESKEYGENASSIST $0x40, X2, X1
   166  	CALL _expand_key_192a<>(SB)
   167  	AESKEYGENASSIST $0x80, X2, X1
   168  	CALL _expand_key_192b<>(SB)
   169  	JMP Lexp_dec
   170  Lexp_enc128:
        	// Ten steps, each deriving the next round key from X0 and
        	// appending it: 1 + 10 = 11 round keys.
   171  	AESKEYGENASSIST $0x01, X0, X1
   172  	CALL _expand_key_128<>(SB)
   173  	AESKEYGENASSIST $0x02, X0, X1
   174  	CALL _expand_key_128<>(SB)
   175  	AESKEYGENASSIST $0x04, X0, X1
   176  	CALL _expand_key_128<>(SB)
   177  	AESKEYGENASSIST $0x08, X0, X1
   178  	CALL _expand_key_128<>(SB)
   179  	AESKEYGENASSIST $0x10, X0, X1
   180  	CALL _expand_key_128<>(SB)
   181  	AESKEYGENASSIST $0x20, X0, X1
   182  	CALL _expand_key_128<>(SB)
   183  	AESKEYGENASSIST $0x40, X0, X1
   184  	CALL _expand_key_128<>(SB)
   185  	AESKEYGENASSIST $0x80, X0, X1
   186  	CALL _expand_key_128<>(SB)
   187  	AESKEYGENASSIST $0x1b, X0, X1
   188  	CALL _expand_key_128<>(SB)
   189  	AESKEYGENASSIST $0x36, X0, X1
   190  	CALL _expand_key_128<>(SB)
   191  Lexp_dec:
   192  	// dec
        	// BX is one past the last enc round key. The dec schedule is the
        	// enc schedule in reverse order: the last and first enc keys are
        	// copied unchanged, the nr-1 keys between them go through AESIMC.
   193  	SUBQ $16, BX  // BX = last enc round key
   194  	MOVUPS (BX), X1
   195  	MOVUPS X1, (DX)  // dec[0] = last enc key, unchanged
   196  	DECQ CX  // CX = nr-1 middle keys to transform
   197  Lexp_dec_loop:
   198  	MOVUPS -16(BX), X1  // next enc key, walking backwards
   199  	AESIMC X1, X0
   200  	MOVUPS X0, 16(DX)  // next dec slot, walking forwards
   201  	SUBQ $16, BX
   202  	ADDQ $16, DX
   203  	DECQ CX
   204  	JNZ Lexp_dec_loop
   205  	MOVUPS -16(BX), X0  // first enc key
   206  	MOVUPS X0, 16(DX)  // last dec slot = first enc key, unchanged
   207  	RET
   208  
   209  TEXT _expand_key_128<>(SB),NOSPLIT,$0
        	// Register-convention helper (no stack arguments).
        	// In:  X0 = previous round key, X1 = AESKEYGENASSIST result,
        	//      X4 with its lowest 32-bit lane zero, BX = output cursor.
        	// Out: X0 = next round key, also stored at (BX); BX += 16.
        	// X1 and X4 are overwritten; X4's lowest lane stays zero because
        	// both SHUFPS steps refill it from X4's own lane 0.
   210  	PSHUFD $0xff, X1, X1  // broadcast lane 3 of the assist result
        	// The two SHUFPS/PXOR pairs turn X0 = (a, b, c, d) into
        	// (a, a^b, a^b^c, a^b^c^d).
   211  	SHUFPS $0x10, X0, X4
   212  	PXOR X4, X0
   213  	SHUFPS $0x8c, X0, X4
   214  	PXOR X4, X0
   215  	PXOR X1, X0  // mix in the broadcast assist word
   216  	MOVUPS X0, (BX)
   217  	ADDQ $16, BX
   218  	RET
   219  
   220  TEXT _expand_key_192a<>(SB),NOSPLIT,$0
        	// Register-convention helper (no stack arguments).
        	// In:  X0, X2 = running key state, X1 = AESKEYGENASSIST result,
        	//      X4 with its lowest 32-bit lane zero, BX = output cursor.
        	// Out: X0 and X2 updated; 32 bytes stored at (BX); BX += 32.
        	// X1, X3, X4, X5 and X6 are overwritten.
   221  	PSHUFD $0x55, X1, X1  // broadcast lane 1 of the assist result
        	// Same running-XOR update of X0 as in _expand_key_128.
   222  	SHUFPS $0x10, X0, X4
   223  	PXOR X4, X0
   224  	SHUFPS $0x8c, X0, X4
   225  	PXOR X4, X0
   226  	PXOR X1, X0
   227    
   228  	MOVAPS X2, X5
   229  	MOVAPS X2, X6  // X6 keeps the incoming X2 for the first store
   230  	PSLLDQ $0x4, X5  // X5 = incoming X2 shifted left by 4 bytes
   231  	PSHUFD $0xff, X0, X3  // X3 = lane 3 of the new X0, broadcast
   232  	PXOR X3, X2
   233  	PXOR X5, X2  // X2 = X2 ^ X3 ^ X5
   234  
   235  	MOVAPS X0, X1
   236  	SHUFPS $0x44, X0, X6  // X6 = incoming X2 lanes 0-1, new X0 lanes 0-1
   237  	MOVUPS X6, (BX)
   238  	SHUFPS $0x4e, X2, X1  // X1 = new X0 lanes 2-3, new X2 lanes 0-1
   239  	MOVUPS X1, 16(BX)
   240  	ADDQ $32, BX
   241  	RET
   242  
   243  TEXT _expand_key_192b<>(SB),NOSPLIT,$0
        	// Register-convention helper (no stack arguments).
        	// In:  X0, X2 = running key state, X1 = AESKEYGENASSIST result,
        	//      X4 with its lowest 32-bit lane zero, BX = output cursor.
        	// Out: X0 and X2 updated; new X0 stored at (BX); BX += 16.
        	// X1, X3, X4 and X5 are overwritten.
   244  	PSHUFD $0x55, X1, X1  // broadcast lane 1 of the assist result
        	// Same running-XOR update of X0 as in _expand_key_128.
   245  	SHUFPS $0x10, X0, X4
   246  	PXOR X4, X0
   247  	SHUFPS $0x8c, X0, X4
   248  	PXOR X4, X0
   249  	PXOR X1, X0
   250  
   251  	MOVAPS X2, X5
   252  	PSLLDQ $0x4, X5  // X5 = incoming X2 shifted left by 4 bytes
   253  	PSHUFD $0xff, X0, X3  // X3 = lane 3 of the new X0, broadcast
   254  	PXOR X3, X2
   255  	PXOR X5, X2  // X2 = X2 ^ X3 ^ X5
   256  
   257  	MOVUPS X0, (BX)
   258  	ADDQ $16, BX
   259  	RET
   260  
   261  TEXT _expand_key_256a<>(SB),NOSPLIT,$0
        	// Identical to _expand_key_128: tail-jump so its RET returns
        	// directly to this helper's caller.
   262  	JMP _expand_key_128<>(SB)
   263  
   264  TEXT _expand_key_256b<>(SB),NOSPLIT,$0
        	// Register-convention helper (no stack arguments).
        	// In:  X2 = round key from two steps back, X1 = AESKEYGENASSIST
        	//      result, X4 with its lowest 32-bit lane zero, BX = output cursor.
        	// Out: X2 = next round key, also stored at (BX); BX += 16.
        	// X1 and X4 are overwritten; X0 is not touched.
   265  	PSHUFD $0xaa, X1, X1  // broadcast lane 2 of the assist result
        	// Same running-XOR update as _expand_key_128, applied to X2.
   266  	SHUFPS $0x10, X2, X4
   267  	PXOR X4, X2
   268  	SHUFPS $0x8c, X2, X4
   269  	PXOR X4, X2
   270  	PXOR X1, X2
   271  
   272  	MOVUPS X2, (BX)
   273  	ADDQ $16, BX
   274  	RET
   275  

View as plain text