...
Run Format

Text file src/runtime/memmove_arm64.s

Documentation: runtime

     1	// Copyright 2014 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "textflag.h"
     6	
     7	// func memmove(to, from unsafe.Pointer, n uintptr)
     8	TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24
     9		MOVD	to+0(FP), R3
    10		MOVD	from+8(FP), R4
    11		MOVD	n+16(FP), R5
    12		CBNZ	R5, check
    13		RET
    14	
    15	check:
    16		CMP	$16, R5
    17		BLE	copy16
    18	
    19		AND	$~31, R5, R7	// R7 is N&~31
    20		SUB	R7, R5, R6	// R6 is N&31
    21	
    22		CMP	R3, R4
    23		BLT	backward
    24	
    25		// Copying forward proceeds by copying R7/8 words then copying R6 bytes.
    26		// R3 and R4 are advanced as we copy.
    27	
    28	        // (There may be implementations of armv8 where copying by bytes until
    29	        // at least one of source or dest is word aligned is a worthwhile
    30	        // optimization, but the on the one tested so far (xgene) it did not
    31	        // make a significance difference.)
    32	
    33		CBZ	R7, noforwardlarge	// Do we need to do any doubleword-by-doubleword copying?
    34	
    35		ADD	R3, R7, R9	// R9 points just past where we copy by word
    36	
    37	forwardlargeloop:
    38		LDP.P	32(R4), (R8, R10)
    39		STP.P	(R8, R10), 32(R3)
    40		LDP	-16(R4), (R11, R12)
    41		STP	(R11, R12), -16(R3)
    42		SUB 	$32, R7, R7
    43		CBNZ	R7, forwardlargeloop
    44	
    45	noforwardlarge:
    46		CBNZ	R6, forwardtail		// Do we need to do any byte-by-byte copying?
    47		RET
    48	
    49	forwardtail:
    50		ADD	R3, R6, R9	// R9 points just past the destination memory
    51	
    52	forwardtailloop:
    53		MOVBU.P 1(R4), R8
    54		MOVBU.P	R8, 1(R3)
    55		CMP	R3, R9
    56		BNE	forwardtailloop
    57		RET
    58	
    59		// Small copies: 1..16 bytes.
    60	copy16:
    61		ADD	R4, R5, R8	// R8 points just past the last source byte
    62		ADD	R3, R5, R9	// R9 points just past the last destination byte
    63		CMP	$8, R5
    64		BLT	copy7
    65		MOVD	(R4), R6
    66		MOVD	-8(R8), R7
    67		MOVD	R6, (R3)
    68		MOVD	R7, -8(R9)
    69		RET
    70	
    71	copy7:
    72		TBZ	$2, R5, copy3
    73		MOVWU	(R4), R6
    74		MOVWU	-4(R8), R7
    75		MOVW	R6, (R3)
    76		MOVW	R7, -4(R9)
    77		RET
    78	
    79	copy3:
    80		TBZ	$1, R5, copy1
    81		MOVHU	(R4), R6
    82		MOVHU	-2(R8), R7
    83		MOVH	R6, (R3)
    84		MOVH	R7, -2(R9)
    85		RET
    86	
    87	copy1:
    88		MOVBU	(R4), R6
    89		MOVB	R6, (R3)
    90		RET
    91	
    92	backward:
    93		// Copying backwards proceeds by copying R6 bytes then copying R7/8 words.
    94		// R3 and R4 are advanced to the end of the destination/source buffers
    95		// respectively and moved back as we copy.
    96	
    97		ADD	R4, R5, R4	// R4 points just past the last source byte
    98		ADD	R3, R5, R3	// R3 points just past the last destination byte
    99	
   100		CBZ	R6, nobackwardtail	// Do we need to do any byte-by-byte copying?
   101	
   102		SUB	R6, R3, R9	// R9 points at the lowest destination byte that should be copied by byte.
   103	backwardtailloop:
   104		MOVBU.W	-1(R4), R8
   105		MOVBU.W	R8, -1(R3)
   106		CMP	R9, R3
   107		BNE	backwardtailloop
   108	
   109	nobackwardtail:
   110		CBNZ     R7, backwardlarge	// Do we need to do any doubleword-by-doubleword copying?
   111		RET
   112	
   113	backwardlarge:
   114	        SUB	R7, R3, R9      // R9 points at the lowest destination byte
   115	
   116	backwardlargeloop:
   117		LDP	-16(R4), (R8, R10)
   118		STP	(R8, R10), -16(R3)
   119		LDP.W	-32(R4), (R11, R12)
   120		STP.W	(R11, R12), -32(R3)
   121		CMP	R9, R3
   122		BNE	backwardlargeloop
   123		RET

View as plain text