...
Run Format

Text file src/runtime/memmove_386.s

Documentation: runtime

     1	// Inferno's libkern/memmove-386.s
     2	// https://bitbucket.org/inferno-os/inferno-os/src/default/libkern/memmove-386.s
     3	//
     4	//         Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
     5	//         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
     6	//         Portions Copyright 2009 The Go Authors. All rights reserved.
     7	//
     8	// Permission is hereby granted, free of charge, to any person obtaining a copy
     9	// of this software and associated documentation files (the "Software"), to deal
    10	// in the Software without restriction, including without limitation the rights
    11	// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    12	// copies of the Software, and to permit persons to whom the Software is
    13	// furnished to do so, subject to the following conditions:
    14	//
    15	// The above copyright notice and this permission notice shall be included in
    16	// all copies or substantial portions of the Software.
    17	//
    18	// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    19	// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    20	// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    21	// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    22	// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    23	// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    24	// THE SOFTWARE.
    25	
    26	// +build !plan9
    27	
    28	#include "textflag.h"
    29	
    30	// func memmove(to, from unsafe.Pointer, n uintptr)
    31	TEXT runtime·memmove(SB), NOSPLIT, $0-12	// copies n bytes from 'from' to 'to', coping with overlapping regions; $0-12 = no local frame, 12 bytes of arguments
    32		MOVL	to+0(FP), DI	// DI = destination pointer
    33		MOVL	from+4(FP), SI	// SI = source pointer
    34		MOVL	n+8(FP), BX	// BX = number of bytes still to copy
    35	
    36		// REP instructions have a high startup cost, so we handle small sizes
    37		// with some straightline code. The REP MOVSL instruction is really fast
    38		// for large sizes. The cutover is approximately 1K.  We implement up to
    39		// 128 because that is the maximum SSE register load (loading all data
    40		// into registers lets us ignore copy direction).
    41	tail:	// dispatch on the count in BX; also re-entered after the bulk REP MOVSL copies, with BX = 0..3
    42		// BSR+branch table make almost all memmove/memclr benchmarks worse. Not worth doing.
    43		TESTL	BX, BX
    44		JEQ	move_0	// nothing (left) to copy
    45		CMPL	BX, $2
    46		JBE	move_1or2
    47		CMPL	BX, $4
    48		JB	move_3	// BX == 3 here: 0, 1 and 2 were dispatched above
    49		JE	move_4	// reuses the flags of the same CMPL
    50		CMPL	BX, $8
    51		JBE	move_5through8
    52		CMPL	BX, $16
    53		JBE	move_9through16
    54		CMPB	runtime·support_sse2(SB), $1	// the 17..128 byte cases use X registers, so they are taken only when this flag is 1
    55		JNE	nosse2
    56		CMPL	BX, $32
    57		JBE	move_17through32
    58		CMPL	BX, $64
    59		JBE	move_33through64
    60		CMPL	BX, $128
    61		JBE	move_65through128
    62	
    63	nosse2:	// reached when n > 128, or when n > 16 and support_sse2 is not 1
    64	/*
    65	 * decide the copy direction: a backward copy may be needed when from <= to
    66	 */
    67		CMPL	SI, DI
    68		JLS	back	// unsigned from <= to: go check whether the regions really overlap
    69	
    70	/*
    71	 * forward copy loop
    72	 */
    73	forward:
    74		// If REP MOVSB isn't fast, don't use it
    75		CMPB	runtime·support_erms(SB), $1 // enhanced REP MOVSB/STOSB
    76		JNE	fwdBy4
    77	
    78		// Check alignment: when both pointers are 4-byte aligned, copy by dwords instead
    79		MOVL	SI, AX
    80		ORL	DI, AX	// the low two bits of AX are zero only if they are zero in both SI and DI
    81		TESTL	$3, AX
    82		JEQ	fwdBy4
    83	
    84		// Do 1 byte at a time: the whole count goes through REP MOVSB, so no tail remains
    85		MOVL	BX, CX
    86		REP;	MOVSB
    87		RET
    88	
    89	fwdBy4:
    90		// Do 4 bytes at a time: CX = n/4 dwords for REP MOVSL, BX = n%4 bytes left for tail
    91		MOVL	BX, CX
    92		SHRL	$2, CX
    93		ANDL	$3, BX
    94		REP;	MOVSL
    95		JMP	tail	// tail copies the remaining 0..3 bytes from where REP MOVSL left SI/DI
    96	
    97	/*
    98	 * check overlap
    99	 */
   100	back:
   101		MOVL	SI, CX
   102		ADDL	BX, CX	// CX = from + n, one past the end of the source
   103		CMPL	CX, DI
   104		JLS	forward	// the source ends at or before the destination starts: no overlap, copy forward
   105	/*
   106	 * the regions overlap, so the whole copy runs backwards,
   107	 * starting from addresses adjusted to the end of both buffers
   108	 */
   109	
   110		ADDL	BX, DI	// DI = to + n
   111		ADDL	BX, SI	// SI = from + n
   112		STD	// set the direction flag so the string copy below walks downwards
   113	
   114	/*
   115	 * copy
   116	 */
   117		MOVL	BX, CX
   118		SHRL	$2, CX	// CX = n/4 dwords
   119		ANDL	$3, BX	// BX = n%4 bytes left for tail
   120	
   121		SUBL	$4, DI	// point at the last dword of each buffer
   122		SUBL	$4, SI
   123		REP;	MOVSL
   124	
   125		CLD	// clear the direction flag again before leaving this path
   126		ADDL	$4, DI	// undo the -4 bias applied before the copy
   127		ADDL	$4, SI
   128		SUBL	BX, DI	// step back over the BX bytes not yet copied at the front of the buffers,
   129		SUBL	BX, SI	// so that SI/DI point at them for tail
   130		JMP	tail
   131	
   132	move_1or2:
   133		MOVB	(SI), AX	// first byte
   134		MOVB	-1(SI)(BX*1), CX	// last byte (the same byte when BX == 1)
   135		MOVB	AX, (DI)	// in every small case all loads precede all stores, so overlap is harmless
   136		MOVB	CX, -1(DI)(BX*1)
   137		RET
   138	move_0:
   139		RET
   140	move_3:
   141		MOVW	(SI), AX	// bytes 0..1
   142		MOVB	2(SI), CX	// byte 2
   143		MOVW	AX, (DI)
   144		MOVB	CX, 2(DI)
   145		RET
   146	move_4:
   147		// We need a separate case for 4 to make sure we write pointers atomically.
   148		MOVL	(SI), AX
   149		MOVL	AX, (DI)
   150		RET
   151	move_5through8:
   152		MOVL	(SI), AX	// first 4 bytes
   153		MOVL	-4(SI)(BX*1), CX	// last 4 bytes; overlaps the first 4 when BX < 8
   154		MOVL	AX, (DI)
   155		MOVL	CX, -4(DI)(BX*1)
   156		RET
   157	move_9through16:
   158		MOVL	(SI), AX	// first 8 bytes in AX, CX
   159		MOVL	4(SI), CX
   160		MOVL	-8(SI)(BX*1), DX	// last 8 bytes in DX, BP; overlaps the first 8 when BX < 16
   161		MOVL	-4(SI)(BX*1), BP
   162		MOVL	AX, (DI)
   163		MOVL	CX, 4(DI)
   164		MOVL	DX, -8(DI)(BX*1)
   165		MOVL	BP, -4(DI)(BX*1)
   166		RET
   167	move_17through32:
   168		MOVOU	(SI), X0	// first 16 bytes
   169		MOVOU	-16(SI)(BX*1), X1	// last 16 bytes; overlaps the first 16 when BX < 32
   170		MOVOU	X0, (DI)
   171		MOVOU	X1, -16(DI)(BX*1)
   172		RET
   173	move_33through64:
   174		MOVOU	(SI), X0	// first 32 bytes in X0, X1
   175		MOVOU	16(SI), X1
   176		MOVOU	-32(SI)(BX*1), X2	// last 32 bytes in X2, X3; overlaps the first 32 when BX < 64
   177		MOVOU	-16(SI)(BX*1), X3
   178		MOVOU	X0, (DI)
   179		MOVOU	X1, 16(DI)
   180		MOVOU	X2, -32(DI)(BX*1)
   181		MOVOU	X3, -16(DI)(BX*1)
   182		RET
   183	move_65through128:
   184		MOVOU	(SI), X0	// first 64 bytes in X0..X3
   185		MOVOU	16(SI), X1
   186		MOVOU	32(SI), X2
   187		MOVOU	48(SI), X3
   188		MOVOU	-64(SI)(BX*1), X4	// last 64 bytes in X4..X7; overlaps the first 64 when BX < 128
   189		MOVOU	-48(SI)(BX*1), X5
   190		MOVOU	-32(SI)(BX*1), X6
   191		MOVOU	-16(SI)(BX*1), X7
   192		MOVOU	X0, (DI)
   193		MOVOU	X1, 16(DI)
   194		MOVOU	X2, 32(DI)
   195		MOVOU	X3, 48(DI)
   196		MOVOU	X4, -64(DI)(BX*1)
   197		MOVOU	X5, -48(DI)(BX*1)
   198		MOVOU	X6, -32(DI)(BX*1)
   199		MOVOU	X7, -16(DI)(BX*1)
   200		RET

View as plain text